From 071d36bf21bcc837be00cea55bcef8d129e7f609 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Wed, 23 Mar 2016 22:39:50 -0600 Subject: [PATCH 01/73] xfrm: Fix crash observed during device unregistration and decryption A crash is observed when a decrypted packet is processed in receive path. get_rps_cpus() tries to dereference the skb->dev fields but it appears that the device is freed from the poison pattern. [] get_rps_cpu+0x94/0x2f0 [] netif_rx_internal+0x140/0x1cc [] netif_rx+0x74/0x94 [] xfrm_input+0x754/0x7d0 [] xfrm_input_resume+0x10/0x1c [] esp_input_done+0x20/0x30 [] process_one_work+0x244/0x3fc [] worker_thread+0x2f8/0x418 [] kthread+0xe0/0xec -013|get_rps_cpu( | dev = 0xFFFFFFC08B688000, | skb = 0xFFFFFFC0C76AAC00 -> ( | dev = 0xFFFFFFC08B688000 -> ( | name = "...................................................... | name_hlist = (next = 0xAAAAAAAAAAAAAAAA, pprev = 0xAAAAAAAAAAA Following are the sequence of events observed - - Encrypted packet in receive path from netdevice is queued - Encrypted packet queued for decryption (asynchronous) - Netdevice brought down and freed - Packet is decrypted and returned through callback in esp_input_done - Packet is queued again for process in network stack using netif_rx Since the device appears to have been freed, the dereference of skb->dev in get_rps_cpus() leads to an unhandled page fault exception. Fix this by holding on to device reference when queueing packets asynchronously and releasing the reference on call back return. v2: Make the change generic to xfrm as mentioned by Steffen and update the title to xfrm Suggested-by: Herbert Xu Signed-off-by: Jerome Stanislaus Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ad7f5b3f9b61..1c4ad477ce93 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -292,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; skb_dst_force(skb); + dev_hold(skb->dev); nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; resume: + dev_put(skb->dev); + spin_lock(&x->lock); if (nexthdr <= 0) { if (nexthdr == -EBADMSG) { From 526131262223e9315bfd41213f5a76e792f184ff Mon Sep 17 00:00:00 2001 From: Qianqian Xie Date: Thu, 24 Mar 2016 19:08:00 +0800 Subject: [PATCH 02/73] net: hns: fix a bug for cycle index The cycle index should be varied while the variable j is a fixed value. The patch will fix this bug. Signed-off-by: Qianqian Xie Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 5c1ac9ba1bf2..5978a5c8ef35 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2219,17 +2219,17 @@ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data) /* dsaf onode registers */ for (i = 0; i < DSAF_XOD_NUM; i++) { p[311 + i] = dsaf_read_dev(ddev, - DSAF_XOD_ETS_TSA_TC0_TC3_CFG_0_REG + j * 0x90); + DSAF_XOD_ETS_TSA_TC0_TC3_CFG_0_REG + i * 0x90); p[319 + i] = dsaf_read_dev(ddev, - DSAF_XOD_ETS_TSA_TC4_TC7_CFG_0_REG + j * 0x90); + DSAF_XOD_ETS_TSA_TC4_TC7_CFG_0_REG + i * 0x90); p[327 + i] = dsaf_read_dev(ddev, - DSAF_XOD_ETS_BW_TC0_TC3_CFG_0_REG + j * 0x90); + DSAF_XOD_ETS_BW_TC0_TC3_CFG_0_REG + i * 0x90); p[335 + i] = dsaf_read_dev(ddev, - DSAF_XOD_ETS_BW_TC4_TC7_CFG_0_REG + j * 0x90); + DSAF_XOD_ETS_BW_TC4_TC7_CFG_0_REG + i * 0x90); p[343 + i] = dsaf_read_dev(ddev, - DSAF_XOD_ETS_BW_OFFSET_CFG_0_REG + j * 0x90); + DSAF_XOD_ETS_BW_OFFSET_CFG_0_REG + i * 0x90); p[351 + i] = dsaf_read_dev(ddev, - DSAF_XOD_ETS_TOKEN_CFG_0_REG + j * 0x90); + DSAF_XOD_ETS_TOKEN_CFG_0_REG + i * 0x90); } p[359] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90); From 8d71397bd2021f8ef68ae34d8bea431cc903fcdd Mon Sep 17 00:00:00 2001 From: Qianqian Xie Date: Thu, 24 Mar 2016 19:08:01 +0800 Subject: [PATCH 03/73] net: hns: optimizate fmt of snprintf() It misses string format in function snprintf(), as below: snprintf(buff, ETH_GSTRING_LEN, g_gmac_stats_string[i].desc); It needs to add "%s" to fix it as below: snprintf(buff, ETH_GSTRING_LEN, "%s", g_gmac_stats_string[i].desc); Signed-off-by: Qianqian Xie Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 6e2b76ede075..44abb08de155 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -664,7 +664,8 @@ static void hns_gmac_get_strings(u32 stringset, u8 *data) return; for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++) { - snprintf(buff, ETH_GSTRING_LEN, g_gmac_stats_string[i].desc); + snprintf(buff, ETH_GSTRING_LEN, "%s", + g_gmac_stats_string[i].desc); buff = buff + ETH_GSTRING_LEN; } } From 055a94177dfa0bcbf2c4be475fef56bc80c9a233 Mon Sep 17 00:00:00 2001 From: Qianqian Xie Date: Thu, 24 Mar 2016 19:08:02 +0800 Subject: [PATCH 04/73] net: hns: bug fix for return values The return values in the first two functions mdiobus_write() are ignored. The patch will fix it. Signed-off-by: Qianqian Xie Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 9c3ba65988e1..0e7da3f0a564 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1013,8 +1013,8 @@ int hns_phy_led_set(struct net_device *netdev, int value) struct phy_device *phy_dev = priv->phy; retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED); - retval = phy_write(phy_dev, HNS_LED_FC_REG, value); - retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_COPPER); + retval |= phy_write(phy_dev, HNS_LED_FC_REG, value); + retval |= phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_COPPER); if (retval) { netdev_err(netdev, "mdiobus_write fail !\n"); return retval; From 1c3bae6e475fed5c570ac16fef2afe743c477886 Mon Sep 17 00:00:00 2001 From: Qianqian Xie Date: Thu, 24 Mar 2016 19:08:03 +0800 Subject: [PATCH 05/73] net: hns: remove useless variable assignment and comment The variable head in hns_nic_tx_fini_pro has read a value, but it is obviously no use. The patch will fix it. And the comment is nothing to do with the routine, so it has to be removed Signed-off-by: Qianqian Xie Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 71aa37b4b338..f0c9a41a7443 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -913,10 +913,7 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, static void hns_nic_tx_fini_pro(struct hns_nic_ring_data *ring_data) { struct hnae_ring *ring = ring_data->ring; - int head = ring->next_to_clean; - - /* for hardware bug fixed */ - head = readl_relaxed(ring->io_base + RCB_REG_HEAD); + int head = readl_relaxed(ring->io_base + RCB_REG_HEAD); if (head != ring->next_to_clean) { ring_data->ring->q->handle->dev->ops->toggle_ring_irq( From 4b34aa412c1213912608742b4d016f616b23d9e2 Mon Sep 17 00:00:00 2001 From: Sheng Li Date: Thu, 24 Mar 2016 19:08:04 +0800 Subject: [PATCH 06/73] net: hns: optimizate irq proccess for HNS V2 In hns V1, common_poll should check and clean fbd pkts, because it can not pend irq to clean them if there is no new pkt comes in. But hns V2 hw fixes this bug, and will pend irq itself to do this. So, for hns V2, we set ring_data->fini_process to NULL. Signed-off-by: Sheng Li Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index f0c9a41a7443..66d1652ffbed 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -956,8 +956,8 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) napi_complete(napi); ring_data->ring->q->handle->dev->ops->toggle_ring_irq( ring_data->ring, 0); - - ring_data->fini_process(ring_data); + if (ring_data->fini_process) + ring_data->fini_process(ring_data); return 0; } @@ -1720,6 +1720,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) { struct hnae_handle *h = priv->ae_handle; struct hns_nic_ring_data *rd; + bool is_ver1 = AE_IS_VER1(priv->enet_ver); int i; if (h->q_num > NIC_MAX_Q_PER_VF) { @@ -1737,7 +1738,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) rd->queue_index = i; rd->ring = &h->qs[i]->tx_ring; rd->poll_one = hns_nic_tx_poll_one; - rd->fini_process = hns_nic_tx_fini_pro; + rd->fini_process = is_ver1 ? hns_nic_tx_fini_pro : NULL; netif_napi_add(priv->netdev, &rd->napi, hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM); @@ -1749,7 +1750,7 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv) rd->ring = &h->qs[i - h->q_num]->rx_ring; rd->poll_one = hns_nic_rx_poll_one; rd->ex_process = hns_nic_rx_up_pro; - rd->fini_process = hns_nic_rx_fini_pro; + rd->fini_process = is_ver1 ? hns_nic_rx_fini_pro : NULL; netif_napi_add(priv->netdev, &rd->napi, hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM); From daa8cfd9da728b9e35c49311b23127a8859a5f3b Mon Sep 17 00:00:00 2001 From: Kejian Yan Date: Thu, 24 Mar 2016 19:08:05 +0800 Subject: [PATCH 07/73] net: hns: fix warning of passing zero to 'PTR_ERR' There is a misuse of PTR as shown below: ae_node = (void *)of_parse_phandle(dev->of_node, "ae-handle", 0); if (IS_ERR_OR_NULL(ae_node)) { ret = PTR_ERR(ae_node); dev_err(dev, "not find ae-handle\n"); goto out_read_prop_fail; } if the ae_node is NULL, PTR_ERR(ae_node) means it returns success. And the return value should be -ENODEV. Signed-off-by: Kejian Yan Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 66d1652ffbed..687204b780b0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1814,7 +1814,7 @@ static int hns_nic_try_get_ae(struct net_device *ndev) h = hnae_get_handle(&priv->netdev->dev, priv->ae_node, priv->port_id, NULL); if (IS_ERR_OR_NULL(h)) { - ret = PTR_ERR(h); + ret = -ENODEV; dev_dbg(priv->dev, "has not handle, register notifier!\n"); goto out; } From c1203fe7bb34e2fe7d281882bcce8b6f107df038 Mon Sep 17 00:00:00 2001 From: Sheng Li Date: Thu, 24 Mar 2016 19:08:06 +0800 Subject: [PATCH 08/73] net: hns: bug fix about getting hilink status for HNS v2 The hilink status reg in HNS V2 is different from HNS v1. In HNS V2, It distinguishes differnt lane status according to the bit-field of the reg. As is shown below: [0:0] ---> lane0 [1:1] ---> lane1 ... But the current driver reads the reg to get the hilink status ONLY concidering HNS V1 situation. Here is a patch to support both of them. Signed-off-by: Sheng Li Signed-off-by: Daode Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 44 ++++++++++--------- .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 2 + 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 607c3be42241..e69b02287c44 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -244,31 +244,35 @@ void hns_ppe_com_srst(struct ppe_common_cb *ppe_common, u32 val) */ phy_interface_t hns_mac_get_phy_if(struct hns_mac_cb *mac_cb) { - u32 hilink3_mode; - u32 hilink4_mode; + u32 mode; + u32 reg; + u32 shift; + bool is_ver1 = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver); void __iomem *sys_ctl_vaddr = mac_cb->sys_ctl_vaddr; - int dev_id = mac_cb->mac_id; + int mac_id = mac_cb->mac_id; phy_interface_t phy_if = PHY_INTERFACE_MODE_NA; - hilink3_mode = dsaf_read_reg(sys_ctl_vaddr, HNS_MAC_HILINK3_REG); - hilink4_mode = dsaf_read_reg(sys_ctl_vaddr, HNS_MAC_HILINK4_REG); - if (dev_id >= 0 && dev_id <= 3) { - if (hilink4_mode == 0) - phy_if = PHY_INTERFACE_MODE_SGMII; - else - phy_if = PHY_INTERFACE_MODE_XGMII; - } else if (dev_id >= 4 && dev_id <= 5) { - if (hilink3_mode == 0) - phy_if = PHY_INTERFACE_MODE_SGMII; - else - phy_if = PHY_INTERFACE_MODE_XGMII; - } else { + if (is_ver1 && (mac_id >= 6 && mac_id <= 7)) { phy_if = PHY_INTERFACE_MODE_SGMII; + } else if (mac_id >= 0 && mac_id <= 3) { + reg = is_ver1 ? HNS_MAC_HILINK4_REG : HNS_MAC_HILINK4V2_REG; + mode = dsaf_read_reg(sys_ctl_vaddr, reg); + /* mac_id 0, 1, 2, 3 ---> hilink4 lane 0, 1, 2, 3 */ + shift = is_ver1 ? 0 : mac_id; + if (dsaf_get_bit(mode, shift)) + phy_if = PHY_INTERFACE_MODE_XGMII; + else + phy_if = PHY_INTERFACE_MODE_SGMII; + } else if (mac_id >= 4 && mac_id <= 7) { + reg = is_ver1 ? HNS_MAC_HILINK3_REG : HNS_MAC_HILINK3V2_REG; + mode = dsaf_read_reg(sys_ctl_vaddr, reg); + /* mac_id 4, 5, 6, 7 ---> hilink3 lane 2, 3, 0, 1 */ + shift = is_ver1 ? 0 : mac_id <= 5 ? mac_id - 2 : mac_id - 6; + if (dsaf_get_bit(mode, shift)) + phy_if = PHY_INTERFACE_MODE_XGMII; + else + phy_if = PHY_INTERFACE_MODE_SGMII; } - - dev_dbg(mac_cb->dev, - "hilink3_mode=%d, hilink4_mode=%d dev_id=%d, phy_if=%d\n", - hilink3_mode, hilink4_mode, dev_id, phy_if); return phy_if; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index bf62687e5ea7..e2206f938a9c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -103,6 +103,8 @@ /*serdes offset**/ #define HNS_MAC_HILINK3_REG DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG #define HNS_MAC_HILINK4_REG DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG +#define HNS_MAC_HILINK3V2_REG DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG +#define HNS_MAC_HILINK4V2_REG DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG #define HNS_MAC_LANE0_CTLEDFE_REG 0x000BFFCCULL #define HNS_MAC_LANE1_CTLEDFE_REG 0x000BFFBCULL #define HNS_MAC_LANE2_CTLEDFE_REG 0x000BFFACULL From ba5049945421b8d2f3e2af786a15d13b82316503 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 24 Mar 2016 15:40:04 +0100 Subject: [PATCH 09/73] net: macb: replace macb_writel() call by queue_writel() to update queue ISR macb_interrupt() should not use macb_writel(bp, ISR, ) but only queue_writel(queue, ISR, ). There is one IRQ and one set of {ISR, IER, IDR, IMR} [1] registers per queue on gem hardware, though only queue0 is actually used for now to receive frames: other queues can already be used to transmit frames. The queue_readl() and queue_writel() helper macros are designed to access the relevant IRQ registers. [1] ISR: Interrupt Status Register IER: Interrupt Enable Register IDR: Interrupt Disable Register IMR: Interrupt Mask Register Signed-off-by: Cyrille Pitchen Fixes: bfbb92c44670 ("net: macb: Handle the RXUBR interrupt on all devices") Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 6619178ed77b..f715352a9b85 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1100,7 +1100,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) - macb_writel(bp, ISR, MACB_BIT(RXUBR)); + queue_writel(queue, ISR, MACB_BIT(RXUBR)); } if (status & MACB_BIT(ISR_ROVR)) { From 3e347660488818070bff7533f8561928e09e1d65 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 24 Mar 2016 16:50:00 +0100 Subject: [PATCH 10/73] switchdev: fix typo in comments/doc Two minor typo. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 2 +- net/switchdev/switchdev.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index fad63136ee3e..2f659129694b 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -386,7 +386,7 @@ used. First phase is to "prepare" anything needed, including various checks, memory allocation, etc. The goal is to handle the stuff that is not unlikely to fail here. The second phase is to "commit" the actual changes. -Switchdev provides an inftrastructure for sharing items (for example memory +Switchdev provides an infrastructure for sharing items (for example memory allocations) between the two phases. The object created by a driver in "prepare" phase and it is queued up by: diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 8b5833c1ff2e..2b9b98f1c2ff 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1079,7 +1079,7 @@ static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) * @filter_dev: filter device * @idx: * - * Delete FDB entry from switch device. + * Dump FDB entries from switch device. */ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, From 55868120a3e5420bf5aa26a816c07d691579c9e6 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Thu, 24 Mar 2016 11:27:20 -0700 Subject: [PATCH 11/73] net: bcmgenet: fix dev->stats.tx_bytes accounting 1. Add bytes_compl local variable to __bcmgenet_tx_reclaim() to collect transmitted bytes. dev->stats updates can then be moved outside the while-loop. bytes_compl is also needed for future BQL support. 2. When bcmgenet device uses Tx checksum offload, each transmitted skb gets an extra 64-byte header prepended to it. Before this header is prepended to the skb, we need to save the skb "wire" length in GENET_CB(skb)->bytes_sent, so that proper Tx bytes accounting can be done in __bcmgenet_tx_reclaim(). 3. skb->len covers the entire length of skb, whether it is linear or fragmented. Thus, when we clean the fragments, do not increase transmitted bytes. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Petri Gynther Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 14 ++++++++++---- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 6 ++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 6746fd03cb3a..c1c7c0e6fb7c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1171,6 +1171,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; unsigned int pkts_compl = 0; + unsigned int bytes_compl = 0; unsigned int c_index; unsigned int txbds_ready; unsigned int txbds_processed = 0; @@ -1193,16 +1194,13 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, tx_cb_ptr = &priv->tx_cbs[ring->clean_ptr]; if (tx_cb_ptr->skb) { pkts_compl++; - dev->stats.tx_packets++; - dev->stats.tx_bytes += tx_cb_ptr->skb->len; + bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent; dma_unmap_single(&dev->dev, dma_unmap_addr(tx_cb_ptr, dma_addr), dma_unmap_len(tx_cb_ptr, dma_len), DMA_TO_DEVICE); bcmgenet_free_cb(tx_cb_ptr); } else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) { - dev->stats.tx_bytes += - dma_unmap_len(tx_cb_ptr, dma_len); dma_unmap_page(&dev->dev, dma_unmap_addr(tx_cb_ptr, dma_addr), dma_unmap_len(tx_cb_ptr, dma_len), @@ -1220,6 +1218,9 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, ring->free_bds += txbds_processed; ring->c_index = (ring->c_index + txbds_processed) & DMA_C_INDEX_MASK; + dev->stats.tx_packets += pkts_compl; + dev->stats.tx_bytes += bytes_compl; + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { txq = netdev_get_tx_queue(dev, ring->queue); if (netif_tx_queue_stopped(txq)) @@ -1464,6 +1465,11 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } + /* Retain how many bytes will be sent on the wire, without TSB inserted + * by transmit checksum offload + */ + GENET_CB(skb)->bytes_sent = skb->len; + /* set the SKB transmit checksum */ if (priv->desc_64b_en) { skb = bcmgenet_put_tx_csum(dev, skb); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 967367557309..1e2dc34d331a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -531,6 +531,12 @@ struct bcmgenet_hw_params { u32 flags; }; +struct bcmgenet_skb_cb { + unsigned int bytes_sent; /* bytes on the wire (no TSB) */ +}; + +#define GENET_CB(skb) ((struct bcmgenet_skb_cb *)((skb)->cb)) + struct bcmgenet_tx_ring { spinlock_t lock; /* ring lock */ struct napi_struct napi; /* NAPI per tx queue */ From 7dd399130efb5a454daf24075b7563d197114e39 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Thu, 24 Mar 2016 11:27:21 -0700 Subject: [PATCH 12/73] net: bcmgenet: fix skb_len in bcmgenet_xmit_single() skb_len needs to be skb_headlen(skb) in bcmgenet_xmit_single(). Fragmented skbs can have only Ethernet + IP + TCP headers (14+20+20=54 bytes) in the linear buffer, followed by the rest in fragments. Bumping skb_len to ETH_ZLEN would be incorrect for this case, as it would introduce garbage between TCP header and the fragment data. This also works with regular/non-fragmented small packets < ETH_ZLEN bytes. Successfully tested this on GENETv3 with 42-byte ARP frames. For testing, I used: ethtool -K eth0 tx-checksum-ipv4 off ethtool -K eth0 tx-checksum-ipv6 off echo 0 > /proc/sys/net/ipv4/tcp_timestamps Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Petri Gynther Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c1c7c0e6fb7c..cf6445d148ca 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1297,7 +1297,7 @@ static int bcmgenet_xmit_single(struct net_device *dev, tx_cb_ptr->skb = skb; - skb_len = skb_headlen(skb) < ETH_ZLEN ? ETH_ZLEN : skb_headlen(skb); + skb_len = skb_headlen(skb); mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE); ret = dma_mapping_error(kdev, mapping); From 543e3a8da5a4c453e992d5351ef405d5e32f27d7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 24 Mar 2016 21:56:21 -0500 Subject: [PATCH 13/73] netpoll: Fix extra refcount release in netpoll_cleanup() netpoll_setup() does a dev_hold() on np->dev, the netpoll device. If it fails, it correctly does a dev_put() but leaves np->dev set. If we call netpoll_cleanup() after the failure, np->dev is still set so we do another dev_put(), which decrements the refcount an extra time. It's questionable to call netpoll_cleanup() after netpoll_setup() fails, but it can be difficult to find the problem, and we can easily avoid it in this case. The extra decrements can lead to hangs like this: unregister_netdevice: waiting for bond0 to become free. Usage count = -3 Set and clear np->dev at the points where we dev_hold() and dev_put() the device. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 94acfc89ad97..a57bd17805b4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -603,7 +603,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) const struct net_device_ops *ops; int err; - np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); @@ -670,6 +669,7 @@ int netpoll_setup(struct netpoll *np) goto unlock; } dev_hold(ndev); + np->dev = ndev; if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); @@ -770,6 +770,7 @@ int netpoll_setup(struct netpoll *np) return 0; put: + np->dev = NULL; dev_put(ndev); unlock: rtnl_unlock(); From 322cea2f41adb62c975f46a3242f4e3b43226fa1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 25 Mar 2016 00:30:25 +0100 Subject: [PATCH 14/73] bpf: add missing map_flags to bpf_map_show_fdinfo Add map_flags attribute to bpf_map_show_fdinfo(), so that tools like tc can check for them when loading objects from a pinned entry, e.g. if user intent wrt allocation (BPF_F_NO_PREALLOC) is different to the pinned object, it can bail out. Follow-up to 6c9059817432 ("bpf: pre-allocate hash map elements"), so that tc can still support this with v4.6. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2a2efe1bc76c..adc5e4bd74f8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -137,11 +137,13 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "map_type:\t%u\n" "key_size:\t%u\n" "value_size:\t%u\n" - "max_entries:\t%u\n", + "max_entries:\t%u\n" + "map_flags:\t%#x\n", map->map_type, map->key_size, map->value_size, - map->max_entries); + map->max_entries, + map->map_flags); } #endif From 4cef191d05871fbc8bb3b812880e1997e855a3b9 Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Fri, 25 Mar 2016 12:46:54 +0900 Subject: [PATCH 15/73] net: phy: bcm7xxx: Add entries for Broadcom BCM7346 and BCM7362 Add PHY entries for the Broadcom BCM7346 and BCM7362 chips, these are 40nm generation Ethernet PHY. Fixes: 815717d1473e ("net: phy: bcm7xxx: Remove wildcard entries") Signed-off-by: Jaedon Shin Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 4 ++++ include/linux/brcmphy.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index b881a7b1e4f6..9636da0b6efc 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -339,6 +339,8 @@ static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7346, "Broadcom BCM7346"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7362, "Broadcom BCM7362"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7429, "Broadcom BCM7429"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7435, "Broadcom BCM7435"), @@ -348,6 +350,8 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7250, 0xfffffff0, }, { PHY_ID_BCM7364, 0xfffffff0, }, { PHY_ID_BCM7366, 0xfffffff0, }, + { PHY_ID_BCM7346, 0xfffffff0, }, + { PHY_ID_BCM7362, 0xfffffff0, }, { PHY_ID_BCM7425, 0xfffffff0, }, { PHY_ID_BCM7429, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index f0ba9c2ec639..e3354b74286c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -24,6 +24,8 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 +#define PHY_ID_BCM7346 0x600d8650 +#define PHY_ID_BCM7362 0x600d84b0 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 From d31fecd1af991e39cbe27a59f290cdcefb7ba6ae Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 25 Mar 2016 07:14:09 -0400 Subject: [PATCH 16/73] qlge: Update version to 1.00.00.35 Just updating version as many fixes got accumulated over 1.00.00.34 Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index ef332708e5f2..6d31f92ef2b6 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -18,7 +18,7 @@ */ #define DRV_NAME "qlge" #define DRV_STRING "QLogic 10 Gigabit PCI-E Ethernet Driver " -#define DRV_VERSION "1.00.00.34" +#define DRV_VERSION "1.00.00.35" #define WQ_ADDR_ALIGN 0x3 /* 4 byte alignment */ From 9ba723b081a2d1be81fb6f60f912bd94fa744965 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Fri, 25 Mar 2016 10:37:34 +0100 Subject: [PATCH 17/73] net: macb: remove BUG_ON() and reset the queue to handle RX errors This patch removes two BUG_ON() used to notify about RX queue corruptions on macb (not gem) hardware without actually handling the error. The new code skips corrupted frames but still processes faultless frames. Then it resets the RX queue before restarting the reception from a clean state. This patch is a rework of an older patch proposed by Neil Armstrong: http://patchwork.ozlabs.org/patch/371525/ Signed-off-by: Cyrille Pitchen Acked-by: Neil Armstrong Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 59 ++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index f715352a9b85..6c3dc27cb100 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -917,7 +917,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, unsigned int frag_len = bp->rx_buffer_size; if (offset + frag_len > len) { - BUG_ON(frag != last_frag); + if (unlikely(frag != last_frag)) { + dev_kfree_skb_any(skb); + return -1; + } frag_len = len - offset; } skb_copy_to_linear_data_offset(skb, offset, @@ -945,8 +948,23 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, return 0; } +static inline void macb_init_rx_ring(struct macb *bp) +{ + dma_addr_t addr; + int i; + + addr = bp->rx_buffers_dma; + for (i = 0; i < RX_RING_SIZE; i++) { + bp->rx_ring[i].addr = addr; + bp->rx_ring[i].ctrl = 0; + addr += bp->rx_buffer_size; + } + bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP); +} + static int macb_rx(struct macb *bp, int budget) { + bool reset_rx_queue = false; int received = 0; unsigned int tail; int first_frag = -1; @@ -972,10 +990,18 @@ static int macb_rx(struct macb *bp, int budget) if (ctrl & MACB_BIT(RX_EOF)) { int dropped; - BUG_ON(first_frag == -1); + + if (unlikely(first_frag == -1)) { + reset_rx_queue = true; + continue; + } dropped = macb_rx_frame(bp, first_frag, tail); first_frag = -1; + if (unlikely(dropped < 0)) { + reset_rx_queue = true; + continue; + } if (!dropped) { received++; budget--; @@ -983,6 +1009,26 @@ static int macb_rx(struct macb *bp, int budget) } } + if (unlikely(reset_rx_queue)) { + unsigned long flags; + u32 ctrl; + + netdev_err(bp->dev, "RX queue corruption: reset it\n"); + + spin_lock_irqsave(&bp->lock, flags); + + ctrl = macb_readl(bp, NCR); + macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); + + macb_init_rx_ring(bp); + macb_writel(bp, RBQP, bp->rx_ring_dma); + + macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); + + spin_unlock_irqrestore(&bp->lock, flags); + return received; + } + if (first_frag != -1) bp->rx_tail = first_frag; else @@ -1523,15 +1569,8 @@ static void gem_init_rings(struct macb *bp) static void macb_init_rings(struct macb *bp) { int i; - dma_addr_t addr; - addr = bp->rx_buffers_dma; - for (i = 0; i < RX_RING_SIZE; i++) { - bp->rx_ring[i].addr = addr; - bp->rx_ring[i].ctrl = 0; - addr += bp->rx_buffer_size; - } - bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP); + macb_init_rx_ring(bp); for (i = 0; i < TX_RING_SIZE; i++) { bp->queues[0].tx_ring[i].addr = 0; From a69f0e281d9eb08eb277be051f920ea863e258e2 Mon Sep 17 00:00:00 2001 From: Diego Viola Date: Sun, 27 Mar 2016 20:58:15 -0300 Subject: [PATCH 18/73] drivers/net/usb/plusb.c: Fix typo Signed-off-by: Diego Viola Signed-off-by: David S. Miller --- drivers/net/usb/plusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 1bfe0fcaccf5..22e1a9a99a7d 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -38,7 +38,7 @@ * HEADS UP: this handshaking isn't all that robust. This driver * gets confused easily if you unplug one end of the cable then * try to connect it again; you'll need to restart both ends. The - * "naplink" software (used by some PlayStation/2 deveopers) does + * "naplink" software (used by some PlayStation/2 developers) does * the handshaking much better! Also, sometimes this hardware * seems to get wedged under load. Prolific docs are weak, and * don't identify differences between PL2301 and PL2302, much less From 995096a0a438396a570104e9c240a2a26623a5a1 Mon Sep 17 00:00:00 2001 From: Quentin Armitage Date: Sun, 27 Mar 2016 17:06:11 +0100 Subject: [PATCH 19/73] Fix returned tc and hoplimit values for route with IPv6 encapsulation For a route with IPv6 encapsulation, the traffic class and hop limit values are interchanged when returned to userspace by the kernel. For example, see below. ># ip route add 192.168.0.1 dev eth0.2 encap ip6 dst 0x50 tc 0x50 hoplimit 100 table 1000 ># ip route show table 1000 192.168.0.1 encap ip6 id 0 src :: dst fe83::1 hoplimit 80 tc 100 dev eth0.2 scope link Signed-off-by: Quentin Armitage Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 02dd990af542..6165f30c4d72 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -372,8 +372,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || - nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || - nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) || + nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) || + nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) || nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; From 66391b383b70920629df403f2881ea48805e6659 Mon Sep 17 00:00:00 2001 From: Cosmin-Gabriel Samoila Date: Sat, 26 Mar 2016 02:49:50 +0200 Subject: [PATCH 20/73] Drivers: isdn: hisax: isac.c: Fix assignment and check into one expression. Fix variable assignment inside if statement. It is error-prone and hard to read. Signed-off-by: Cosmin-Gabriel Samoila Signed-off-by: David S. Miller --- drivers/isdn/hisax/isac.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c index 7fdf78f46433..df7e05ca8f9c 100644 --- a/drivers/isdn/hisax/isac.c +++ b/drivers/isdn/hisax/isac.c @@ -215,9 +215,11 @@ isac_interrupt(struct IsdnCardState *cs, u_char val) if (count == 0) count = 32; isac_empty_fifo(cs, count); - if ((count = cs->rcvidx) > 0) { + count = cs->rcvidx; + if (count > 0) { cs->rcvidx = 0; - if (!(skb = alloc_skb(count, GFP_ATOMIC))) + skb = alloc_skb(count, GFP_ATOMIC); + if (!skb) printk(KERN_WARNING "HiSax: D receive out of memory\n"); else { memcpy(skb_put(skb, count), cs->rcvbuf, count); @@ -251,7 +253,8 @@ isac_interrupt(struct IsdnCardState *cs, u_char val) cs->tx_skb = NULL; } } - if ((cs->tx_skb = skb_dequeue(&cs->sq))) { + cs->tx_skb = skb_dequeue(&cs->sq); + if (cs->tx_skb) { cs->tx_cnt = 0; isac_fill_fifo(cs); } else @@ -313,7 +316,8 @@ isac_interrupt(struct IsdnCardState *cs, u_char val) #if ARCOFI_USE if (v1 & 0x08) { if (!cs->dc.isac.mon_rx) { - if (!(cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC))) { + cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC); + if (!cs->dc.isac.mon_rx) { if (cs->debug & L1_DEB_WARN) debugl1(cs, "ISAC MON RX out of memory!"); cs->dc.isac.mocr &= 0xf0; @@ -343,7 +347,8 @@ isac_interrupt(struct IsdnCardState *cs, u_char val) afterMONR0: if (v1 & 0x80) { if (!cs->dc.isac.mon_rx) { - if (!(cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC))) { + cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC); + if (!cs->dc.isac.mon_rx) { if (cs->debug & L1_DEB_WARN) debugl1(cs, "ISAC MON RX out of memory!"); cs->dc.isac.mocr &= 0x0f; From 79c134239df989167acd277364b83ff9e91e70c4 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Sat, 26 Mar 2016 22:24:09 +0100 Subject: [PATCH 21/73] net: sxgbe: fix error paths in sxgbe_platform_probe() We need to use post-decrement to ensure that irq_dispose_mapping is also called on priv->rxq[0]->irq_no; moreover, if one of the above for loops failed already at i==0 (so we reach one of these labels with that value of i), we'll enter an essentially infinite loop of out-of-bounds accesses. Signed-off-by: Rasmus Villemoes Reviewed-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c index b02eed12bfc5..73427e29df2a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -155,11 +155,11 @@ static int sxgbe_platform_probe(struct platform_device *pdev) return 0; err_rx_irq_unmap: - while (--i) + while (i--) irq_dispose_mapping(priv->rxq[i]->irq_no); i = SXGBE_TX_QUEUES; err_tx_irq_unmap: - while (--i) + while (i--) irq_dispose_mapping(priv->txq[i]->irq_no); irq_dispose_mapping(priv->irq); err_drv_remove: From d7be81a5916bdb1d904803958e5991a16f7ae4b2 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Sun, 27 Mar 2016 12:22:02 +0200 Subject: [PATCH 22/73] ravb: fix software timestamping In ravb_start_xmit dont call skb_tx_timestamp only when hardware timestamping is requested: in the latter case software timestamps are suppressed and thus the call of skb_tx_timestamp does not have any effect. Instead call skb_tx_timestamp unconditionally in ravb_start_xmit, since the function checks itself if software timestamping is required or should be skipped due to hardware timestamping. Signed-off-by: Lino Sanfilippo Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4e1a7dba7c4a..087e14a3fba7 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1377,11 +1377,11 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* TAG and timestamp required flag */ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - skb_tx_timestamp(skb); desc->tagh_tsr = (ts_skb->tag >> 4) | TX_TSR; desc->ds_tagl |= le16_to_cpu(ts_skb->tag << 12); } + skb_tx_timestamp(skb); /* Descriptor type must be set after all the above writes */ dma_wmb(); desc->die_dt = DT_FEND; From ac71b46efd2838c02ec193987c8f61c3ba33b495 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Mon, 28 Mar 2016 18:08:59 +0800 Subject: [PATCH 23/73] openvswitch: Use proper buffer size in nla_memcpy For the input parameter count, it's better to use the size of destination buffer size, as nla_memcpy would take into account the length of the source netlink attribute when a data is copied from an attribute. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index dc5eb29fe7d6..f8a8d4390a8a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -968,7 +968,8 @@ static int parse_nat(const struct nlattr *attr, break; case OVS_NAT_ATTR_IP_MIN: - nla_memcpy(&info->range.min_addr, a, nla_len(a)); + nla_memcpy(&info->range.min_addr, a, + sizeof(info->range.min_addr)); info->range.flags |= NF_NAT_RANGE_MAP_IPS; break; From 43adc067c5a070a5ef97d0c25e33df19c4481484 Mon Sep 17 00:00:00 2001 From: Lisheng Date: Mon, 28 Mar 2016 18:40:56 +0800 Subject: [PATCH 24/73] net: hns: fixed the setting and getting overtime bug The overtime setting and getting REGs in HNS V2 is defferent from HNS V1. It needs to be distinguished between them if getting or setting the REGs. Signed-off-by: Lisheng Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns/hns_ae_adapt.c | 60 ++---- .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 196 ++++++++---------- .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.h | 23 +- .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 1 + 4 files changed, 126 insertions(+), 154 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 285c893ab135..1dd1d6974d47 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -159,11 +159,6 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, ae_handle->qs[i]->tx_ring.q = ae_handle->qs[i]; ring_pair_cb->used_by_vf = 1; - if (port_idx < DSAF_SERVICE_PORT_NUM_PER_DSAF) - ring_pair_cb->port_id_in_dsa = port_idx; - else - ring_pair_cb->port_id_in_dsa = 0; - ring_pair_cb++; } @@ -453,59 +448,46 @@ static int hns_ae_set_pauseparam(struct hnae_handle *handle, static void hns_ae_get_coalesce_usecs(struct hnae_handle *handle, u32 *tx_usecs, u32 *rx_usecs) { - int port; + struct ring_pair_cb *ring_pair = + container_of(handle->qs[0], struct ring_pair_cb, q); - port = hns_ae_map_eport_to_dport(handle->eport_id); - - *tx_usecs = hns_rcb_get_coalesce_usecs( - hns_ae_get_dsaf_dev(handle->dev), - hns_dsaf_get_comm_idx_by_port(port)); - *rx_usecs = hns_rcb_get_coalesce_usecs( - hns_ae_get_dsaf_dev(handle->dev), - hns_dsaf_get_comm_idx_by_port(port)); + *tx_usecs = hns_rcb_get_coalesce_usecs(ring_pair->rcb_common, + ring_pair->port_id_in_comm); + *rx_usecs = hns_rcb_get_coalesce_usecs(ring_pair->rcb_common, + ring_pair->port_id_in_comm); } static void hns_ae_get_rx_max_coalesced_frames(struct hnae_handle *handle, u32 *tx_frames, u32 *rx_frames) { - int port; + struct ring_pair_cb *ring_pair = + container_of(handle->qs[0], struct ring_pair_cb, q); - assert(handle); - - port = hns_ae_map_eport_to_dport(handle->eport_id); - - *tx_frames = hns_rcb_get_coalesced_frames( - hns_ae_get_dsaf_dev(handle->dev), port); - *rx_frames = hns_rcb_get_coalesced_frames( - hns_ae_get_dsaf_dev(handle->dev), port); + *tx_frames = hns_rcb_get_coalesced_frames(ring_pair->rcb_common, + ring_pair->port_id_in_comm); + *rx_frames = hns_rcb_get_coalesced_frames(ring_pair->rcb_common, + ring_pair->port_id_in_comm); } static void hns_ae_set_coalesce_usecs(struct hnae_handle *handle, u32 timeout) { - int port; + struct ring_pair_cb *ring_pair = + container_of(handle->qs[0], struct ring_pair_cb, q); - assert(handle); - - port = hns_ae_map_eport_to_dport(handle->eport_id); - - hns_rcb_set_coalesce_usecs(hns_ae_get_dsaf_dev(handle->dev), - port, timeout); + (void)hns_rcb_set_coalesce_usecs( + ring_pair->rcb_common, ring_pair->port_id_in_comm, timeout); } static int hns_ae_set_coalesce_frames(struct hnae_handle *handle, u32 coalesce_frames) { - int port; - int ret; + struct ring_pair_cb *ring_pair = + container_of(handle->qs[0], struct ring_pair_cb, q); - assert(handle); - - port = hns_ae_map_eport_to_dport(handle->eport_id); - - ret = hns_rcb_set_coalesced_frames(hns_ae_get_dsaf_dev(handle->dev), - port, coalesce_frames); - return ret; + return hns_rcb_set_coalesced_frames( + ring_pair->rcb_common, + ring_pair->port_id_in_comm, coalesce_frames); } void hns_ae_update_stats(struct hnae_handle *handle, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 12188807468c..28ee26e5c478 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -215,9 +215,9 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type) dsaf_write_dev(q, RCB_RING_RX_RING_BD_LEN_REG, bd_size_type); dsaf_write_dev(q, RCB_RING_RX_RING_BD_NUM_REG, - ring_pair->port_id_in_dsa); + ring_pair->port_id_in_comm); dsaf_write_dev(q, RCB_RING_RX_RING_PKTLINE_REG, - ring_pair->port_id_in_dsa); + ring_pair->port_id_in_comm); } else { dsaf_write_dev(q, RCB_RING_TX_RING_BASEADDR_L_REG, (u32)dma); @@ -227,9 +227,9 @@ static void hns_rcb_ring_init(struct ring_pair_cb *ring_pair, int ring_type) dsaf_write_dev(q, RCB_RING_TX_RING_BD_LEN_REG, bd_size_type); dsaf_write_dev(q, RCB_RING_TX_RING_BD_NUM_REG, - ring_pair->port_id_in_dsa); + ring_pair->port_id_in_comm); dsaf_write_dev(q, RCB_RING_TX_RING_PKTLINE_REG, - ring_pair->port_id_in_dsa); + ring_pair->port_id_in_comm); } } @@ -256,50 +256,16 @@ static void hns_rcb_set_port_desc_cnt(struct rcb_common_cb *rcb_common, desc_cnt); } -/** - *hns_rcb_set_port_coalesced_frames - set rcb port coalesced frames - *@rcb_common: rcb_common device - *@port_idx:port index - *@coalesced_frames:BD num for coalesced frames - */ -static int hns_rcb_set_port_coalesced_frames(struct rcb_common_cb *rcb_common, - u32 port_idx, - u32 coalesced_frames) +static void hns_rcb_set_port_timeout( + struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout) { - if (coalesced_frames >= rcb_common->desc_num || - coalesced_frames > HNS_RCB_MAX_COALESCED_FRAMES) - return -EINVAL; - - dsaf_write_dev(rcb_common, RCB_CFG_PKTLINE_REG + port_idx * 4, - coalesced_frames); - return 0; -} - -/** - *hns_rcb_get_port_coalesced_frames - set rcb port coalesced frames - *@rcb_common: rcb_common device - *@port_idx:port index - * return coaleseced frames value - */ -static u32 hns_rcb_get_port_coalesced_frames(struct rcb_common_cb *rcb_common, - u32 port_idx) -{ - if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM) - port_idx = 0; - - return dsaf_read_dev(rcb_common, - RCB_CFG_PKTLINE_REG + port_idx * 4); -} - -/** - *hns_rcb_set_timeout - set rcb port coalesced time_out - *@rcb_common: rcb_common device - *@time_out:time for coalesced time_out - */ -static void hns_rcb_set_timeout(struct rcb_common_cb *rcb_common, - u32 timeout) -{ - dsaf_write_dev(rcb_common, RCB_CFG_OVERTIME_REG, timeout); + if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) + dsaf_write_dev(rcb_common, RCB_CFG_OVERTIME_REG, + timeout * HNS_RCB_CLK_FREQ_MHZ); + else + dsaf_write_dev(rcb_common, + RCB_PORT_CFG_OVERTIME_REG + port_idx * 4, + timeout); } static int hns_rcb_common_get_port_num(struct rcb_common_cb *rcb_common) @@ -361,10 +327,11 @@ int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common) for (i = 0; i < port_num; i++) { hns_rcb_set_port_desc_cnt(rcb_common, i, rcb_common->desc_num); - (void)hns_rcb_set_port_coalesced_frames( - rcb_common, i, rcb_common->coalesced_frames); + (void)hns_rcb_set_coalesced_frames( + rcb_common, i, HNS_RCB_DEF_COALESCED_FRAMES); + hns_rcb_set_port_timeout( + rcb_common, i, HNS_RCB_DEF_COALESCED_USECS); } - hns_rcb_set_timeout(rcb_common, rcb_common->timeout); dsaf_write_dev(rcb_common, RCB_COM_CFG_ENDIAN_REG, HNS_RCB_COMMON_ENDIAN); @@ -460,7 +427,8 @@ static void hns_rcb_ring_pair_get_cfg(struct ring_pair_cb *ring_pair_cb) hns_rcb_ring_get_cfg(&ring_pair_cb->q, TX_RING); } -static int hns_rcb_get_port(struct rcb_common_cb *rcb_common, int ring_idx) +static int hns_rcb_get_port_in_comm( + struct rcb_common_cb *rcb_common, int ring_idx) { int comm_index = rcb_common->comm_index; int port; @@ -470,7 +438,7 @@ static int hns_rcb_get_port(struct rcb_common_cb *rcb_common, int ring_idx) q_num = (int)rcb_common->max_q_per_vf * rcb_common->max_vfn; port = ring_idx / q_num; } else { - port = HNS_RCB_SERVICE_NW_ENGINE_NUM + comm_index - 1; + port = 0; /* config debug-ports port_id_in_comm to 0*/ } return port; @@ -518,7 +486,8 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) ring_pair_cb->index = i; ring_pair_cb->q.io_base = RCB_COMM_BASE_TO_RING_BASE(rcb_common->io_base, i); - ring_pair_cb->port_id_in_dsa = hns_rcb_get_port(rcb_common, i); + ring_pair_cb->port_id_in_comm = + hns_rcb_get_port_in_comm(rcb_common, i); ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] = is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2) : platform_get_irq(pdev, base_irq_idx + i * 3 + 1); @@ -534,82 +503,95 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) /** *hns_rcb_get_coalesced_frames - get rcb port coalesced frames *@rcb_common: rcb_common device - *@comm_index:port index - *return coalesced_frames + *@port_idx:port id in comm + * + *Returns: coalesced_frames */ -u32 hns_rcb_get_coalesced_frames(struct dsaf_device *dsaf_dev, int port) +u32 hns_rcb_get_coalesced_frames( + struct rcb_common_cb *rcb_common, u32 port_idx) { - int comm_index = hns_dsaf_get_comm_idx_by_port(port); - struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index]; - - return hns_rcb_get_port_coalesced_frames(rcb_comm, port); + return dsaf_read_dev(rcb_common, RCB_CFG_PKTLINE_REG + port_idx * 4); } /** *hns_rcb_get_coalesce_usecs - get rcb port coalesced time_out *@rcb_common: rcb_common device - *@comm_index:port index - *return time_out + *@port_idx:port id in comm + * + *Returns: time_out */ -u32 hns_rcb_get_coalesce_usecs(struct dsaf_device *dsaf_dev, int comm_index) +u32 hns_rcb_get_coalesce_usecs( + struct rcb_common_cb *rcb_common, u32 port_idx) { - struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index]; - - return rcb_comm->timeout; + if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) + return dsaf_read_dev(rcb_common, RCB_CFG_OVERTIME_REG) / + HNS_RCB_CLK_FREQ_MHZ; + else + return dsaf_read_dev(rcb_common, + RCB_PORT_CFG_OVERTIME_REG + port_idx * 4); } /** *hns_rcb_set_coalesce_usecs - set rcb port coalesced time_out *@rcb_common: rcb_common device - *@comm_index: comm :index - *@etx_usecs:tx time for coalesced time_out - *@rx_usecs:rx time for coalesced time_out + *@port_idx:port id in comm + *@timeout:tx/rx time for coalesced time_out + * + * Returns: + * Zero for success, or an error code in case of failure */ -void hns_rcb_set_coalesce_usecs(struct dsaf_device *dsaf_dev, - int port, u32 timeout) +int hns_rcb_set_coalesce_usecs( + struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout) { - int comm_index = hns_dsaf_get_comm_idx_by_port(port); - struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index]; + u32 old_timeout = hns_rcb_get_coalesce_usecs(rcb_common, port_idx); - if (rcb_comm->timeout == timeout) - return; + if (timeout == old_timeout) + return 0; - if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { - dev_err(dsaf_dev->dev, - "error: not support coalesce_usecs setting!\n"); - return; + if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) { + if (rcb_common->comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) { + dev_err(rcb_common->dsaf_dev->dev, + "error: not support coalesce_usecs setting!\n"); + return -EINVAL; + } } - rcb_comm->timeout = timeout; - hns_rcb_set_timeout(rcb_comm, rcb_comm->timeout); + if (timeout > HNS_RCB_MAX_COALESCED_USECS) { + dev_err(rcb_common->dsaf_dev->dev, + "error: not support coalesce %dus!\n", timeout); + return -EINVAL; + } + hns_rcb_set_port_timeout(rcb_common, port_idx, timeout); + return 0; } /** *hns_rcb_set_coalesced_frames - set rcb coalesced frames *@rcb_common: rcb_common device - *@tx_frames:tx BD num for coalesced frames - *@rx_frames:rx BD num for coalesced frames - *Return 0 on success, negative on failure + *@port_idx:port id in comm + *@coalesced_frames:tx/rx BD num for coalesced frames + * + * Returns: + * Zero for success, or an error code in case of failure */ -int hns_rcb_set_coalesced_frames(struct dsaf_device *dsaf_dev, - int port, u32 coalesced_frames) +int hns_rcb_set_coalesced_frames( + struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames) { - int comm_index = hns_dsaf_get_comm_idx_by_port(port); - struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index]; - u32 coalesced_reg_val; - int ret; + u32 old_waterline = hns_rcb_get_coalesced_frames(rcb_common, port_idx); - coalesced_reg_val = hns_rcb_get_port_coalesced_frames(rcb_comm, port); - - if (coalesced_reg_val == coalesced_frames) + if (coalesced_frames == old_waterline) return 0; - if (coalesced_frames >= HNS_RCB_MIN_COALESCED_FRAMES) { - ret = hns_rcb_set_port_coalesced_frames(rcb_comm, port, - coalesced_frames); - return ret; - } else { + if (coalesced_frames >= rcb_common->desc_num || + coalesced_frames > HNS_RCB_MAX_COALESCED_FRAMES || + coalesced_frames < HNS_RCB_MIN_COALESCED_FRAMES) { + dev_err(rcb_common->dsaf_dev->dev, + "error: not support coalesce_frames setting!\n"); return -EINVAL; } + + dsaf_write_dev(rcb_common, RCB_CFG_PKTLINE_REG + port_idx * 4, + coalesced_frames); + return 0; } /** @@ -749,8 +731,6 @@ int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev, rcb_common->dsaf_dev = dsaf_dev; rcb_common->desc_num = dsaf_dev->desc_num; - rcb_common->coalesced_frames = HNS_RCB_DEF_COALESCED_FRAMES; - rcb_common->timeout = HNS_RCB_MAX_TIME_OUT; hns_rcb_get_queue_mode(dsaf_mode, comm_index, &max_vfn, &max_q_per_vf); rcb_common->max_vfn = max_vfn; @@ -951,6 +931,10 @@ void hns_rcb_get_strings(int stringset, u8 *data, int index) void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data) { u32 *regs = data; + bool is_ver1 = AE_IS_VER1(rcb_com->dsaf_dev->dsaf_ver); + bool is_dbg = (rcb_com->comm_index != HNS_DSAF_COMM_SERVICE_NW_IDX); + u32 reg_tmp; + u32 reg_num_tmp; u32 i = 0; /*rcb common registers */ @@ -1004,12 +988,16 @@ void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data) = dsaf_read_dev(rcb_com, RCB_CFG_PKTLINE_REG + 4 * i); } - regs[70] = dsaf_read_dev(rcb_com, RCB_CFG_OVERTIME_REG); - regs[71] = dsaf_read_dev(rcb_com, RCB_CFG_PKTLINE_INT_NUM_REG); - regs[72] = dsaf_read_dev(rcb_com, RCB_CFG_OVERTIME_INT_NUM_REG); + reg_tmp = is_ver1 ? RCB_CFG_OVERTIME_REG : RCB_PORT_CFG_OVERTIME_REG; + reg_num_tmp = (is_ver1 || is_dbg) ? 1 : 6; + for (i = 0; i < reg_num_tmp; i++) + regs[70 + i] = dsaf_read_dev(rcb_com, reg_tmp); + + regs[76] = dsaf_read_dev(rcb_com, RCB_CFG_PKTLINE_INT_NUM_REG); + regs[77] = dsaf_read_dev(rcb_com, RCB_CFG_OVERTIME_INT_NUM_REG); /* mark end of rcb common regs */ - for (i = 73; i < 80; i++) + for (i = 78; i < 80; i++) regs[i] = 0xcccccccc; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index 81fe9f849973..eb61014ad615 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -38,7 +38,9 @@ struct rcb_common_cb; #define HNS_RCB_MAX_COALESCED_FRAMES 1023 #define HNS_RCB_MIN_COALESCED_FRAMES 1 #define HNS_RCB_DEF_COALESCED_FRAMES 50 -#define HNS_RCB_MAX_TIME_OUT 0x500 +#define HNS_RCB_CLK_FREQ_MHZ 350 +#define HNS_RCB_MAX_COALESCED_USECS 0x3ff +#define HNS_RCB_DEF_COALESCED_USECS 3 #define HNS_RCB_COMMON_ENDIAN 1 @@ -82,7 +84,7 @@ struct ring_pair_cb { int virq[HNS_RCB_IRQ_NUM_PER_QUEUE]; - u8 port_id_in_dsa; + u8 port_id_in_comm; u8 used_by_vf; struct hns_ring_hw_stats hw_stats; @@ -97,8 +99,6 @@ struct rcb_common_cb { u8 comm_index; u32 ring_num; - u32 coalesced_frames; /* frames threshold of rx interrupt */ - u32 timeout; /* time threshold of rx interrupt */ u32 desc_num; /* desc num per queue*/ struct ring_pair_cb ring_pair_cb[0]; @@ -125,13 +125,14 @@ void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag); void hns_rcb_init_hw(struct ring_pair_cb *ring); void hns_rcb_reset_ring_hw(struct hnae_queue *q); void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag); - -u32 hns_rcb_get_coalesced_frames(struct dsaf_device *dsaf_dev, int comm_index); -u32 hns_rcb_get_coalesce_usecs(struct dsaf_device *dsaf_dev, int comm_index); -void hns_rcb_set_coalesce_usecs(struct dsaf_device *dsaf_dev, - int comm_index, u32 timeout); -int hns_rcb_set_coalesced_frames(struct dsaf_device *dsaf_dev, - int comm_index, u32 coalesce_frames); +u32 hns_rcb_get_coalesced_frames( + struct rcb_common_cb *rcb_common, u32 port_idx); +u32 hns_rcb_get_coalesce_usecs( + struct rcb_common_cb *rcb_common, u32 port_idx); +int hns_rcb_set_coalesce_usecs( + struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout); +int hns_rcb_set_coalesced_frames( + struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames); void hns_rcb_update_stats(struct hnae_queue *queue); void hns_rcb_get_stats(struct hnae_queue *queue, u64 *data); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index e2206f938a9c..7d7204f45e78 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -406,6 +406,7 @@ #define RCB_CFG_OVERTIME_REG 0x9300 #define RCB_CFG_PKTLINE_INT_NUM_REG 0x9304 #define RCB_CFG_OVERTIME_INT_NUM_REG 0x9308 +#define RCB_PORT_CFG_OVERTIME_REG 0x9430 #define RCB_RING_RX_RING_BASEADDR_L_REG 0x00000 #define RCB_RING_RX_RING_BASEADDR_H_REG 0x00004 From 9832ce4c0bb90e4dcedf4280947341b1eb6728e6 Mon Sep 17 00:00:00 2001 From: Lisheng Date: Mon, 28 Mar 2016 18:40:57 +0800 Subject: [PATCH 25/73] net: hns: set-coalesce-usecs returns errno by dsaf.ko It may fail to set coalesce usecs to HW, and Ethtool needs to know if it is successful to cfg the parameter or not. So it needs return the errno by dsaf.ko. Signed-off-by: Lisheng Signed-off-by: Yisen Zhuang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.h | 2 +- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 6 +++--- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 37d0cce392be..e8d36aaea223 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -469,7 +469,7 @@ struct hnae_ae_ops { u32 *tx_usecs, u32 *rx_usecs); void (*get_rx_max_coalesced_frames)(struct hnae_handle *handle, u32 *tx_frames, u32 *rx_frames); - void (*set_coalesce_usecs)(struct hnae_handle *handle, u32 timeout); + int (*set_coalesce_usecs)(struct hnae_handle *handle, u32 timeout); int (*set_coalesce_frames)(struct hnae_handle *handle, u32 coalesce_frames); void (*set_promisc_mode)(struct hnae_handle *handle, u32 en); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 1dd1d6974d47..a1cb461ac45f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -469,13 +469,13 @@ static void hns_ae_get_rx_max_coalesced_frames(struct hnae_handle *handle, ring_pair->port_id_in_comm); } -static void hns_ae_set_coalesce_usecs(struct hnae_handle *handle, - u32 timeout) +static int hns_ae_set_coalesce_usecs(struct hnae_handle *handle, + u32 timeout) { struct ring_pair_cb *ring_pair = container_of(handle->qs[0], struct ring_pair_cb, q); - (void)hns_rcb_set_coalesce_usecs( + return hns_rcb_set_coalesce_usecs( ring_pair->rcb_common, ring_pair->port_id_in_comm, timeout); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 0e7da3f0a564..3d746c887873 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -794,8 +794,10 @@ static int hns_set_coalesce(struct net_device *net_dev, (!ops->set_coalesce_frames)) return -ESRCH; - ops->set_coalesce_usecs(priv->ae_handle, - ec->rx_coalesce_usecs); + ret = ops->set_coalesce_usecs(priv->ae_handle, + ec->rx_coalesce_usecs); + if (ret) + return ret; ret = ops->set_coalesce_frames( priv->ae_handle, From 0e3e7999703e7df93a9deee8ba89b604c5fd662d Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 28 Mar 2016 13:47:42 +0100 Subject: [PATCH 26/73] net: macb: Only call GPIO functions if there is a valid GPIO GPIOlib will print warning messages if we call GPIO functions without a valid GPIO. Change the code to avoid doing so. Signed-off-by: Charles Keepax Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 6c3dc27cb100..48a7d7dee846 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2996,9 +2996,10 @@ static int macb_probe(struct platform_device *pdev) phy_node = of_get_next_available_child(np, NULL); if (phy_node) { int gpio = of_get_named_gpio(phy_node, "reset-gpios", 0); - if (gpio_is_valid(gpio)) + if (gpio_is_valid(gpio)) { bp->reset_gpio = gpio_to_desc(gpio); - gpiod_direction_output(bp->reset_gpio, 1); + gpiod_direction_output(bp->reset_gpio, 1); + } } of_node_put(phy_node); @@ -3068,7 +3069,8 @@ static int macb_remove(struct platform_device *pdev) mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ - gpiod_set_value(bp->reset_gpio, 0); + if (bp->reset_gpio) + gpiod_set_value(bp->reset_gpio, 0); unregister_netdev(dev); clk_disable_unprepare(bp->tx_clk); From 596cf3fe5854fe2b1703b0466ed6bf9cfb83c91e Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Wed, 16 Mar 2016 21:49:00 +0100 Subject: [PATCH 27/73] netfilter: ipset: fix race condition in ipset save, swap and delete This fix adds a new reference counter (ref_netlink) for the struct ip_set. The other reference counter (ref) can be swapped out by ip_set_swap and we need a separate counter to keep track of references for netlink events like dump. Using the same ref counter for dump causes a race condition which can be demonstrated by the following script: ipset create hash_ip1 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset create hash_ip2 hash:ip family inet hashsize 300000 maxelem 500000 \ counters ipset create hash_ip3 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset save & ipset swap hash_ip3 hash_ip2 ipset destroy hash_ip3 /* will crash the machine */ Swap will exchange the values of ref so destroy will see ref = 0 instead of ref = 1. With this fix in place swap will not succeed because ipset save still has ref_netlink on the set (ip_set_swap doesn't swap ref_netlink). Both delete and swap will error out if ref_netlink != 0 on the set. Note: The changes to *_head functions is because previously we would increment ref whenever we called these functions, we don't do that anymore. Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 4 +++ net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_core.c | 33 +++++++++++++++++++++---- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- net/netfilter/ipset/ip_set_list_set.c | 2 +- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0e1f433cc4b7..f48b8a664b0f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -234,6 +234,10 @@ struct ip_set { spinlock_t lock; /* References to the set */ u32 ref; + /* References to the set for netlink events like dump, + * ref can be swapped out by ip_set_swap + */ + u32 ref_netlink; /* The core set type */ struct ip_set_type *type; /* The type variant doing the real job */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index b0bc475f641e..2e8e7e5fb4a6 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -95,7 +95,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7e6568cad494..a748b0c2c981 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -497,6 +497,26 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } +/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need + * a separate reference counter + */ +static inline void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + +static inline void +__ip_set_put_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + BUG_ON(set->ref_netlink == 0); + set->ref_netlink--; + write_unlock_bh(&ip_set_ref_lock); +} + /* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced @@ -1002,7 +1022,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s && s->ref) { + if (s && (s->ref || s->ref_netlink)) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1024,7 +1044,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!s) { ret = -ENOENT; goto out; - } else if (s->ref) { + } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1171,6 +1191,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; + if (from->ref_netlink || to->ref_netlink) + return -EBUSY; + strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); @@ -1206,7 +1229,7 @@ ip_set_dump_done(struct netlink_callback *cb) if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); } return 0; } @@ -1328,7 +1351,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - set->ref++; + set->ref_netlink++; } write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, @@ -1396,7 +1419,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); cb->args[IPSET_CB_ARG0] = 0; } out: diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e5336ab36d67..d32fd6b036bf 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1082,7 +1082,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 24c6c1962aea..a2a89e4e0a14 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -458,7 +458,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + n * set->dsize))) goto nla_put_failure; From 5745b0be05a0f8ccbc92a36b69f3a6bc58e91954 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 21 Mar 2016 11:15:19 -0700 Subject: [PATCH 28/73] openvswitch: Fix checking for new expected connections. OVS should call into CT NAT for packets of new expected connections only when the conntrack state is persisted with the 'commit' option to the OVS CT action. The test for this condition is doubly wrong, as the CT status field is ANDed with the bit number (IPS_EXPECTED_BIT) rather than the mask (IPS_EXPECTED), and due to the wrong assumption that the expected bit would apply only for the first (i.e., 'new') packet of a connection, while in fact the expected bit remains on for the lifetime of an expected connection. The 'ctinfo' value IP_CT_RELATED derived from the ct status can be used instead, as it is only ever applicable to the 'new' packets of the expected connection. Fixes: 05752523e565 ('openvswitch: Interface with NAT.') Reported-by: Dan Carpenter Signed-off-by: Jarno Rajahalme Signed-off-by: Pablo Neira Ayuso --- net/openvswitch/conntrack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index dc5eb29fe7d6..47f7c62761d2 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -664,11 +664,12 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, /* Determine NAT type. * Check if the NAT type can be deduced from the tracked connection. - * Make sure expected traffic is NATted only when committing. + * Make sure new expected connections (IP_CT_RELATED) are NATted only + * when committing. */ if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && ct->status & IPS_NAT_MASK && - (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) { + (ctinfo != IP_CT_RELATED || info->commit)) { /* NAT an established or related connection like before. */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) /* This is the REPLY direction for a connection From 99b7248e2ad57ca93ada10c6598affb267ffc99a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 18 Mar 2016 14:33:45 +0100 Subject: [PATCH 29/73] openvswitch: call only into reachable nf-nat code The openvswitch code has gained support for calling into the nf-nat-ipv4/ipv6 modules, however those can be loadable modules in a configuration in which openvswitch is built-in, leading to link errors: net/built-in.o: In function `__ovs_ct_lookup': :(.text+0x2cc2c8): undefined reference to `nf_nat_icmp_reply_translation' :(.text+0x2cc66c): undefined reference to `nf_nat_icmpv6_reply_translation' The dependency on (!NF_NAT || NF_NAT) prevents similar issues, but NF_NAT is set to 'y' if any of the symbols selecting it are built-in, but the link error happens when any of them are modular. A second issue is that even if CONFIG_NF_NAT_IPV6 is built-in, CONFIG_NF_NAT_IPV4 might be completely disabled. This is unlikely to be useful in practice, but the driver currently only handles IPv6 being optional. This patch improves the Kconfig dependency so that openvswitch cannot be built-in if either of the two other symbols are set to 'm', and it replaces the incorrect #ifdef in ovs_ct_nat_execute() with two "if (IS_ENABLED())" checks that should catch all corner cases also make the code more readable. The same #ifdef exists ovs_ct_nat_to_attr(), where it does not cause a link error, but for consistency I'm changing it the same way. Signed-off-by: Arnd Bergmann Fixes: 05752523e565 ("openvswitch: Interface with NAT.") Acked-by: Joe Stringer Signed-off-by: Pablo Neira Ayuso --- net/openvswitch/Kconfig | 4 +++- net/openvswitch/conntrack.c | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 234a73344c6e..ce947292ae77 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -7,7 +7,9 @@ config OPENVSWITCH depends on INET depends on !NF_CONNTRACK || \ (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ - (!NF_NAT || NF_NAT))) + (!NF_NAT || NF_NAT) && \ + (!NF_NAT_IPV4 || NF_NAT_IPV4) && \ + (!NF_NAT_IPV6 || NF_NAT_IPV6))) select LIBCRC32C select MPLS select NET_MPLS_GSO diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 47f7c62761d2..3797879b0bf8 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -535,14 +535,15 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (skb->protocol == htons(ETH_P_IP) && + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto push; -#if IS_ENABLED(CONFIG_NF_NAT_IPV6) - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + skb->protocol == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; int hdrlen = ipv6_skip_exthdr(skb, @@ -557,7 +558,6 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, err = NF_DROP; goto push; } -#endif } /* Non-ICMP, fall thru to initialize if needed. */ case IP_CT_NEW: @@ -1239,7 +1239,8 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, } if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { - if (info->family == NFPROTO_IPV4) { + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + info->family == NFPROTO_IPV4) { if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, info->range.min_addr.ip) || (info->range.max_addr.ip @@ -1247,8 +1248,8 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, info->range.max_addr.ip)))) return false; -#if IS_ENABLED(CONFIG_NF_NAT_IPV6) - } else if (info->family == NFPROTO_IPV6) { + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + info->family == NFPROTO_IPV6) { if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, &info->range.min_addr.in6) || (memcmp(&info->range.max_addr.in6, @@ -1257,7 +1258,6 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, &info->range.max_addr.in6)))) return false; -#endif } else { return false; } From bdf533de6968e9686df777dc178486f600c6e617 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:49 +0100 Subject: [PATCH 30/73] netfilter: x_tables: validate e->target_offset early We should check that e->target_offset is sane before mark_source_chains gets called since it will fetch the target entry for loop detection. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 17 ++++++++--------- net/ipv4/netfilter/ip_tables.c | 17 ++++++++--------- net/ipv6/netfilter/ip6_tables.c | 17 ++++++++--------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index bf081927e06b..830bbe8ec13d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -474,14 +474,12 @@ static int mark_source_chains(const struct xt_table_info *newinfo, return 1; } -static inline int check_entry(const struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e) { const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) { - duprintf("arp_tables: arp check failed %p %s.\n", e, name); + if (!arp_checkentry(&e->arp)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) return -EINVAL; @@ -522,10 +520,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) struct xt_target *target; int ret; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -576,6 +570,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { @@ -590,6 +585,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1246,7 +1245,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e, name); + ret = check_entry((struct arpt_entry *)e); if (ret) return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e53f8d6f326d..1d72a3c4a7e7 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -569,14 +569,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e) { const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, name); + if (!ip_checkentry(&e->ip)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -666,10 +664,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -741,6 +735,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { @@ -755,6 +750,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1506,7 +1505,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e, name); + ret = check_entry((struct ipt_entry *)e); if (ret) return ret; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 84f9baf7aee8..26a5ad1cc4fd 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -581,14 +581,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e) { const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); + if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -679,10 +677,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -753,6 +747,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { @@ -767,6 +762,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -1518,7 +1517,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e, name); + ret = check_entry((struct ip6t_entry *)e); if (ret) return ret; From 6e94e0cfb0887e4013b3b930fa6ab1fe6bb6ba91 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:50 +0100 Subject: [PATCH 31/73] netfilter: x_tables: make sure e->next_offset covers remaining blob size Otherwise this function may read data beyond the ruleset blob. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 6 ++++-- net/ipv4/netfilter/ip_tables.c | 6 ++++-- net/ipv6/netfilter/ip6_tables.c | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 830bbe8ec13d..51d4fe56b807 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -573,7 +573,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1232,7 +1233,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1d72a3c4a7e7..fb7694e6663e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -738,7 +738,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1492,7 +1493,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 26a5ad1cc4fd..b248528f2a17 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -750,7 +750,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1504,7 +1505,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } From 54d83fc74aa9ec72794373cb47432c5f7fb1a309 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2016 18:02:52 +0100 Subject: [PATCH 32/73] netfilter: x_tables: fix unconditional helper Ben Hawkes says: In the mark_source_chains function (net/ipv4/netfilter/ip_tables.c) it is possible for a user-supplied ipt_entry structure to have a large next_offset field. This field is not bounds checked prior to writing a counter value at the supplied offset. Problem is that mark_source_chains should not have been called -- the rule doesn't have a next entry, so its supposed to return an absolute verdict of either ACCEPT or DROP. However, the function conditional() doesn't work as the name implies. It only checks that the rule is using wildcard address matching. However, an unconditional rule must also not be using any matches (no -m args). The underflow validator only checked the addresses, therefore passing the 'unconditional absolute verdict' test, while mark_source_chains also tested for presence of matches, and thus proceeeded to the next (not-existent) rule. Unify this so that all the callers have same idea of 'unconditional rule'. Reported-by: Ben Hawkes Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 18 +++++++++--------- net/ipv4/netfilter/ip_tables.c | 23 +++++++++++------------ net/ipv6/netfilter/ip6_tables.c | 23 +++++++++++------------ 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 51d4fe56b807..a1bb5e7129a2 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -359,11 +359,12 @@ unsigned int arpt_do_table(struct sk_buff *skb, } /* All zeroes == unconditional rule. */ -static inline bool unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_entry *e) { static const struct arpt_arp uncond; - return memcmp(arp, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct arpt_entry) && + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if @@ -402,11 +403,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct arpt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->arp)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -551,7 +551,7 @@ static bool check_underflow(const struct arpt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->arp)) + if (!unconditional(e)) return false; t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -598,9 +598,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fb7694e6663e..89b5d959eda3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ipt_ip *ip) +static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; - return memcmp(ip, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ipt_entry) && + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; #undef FWINV } @@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ip)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -476,11 +476,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->ip)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -715,7 +714,7 @@ static bool check_underflow(const struct ipt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ip)) + if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -763,9 +762,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b248528f2a17..541b59f83595 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ip6t_entry) && + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * @@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ipv6)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -488,11 +488,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && - unconditional(&e->ipv6)) || visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -727,7 +726,7 @@ static bool check_underflow(const struct ip6t_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ipv6)) + if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -775,9 +774,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; From 931401137f60fc299256bbc221c0b756be31c32c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 23 Mar 2016 12:58:01 +0100 Subject: [PATCH 33/73] netfilter: nfnetlink_queue: honor NFQA_CFG_F_FAIL_OPEN when netlink unicast fails When netlink unicast fails to deliver the message to userspace, we should also check if the NFQA_CFG_F_FAIL_OPEN flag is set so we reinject the packet back to the stack. I think the user expects no packet drops when this flag is set due to queueing to userspace errors, no matter if related to the internal queue or when sending the netlink message to userspace. The userspace application will still get the ENOBUFS error via recvmsg() so the user still knows that, with the current configuration that is in place, the userspace application is not consuming the messages at the pace that the kernel needs. Reported-by: "Yigal Reiss (yreiss)" Signed-off-by: Pablo Neira Ayuso Tested-by: "Yigal Reiss (yreiss)" --- net/netfilter/nfnetlink_queue.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 75429997ed41..cb5b630a645b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -582,7 +582,12 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, /* nfnetlink_unicast will either free the nskb or add it to a socket */ err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { - queue->queue_user_dropped++; + if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { + failopen = 1; + err = 0; + } else { + queue->queue_user_dropped++; + } goto err_out_unlock; } From b301f2538759933cf9ff1f7c4f968da72e3f0757 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 24 Mar 2016 21:29:53 +0100 Subject: [PATCH 34/73] netfilter: x_tables: enforce nul-terminated table name from getsockopt GET_ENTRIES Make sure the table names via getsockopt GET_ENTRIES is nul-terminated in ebtables and all the x_tables variants and their respective compat code. Uncovered by KASAN. Reported-by: Baozeng Ding Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 ++++ net/ipv4/netfilter/arp_tables.c | 2 ++ net/ipv4/netfilter/ip_tables.c | 2 ++ net/ipv6/netfilter/ip6_tables.c | 2 ++ 4 files changed, 10 insertions(+) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 67b2e27999aa..8570bc7744c2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1521,6 +1521,8 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2332,6 +2334,8 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a1bb5e7129a2..4133b0f513af 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -969,6 +969,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, sizeof(struct arpt_get_entries) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1663,6 +1664,7 @@ static int compat_get_entries(struct net *net, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 89b5d959eda3..631c100a1338 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1156,6 +1156,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1935,6 +1936,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 541b59f83595..86b67b70b626 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1168,6 +1168,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1944,6 +1945,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); From 29421198c3a860092e27c2ad8499dfe603398817 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 26 Mar 2016 16:32:57 +0800 Subject: [PATCH 35/73] netfilter: ipv4: fix NULL dereference Commit fa50d974d104 ("ipv4: Namespaceify ip_default_ttl sysctl knob") use sock_net(skb->sk) to get the net namespace, but we can't assume that sk_buff->sk is always exist, so when it is NULL, oops will happen. Signed-off-by: Liping Zhang Reviewed-by: Nikolay Borisov Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 20 ++++----- net/ipv4/netfilter/ipt_SYNPROXY.c | 54 ++++++++++++------------ 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index adc8d7221dbb..77f7e7a9ebe1 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -40,7 +40,8 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ -static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, +static void nft_reject_br_send_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, const struct net_device *dev, int hook) { @@ -48,7 +49,6 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _oth; - struct net *net = sock_net(oldskb->sk); if (!nft_bridge_iphdr_validate(oldskb)) return; @@ -75,7 +75,8 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, br_deliver(br_port_get_rcu(dev), nskb); } -static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, +static void nft_reject_br_send_v4_unreach(struct net *net, + struct sk_buff *oldskb, const struct net_device *dev, int hook, u8 code) { @@ -86,7 +87,6 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, void *payload; __wsum csum; u8 proto; - struct net *net = sock_net(oldskb->sk); if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; @@ -273,17 +273,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, - pkt->hook); + nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, + pkt->in, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, nft_reject_icmp_code(priv->icmp_code)); break; } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 7b8fbb352877..db5b87509446 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -18,10 +18,10 @@ #include static struct iphdr * -synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) +synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, + __be32 daddr) { struct iphdr *iph; - struct net *net = sock_net(skb->sk); skb_reset_network_header(skb); iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); @@ -40,14 +40,12 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) } static void -synproxy_send_tcp(const struct synproxy_net *snet, +synproxy_send_tcp(struct net *net, const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct iphdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { - struct net *net = nf_ct_net(snet->tmpl); - nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; @@ -72,7 +70,7 @@ synproxy_send_tcp(const struct synproxy_net *snet, } static void -synproxy_send_client_synack(const struct synproxy_net *snet, +synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -91,7 +89,7 @@ synproxy_send_client_synack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -109,15 +107,16 @@ synproxy_send_client_synack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_syn(const struct synproxy_net *snet, +synproxy_send_server_syn(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *nskb; struct iphdr *iph, *niph; struct tcphdr *nth; @@ -132,7 +131,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -153,12 +152,12 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_ack(const struct synproxy_net *snet, +synproxy_send_server_ack(struct net *net, const struct ip_ct_tcp *state, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) @@ -177,7 +176,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -193,11 +192,11 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void -synproxy_send_client_ack(const struct synproxy_net *snet, +synproxy_send_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -215,7 +214,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -231,15 +230,16 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool -synproxy_recv_client_ack(const struct synproxy_net *snet, +synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); int mss; mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); @@ -255,7 +255,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); - synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + synproxy_send_server_syn(net, skb, th, opts, recv_seq); return true; } @@ -263,7 +263,8 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(par->net); + struct net *net = par->net; + struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -292,12 +293,12 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(snet, skb, th, &opts); + synproxy_send_client_synack(net, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); return NF_DROP; } @@ -308,7 +309,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(nhs->net); + struct net *net = nhs->net; + struct synproxy_net *snet = synproxy_pernet(net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; @@ -365,7 +367,7 @@ static unsigned int ipv4_synproxy_hook(void *priv, * therefore we need to add 1 to make the SYN sequence * number match the one of first SYN. */ - if (synproxy_recv_client_ack(snet, skb, th, &opts, + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq) + 1)) this_cpu_inc(snet->stats->cookie_retrans); @@ -391,12 +393,12 @@ static unsigned int ipv4_synproxy_hook(void *priv, XT_SYNPROXY_OPT_SACK_PERM); swap(opts.tsval, opts.tsecr); - synproxy_send_server_ack(snet, state, skb, th, &opts); + synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); swap(opts.tsval, opts.tsecr); - synproxy_send_client_ack(snet, skb, th, &opts); + synproxy_send_client_ack(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; From e84810c7b85a2d7897797b3ad3e879168a8e032a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 28 Mar 2016 22:38:16 +0200 Subject: [PATCH 36/73] qmi_wwan: add "D-Link DWM-221 B1" device id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thomas reports: "Windows: 00 diagnostics 01 modem 02 at-port 03 nmea 04 nic Linux: T: Bus=02 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2001 ProdID=7e19 Rev=02.32 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage" Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 7d717c66bcb0..9d1fce8a6e84 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -844,6 +844,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ + {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ From 56768045186c183f1d6e5cd916dd07751a777a8d Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Thu, 7 Jan 2016 14:17:03 -0800 Subject: [PATCH 37/73] ixgbe: Fix for RAR0 not being set to default MAC addr commit c9f53e63c208 ("ixgbe: Refactor MAC address configuration code") introduced code that doesn't set HW register RAR0 to default mac address but FF:FF:FF:FF:FF:FF. Due to this, ixgbe HW discards all incoming packets that doesn't have destination mac address equals to FF:FF:FF:FF:FF:FF. This commit sets RAR0 correctly to default HW mac address. Signed-off-by: Tushar Dave Tested-by: Sowmini Varadhan Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 569cb0757c93..f19327325675 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9329,6 +9329,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_sw_init; } + /* Set hw->mac.addr to permanent MAC address */ + ether_addr_copy(hw->mac.addr, hw->mac.perm_addr); ixgbe_mac_set_default_filter(adapter); setup_timer(&adapter->service_timer, &ixgbe_service_timer, From 9f12df906cd807a05d71aa53a951532d1dd3b888 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 25 Jan 2016 19:36:29 -0800 Subject: [PATCH 38/73] ixgbe: Store VXLAN port number in network order The VXLAN port number should be stored in network order instead of in host order as it is accessed from the hot-path in ATR. This way we can avoid having to do any byte swaps in order to validate the port number. I moved the vxlan_port value into a hole in the read-mostly region of the adapter struct. This way it should be in a warm cache-line instead of in some isolated region in memory when it needs to be accessed. In addition I went through and stripped a bunch of unneeded ifdef flags since having an extra variable present doesn't really hurt anything and makes the code easier to read. I also went through and dropped the NETIF_F_RXCSUM flag which was being set in hw_encap_features but provides no value as the flag is not evaluated in the Rx path. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 8 +-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 61 ++++++------------- 2 files changed, 20 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 84fa28ceb200..458549c06a4f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -661,9 +661,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 9) #define IXGBE_FLAG2_PTP_PPS_ENABLED (u32)(1 << 10) #define IXGBE_FLAG2_PHY_INTERRUPT (u32)(1 << 11) -#ifdef CONFIG_IXGBE_VXLAN #define IXGBE_FLAG2_VXLAN_REREG_NEEDED BIT(12) -#endif #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) /* Tx fast path data */ @@ -675,6 +673,9 @@ struct ixgbe_adapter { int num_rx_queues; u16 rx_itr_setting; + /* Port number used to identify VXLAN traffic */ + __be16 vxlan_port; + /* TX */ struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; @@ -782,9 +783,6 @@ struct ixgbe_adapter { u32 timer_event_accumulator; u32 vferr_refcount; struct ixgbe_mac_addr *mac_table; -#ifdef CONFIG_IXGBE_VXLAN - u16 vxlan_port; -#endif struct kobject *info_kobj; #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f19327325675..f67c9a6429ac 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4531,9 +4531,7 @@ static void ixgbe_clear_vxlan_port(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0); -#ifdef CONFIG_IXGBE_VXLAN adapter->vxlan_port = 0; -#endif break; default: break; @@ -7561,9 +7559,6 @@ static void ixgbe_atr(struct ixgbe_ring *ring, } hdr; struct tcphdr *th; struct sk_buff *skb; -#ifdef CONFIG_IXGBE_VXLAN - u8 encap = false; -#endif /* CONFIG_IXGBE_VXLAN */ __be16 vlan_id; /* if ring doesn't have a interrupt vector, cannot perform ATR */ @@ -7579,28 +7574,21 @@ static void ixgbe_atr(struct ixgbe_ring *ring, /* snag network header to get L4 type and address */ skb = first->skb; hdr.network = skb_network_header(skb); - if (!skb->encapsulation) { - th = tcp_hdr(skb); - } else { + th = tcp_hdr(skb); #ifdef CONFIG_IXGBE_VXLAN + if (skb->encapsulation && + first->protocol == htons(ETH_P_IP) && + hdr.ipv4->protocol != IPPROTO_UDP) { struct ixgbe_adapter *adapter = q_vector->adapter; - if (!adapter->vxlan_port) - return; - if (first->protocol != htons(ETH_P_IP) || - hdr.ipv4->version != IPVERSION || - hdr.ipv4->protocol != IPPROTO_UDP) { - return; + /* verify the port is recognized as VXLAN */ + if (adapter->vxlan_port && + udp_hdr(skb)->dest == adapter->vxlan_port) { + hdr.network = skb_inner_network_header(skb); + th = inner_tcp_hdr(skb); } - if (ntohs(udp_hdr(skb)->dest) != adapter->vxlan_port) - return; - encap = true; - hdr.network = skb_inner_network_header(skb); - th = inner_tcp_hdr(skb); -#else - return; -#endif /* CONFIG_IXGBE_VXLAN */ } +#endif /* CONFIG_IXGBE_VXLAN */ /* Currently only IPv4/IPv6 with TCP is supported */ switch (hdr.ipv4->version) { @@ -7682,10 +7670,8 @@ static void ixgbe_atr(struct ixgbe_ring *ring, break; } -#ifdef CONFIG_IXGBE_VXLAN - if (encap) + if (hdr.network != skb_network_header(skb)) input.formatted.flow_type |= IXGBE_ATR_L4TYPE_TUNNEL_MASK; -#endif /* CONFIG_IXGBE_VXLAN */ /* This assumes the Rx queue and Tx queue are bound to the same CPU */ ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw, @@ -8554,7 +8540,6 @@ static void ixgbe_add_vxlan_port(struct net_device *dev, sa_family_t sa_family, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - u16 new_port = ntohs(port); if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) return; @@ -8562,18 +8547,18 @@ static void ixgbe_add_vxlan_port(struct net_device *dev, sa_family_t sa_family, if (sa_family == AF_INET6) return; - if (adapter->vxlan_port == new_port) + if (adapter->vxlan_port == port) return; if (adapter->vxlan_port) { netdev_info(dev, "Hit Max num of VXLAN ports, not adding port %d\n", - new_port); + ntohs(port)); return; } - adapter->vxlan_port = new_port; - IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, new_port); + adapter->vxlan_port = port; + IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, ntohs(port)); } /** @@ -8586,7 +8571,6 @@ static void ixgbe_del_vxlan_port(struct net_device *dev, sa_family_t sa_family, __be16 port) { struct ixgbe_adapter *adapter = netdev_priv(dev); - u16 new_port = ntohs(port); if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) return; @@ -8594,9 +8578,9 @@ static void ixgbe_del_vxlan_port(struct net_device *dev, sa_family_t sa_family, if (sa_family == AF_INET6) return; - if (adapter->vxlan_port != new_port) { + if (adapter->vxlan_port != port) { netdev_info(dev, "Port %d was not found, not deleting\n", - new_port); + ntohs(port)); return; } @@ -9265,17 +9249,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; -#ifdef CONFIG_IXGBE_VXLAN - switch (adapter->hw.mac.type) { - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - netdev->hw_enc_features |= NETIF_F_RXCSUM; - break; - default: - break; - } -#endif /* CONFIG_IXGBE_VXLAN */ - #ifdef CONFIG_IXGBE_DCB netdev->dcbnl_ops = &dcbnl_ops; #endif From e2873d43f9c607e9d855b8ae120d5990ba1722df Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 25 Jan 2016 19:39:40 -0800 Subject: [PATCH 39/73] ixgbe: Fix ATR so that it correctly handles IPv6 extension headers The ATR code was assuming that it would be able to use tcp_hdr for every TCP frame that came through. However this isn't the case as it is possible for a frame to arrive that is TCP but sent through something like a raw socket. As a result the driver was setting up bad filters in which tcp_hdr was really pointing to the network header so the data was all invalid. In order to correct this I have added a bit of parsing logic that will determine the TCP header location based off of the network header and either the offset in the case of the IPv4 header, or a walk through the IPv6 extension headers until it encounters the header that indicates IPPROTO_TCP. In addition I have added checks to verify that the lowest protocol provided is recognized as IPv4 or IPv6 to help mitigate raw sockets using ETH_P_ALL from having ATR applied to them. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f67c9a6429ac..ee81618bb9f0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7558,8 +7558,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct ipv6hdr *ipv6; } hdr; struct tcphdr *th; + unsigned int hlen; struct sk_buff *skb; __be16 vlan_id; + int l4_proto; /* if ring doesn't have a interrupt vector, cannot perform ATR */ if (!q_vector) @@ -7571,10 +7573,14 @@ static void ixgbe_atr(struct ixgbe_ring *ring, ring->atr_count++; + /* currently only IPv4/IPv6 with TCP is supported */ + if ((first->protocol != htons(ETH_P_IP)) && + (first->protocol != htons(ETH_P_IPV6))) + return; + /* snag network header to get L4 type and address */ skb = first->skb; hdr.network = skb_network_header(skb); - th = tcp_hdr(skb); #ifdef CONFIG_IXGBE_VXLAN if (skb->encapsulation && first->protocol == htons(ETH_P_IP) && @@ -7583,43 +7589,34 @@ static void ixgbe_atr(struct ixgbe_ring *ring, /* verify the port is recognized as VXLAN */ if (adapter->vxlan_port && - udp_hdr(skb)->dest == adapter->vxlan_port) { + udp_hdr(skb)->dest == adapter->vxlan_port) hdr.network = skb_inner_network_header(skb); - th = inner_tcp_hdr(skb); - } } #endif /* CONFIG_IXGBE_VXLAN */ /* Currently only IPv4/IPv6 with TCP is supported */ switch (hdr.ipv4->version) { case IPVERSION: - if (hdr.ipv4->protocol != IPPROTO_TCP) - return; + /* access ihl as u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + l4_proto = hdr.ipv4->protocol; break; case 6: - if (likely((unsigned char *)th - hdr.network == - sizeof(struct ipv6hdr))) { - if (hdr.ipv6->nexthdr != IPPROTO_TCP) - return; - } else { - __be16 frag_off; - u8 l4_hdr; - - ipv6_skip_exthdr(skb, hdr.network - skb->data + - sizeof(struct ipv6hdr), - &l4_hdr, &frag_off); - if (unlikely(frag_off)) - return; - if (l4_hdr != IPPROTO_TCP) - return; - } + hlen = hdr.network - skb->data; + l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); + hlen -= hdr.network - skb->data; break; default: return; } - /* skip this packet since it is invalid or the socket is closing */ - if (!th || th->fin) + if (l4_proto != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(hdr.network + hlen); + + /* skip this packet since the socket is closing */ + if (th->fin) return; /* sample on all syn packets or once every atr sample count */ From d90b5b0ec1c7a3099dee9c18071a5b714eb3de52 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 29 Jan 2016 14:44:29 -0800 Subject: [PATCH 40/73] ixgbe: Use udelay to avoid sleeping while atomic Use udelay instead of usleep_range because this can be called while a lock is held. Signed-off-by: Mark Rustad Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 87aca3f7c3de..68a9c646498e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -355,7 +355,7 @@ static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); if (!(command & IXGBE_SB_IOSF_CTRL_BUSY)) break; - usleep_range(10, 20); + udelay(10); } if (ctrl) *ctrl = command; From 6c211fe1e71e2bf7baa45a78ac5358a1f45a7fe9 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 3 Feb 2016 09:20:48 +0100 Subject: [PATCH 41/73] ixgbe: call ndo_stop() instead of dev_close() when running offline selftest Calling dev_close() causes IFF_UP to be cleared which will remove the interfaces routes and some addresses. That's probably not what the user intended when running the offline selftest. Besides this does not happen if the interface is brought down before the test, so the current behaviour is inconsistent. Instead call the net_device_ops ndo_stop function directly and avoid touching IFF_UP at all. Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 458549c06a4f..e4949af7dd6b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -877,6 +877,8 @@ extern const char ixgbe_driver_version[]; extern char ixgbe_default_device_descr[]; #endif /* IXGBE_FCOE */ +int ixgbe_open(struct net_device *netdev); +int ixgbe_close(struct net_device *netdev); void ixgbe_up(struct ixgbe_adapter *adapter); void ixgbe_down(struct ixgbe_adapter *adapter); void ixgbe_reinit_locked(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 726e0eeee63b..b3530e1e3ce1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2053,7 +2053,7 @@ static void ixgbe_diag_test(struct net_device *netdev, if (if_running) /* indicate we're in test mode */ - dev_close(netdev); + ixgbe_close(netdev); else ixgbe_reset(adapter); @@ -2091,7 +2091,7 @@ static void ixgbe_diag_test(struct net_device *netdev, /* clear testing bit and return adapter to previous state */ clear_bit(__IXGBE_TESTING, &adapter->state); if (if_running) - dev_open(netdev); + ixgbe_open(netdev); else if (hw->mac.ops.disable_tx_laser) hw->mac.ops.disable_tx_laser(hw); } else { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ee81618bb9f0..267a50747cbf 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5992,7 +5992,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) * handler is registered with the OS, the watchdog timer is started, * and the stack is notified that the interface is ready. **/ -static int ixgbe_open(struct net_device *netdev) +int ixgbe_open(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -6094,7 +6094,7 @@ static void ixgbe_close_suspend(struct ixgbe_adapter *adapter) * needs to be disabled. A global MAC reset is issued to stop the * hardware, and all transmit and receive resources are freed. **/ -static int ixgbe_close(struct net_device *netdev) +int ixgbe_close(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); From 324d086709978fce1671ba04087bf90865b04398 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 3 Feb 2016 09:20:49 +0100 Subject: [PATCH 42/73] ixgbevf: call ndo_stop() instead of dev_close() when running offline selftest Calling dev_close() causes IFF_UP to be cleared which will remove the interfaces routes and some addresses. That's probably not what the user intended when running the offline selftest. Besides this does not happen if the interface is brought down before the test, so the current behaviour is inconsistent. Instead call the net_device_ops ndo_stop function directly and avoid touching IFF_UP at all. Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 4 ++-- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 ++ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index c48aef613b0a..d7aa4b203f40 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -680,7 +680,7 @@ static void ixgbevf_diag_test(struct net_device *netdev, if (if_running) /* indicate we're in test mode */ - dev_close(netdev); + ixgbevf_close(netdev); else ixgbevf_reset(adapter); @@ -692,7 +692,7 @@ static void ixgbevf_diag_test(struct net_device *netdev, clear_bit(__IXGBEVF_TESTING, &adapter->state); if (if_running) - dev_open(netdev); + ixgbevf_open(netdev); } else { hw_dbg(&adapter->hw, "online testing starting\n"); /* Online tests */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 68ec7daa04fd..991eeae81473 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -486,6 +486,8 @@ extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops; extern const char ixgbevf_driver_name[]; extern const char ixgbevf_driver_version[]; +int ixgbevf_open(struct net_device *netdev); +int ixgbevf_close(struct net_device *netdev); void ixgbevf_up(struct ixgbevf_adapter *adapter); void ixgbevf_down(struct ixgbevf_adapter *adapter); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0ea14c0a2e74..6a337bbf6820 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3122,7 +3122,7 @@ static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter) * handler is registered with the OS, the watchdog timer is started, * and the stack is notified that the interface is ready. **/ -static int ixgbevf_open(struct net_device *netdev) +int ixgbevf_open(struct net_device *netdev) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -3205,7 +3205,7 @@ static int ixgbevf_open(struct net_device *netdev) * needs to be disabled. A global MAC reset is issued to stop the * hardware, and all transmit and receive resources are freed. **/ -static int ixgbevf_close(struct net_device *netdev) +int ixgbevf_close(struct net_device *netdev) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); From 32ca68683532ab629d16cede1102b36ae5346f40 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 10 Feb 2016 15:16:30 -0800 Subject: [PATCH 43/73] ixgbevf: fix error code path when setting MAC address Return error when a MAC address change is rejected by the PF. This will prevent the user from modifying the MAC address when that operation is not permitted. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 12 ++++++++---- drivers/net/ethernet/intel/ixgbevf/vf.c | 4 +++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 6a337bbf6820..b0edae94d73d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3692,19 +3692,23 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; + int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ether_addr_copy(netdev->dev_addr, addr->sa_data); - ether_addr_copy(hw->mac.addr, addr->sa_data); - spin_lock_bh(&adapter->mbx_lock); - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); + err = hw->mac.ops.set_rar(hw, 0, addr->sa_data, 0); spin_unlock_bh(&adapter->mbx_lock); + if (err) + return -EPERM; + + ether_addr_copy(hw->mac.addr, addr->sa_data); + ether_addr_copy(netdev->dev_addr, addr->sa_data); + return 0; } diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 61a98f4c5746..4d613a4f2a7f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -408,8 +408,10 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, /* if nacked the address was rejected, use "perm_addr" */ if (!ret_val && - (msgbuf[0] == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_NACK))) + (msgbuf[0] == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_NACK))) { ixgbevf_get_mac_addr_vf(hw, hw->mac.addr); + return IXGBE_ERR_MBX; + } return ret_val; } From 6e2a60b57a83ea134c06c5226aaff20e7e9ce221 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 17 Feb 2016 15:55:06 -0800 Subject: [PATCH 44/73] ixgbe: make __ixgbe_setup_tc static This function is only used in ixgbe_main.c Resolves a "missing prototype" warning when building the driver with W=1 Reported-by: Phil Schmitt Signed-off-by: Emil Tantilov Acked-by: John Fastabend Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 267a50747cbf..f5736a3b252e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8376,8 +8376,8 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, return -EINVAL; } -int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, - struct tc_to_netdev *tc) +static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { struct ixgbe_adapter *adapter = netdev_priv(dev); From 176621c964e9279c42c6b641688360e5cd0baedf Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 7 Mar 2016 09:41:47 -0800 Subject: [PATCH 45/73] ixgbe: fix error handling in TC cls_u32 offload routines Check for handle ids when adding/deleting hash nodes OR adding/deleting filter entries and limit them to max number of links or header nodes supported(IXGBE_MAX_LINK_HANDLE). Start from bit 0 when setting hash table bit-map.(adapter->tables) Signed-off-by: Sridhar Samudrala Acked-by: John Fastabend Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 52 ++++++++++++------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f5736a3b252e..ca9c54341784 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8192,10 +8192,17 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter, struct tc_cls_u32_offload *cls) { + u32 uhtid = TC_U32_USERHTID(cls->knode.handle); + u32 loc; int err; + if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE)) + return -EINVAL; + + loc = cls->knode.handle & 0xfffff; + spin_lock(&adapter->fdir_perfect_lock); - err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, cls->knode.handle); + err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, loc); spin_unlock(&adapter->fdir_perfect_lock); return err; } @@ -8204,20 +8211,30 @@ static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter, __be16 protocol, struct tc_cls_u32_offload *cls) { + u32 uhtid = TC_U32_USERHTID(cls->hnode.handle); + + if (uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + /* This ixgbe devices do not support hash tables at the moment * so abort when given hash tables. */ if (cls->hnode.divisor > 0) return -EINVAL; - set_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables); + set_bit(uhtid - 1, &adapter->tables); return 0; } static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter, struct tc_cls_u32_offload *cls) { - clear_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables); + u32 uhtid = TC_U32_USERHTID(cls->hnode.handle); + + if (uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + clear_bit(uhtid - 1, &adapter->tables); return 0; } @@ -8235,27 +8252,29 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, #endif int i, err = 0; u8 queue; - u32 handle; + u32 uhtid, link_uhtid; memset(&mask, 0, sizeof(union ixgbe_atr_input)); - handle = cls->knode.handle; + uhtid = TC_U32_USERHTID(cls->knode.handle); + link_uhtid = TC_U32_USERHTID(cls->knode.link_handle); - /* At the moment cls_u32 jumps to transport layer and skips past + /* At the moment cls_u32 jumps to network layer and skips past * L2 headers. The canonical method to match L2 frames is to use * negative values. However this is error prone at best but really * just broken because there is no way to "know" what sort of hdr - * is in front of the transport layer. Fix cls_u32 to support L2 + * is in front of the network layer. Fix cls_u32 to support L2 * headers when needed. */ if (protocol != htons(ETH_P_IP)) return -EINVAL; - if (cls->knode.link_handle || - cls->knode.link_handle >= IXGBE_MAX_LINK_HANDLE) { + if (link_uhtid) { struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps; - u32 uhtid = TC_U32_USERHTID(cls->knode.link_handle); - if (!test_bit(uhtid, &adapter->tables)) + if (link_uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + if (!test_bit(link_uhtid - 1, &adapter->tables)) return -EINVAL; for (i = 0; nexthdr[i].jump; i++) { @@ -8271,10 +8290,7 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, nexthdr->mask != cls->knode.sel->keys[0].mask) return -EINVAL; - if (uhtid >= IXGBE_MAX_LINK_HANDLE) - return -EINVAL; - - adapter->jump_tables[uhtid] = nexthdr->jump; + adapter->jump_tables[link_uhtid] = nexthdr->jump; } return 0; } @@ -8291,13 +8307,13 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, * To add support for new nodes update ixgbe_model.h parse structures * this function _should_ be generic try not to hardcode values here. */ - if (TC_U32_USERHTID(handle) == 0x800) { + if (uhtid == 0x800) { field_ptr = adapter->jump_tables[0]; } else { - if (TC_U32_USERHTID(handle) >= ARRAY_SIZE(adapter->jump_tables)) + if (uhtid >= IXGBE_MAX_LINK_HANDLE) return -EINVAL; - field_ptr = adapter->jump_tables[TC_U32_USERHTID(handle)]; + field_ptr = adapter->jump_tables[uhtid]; } if (!field_ptr) From ebd83ad818d2d4502d5e343388000d5dc829b7a8 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 7 Mar 2016 09:41:48 -0800 Subject: [PATCH 46/73] ixgbe: Fix cls_u32 offload support for fields with masks Remove the incorrect check for mask in ixgbe_configure_clsu32 and drop the 'mask' field that is not required in struct ixgbe_mat_field Verified with the following filters: #tc qdisc add dev p4p1 ingress #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 800:0:1 u32 ht 800: \ match ip dst 10.0.0.1/8 match ip src 10.0.0.2/8 action drop #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 800:0:2 u32 ht 800: \ match ip dst 11.0.0.1/16 match ip src 11.0.0.2/16 action drop #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 800:0:3 u32 ht 800: \ match ip dst 12.0.0.1/24 match ip src 12.0.0.2/24 action drop #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 800:0:4 u32 ht 800: \ match ip dst 13.0.0.1/32 match ip src 13.0.0.2/32 action drop Signed-off-by: Sridhar Samudrala Acked-by: John Fastabend Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +-- drivers/net/ethernet/intel/ixgbe/ixgbe_model.h | 9 ++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ca9c54341784..7df3fe29b210 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8331,8 +8331,7 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, int j; for (j = 0; field_ptr[j].val; j++) { - if (field_ptr[j].off == off && - field_ptr[j].mask == m) { + if (field_ptr[j].off == off) { field_ptr[j].val(input, &mask, val, m); input->filter.formatted.flow_type |= field_ptr[j].type; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h index ce48872d4782..61f729073978 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h @@ -32,7 +32,6 @@ struct ixgbe_mat_field { unsigned int off; - unsigned int mask; int (*val)(struct ixgbe_fdir_filter *input, union ixgbe_atr_input *mask, u32 val, u32 m); @@ -58,9 +57,9 @@ static inline int ixgbe_mat_prgm_dip(struct ixgbe_fdir_filter *input, } static struct ixgbe_mat_field ixgbe_ipv4_fields[] = { - { .off = 12, .mask = -1, .val = ixgbe_mat_prgm_sip, + { .off = 12, .val = ixgbe_mat_prgm_sip, .type = IXGBE_ATR_FLOW_TYPE_IPV4}, - { .off = 16, .mask = -1, .val = ixgbe_mat_prgm_dip, + { .off = 16, .val = ixgbe_mat_prgm_dip, .type = IXGBE_ATR_FLOW_TYPE_IPV4}, { .val = NULL } /* terminal node */ }; @@ -84,9 +83,9 @@ static inline int ixgbe_mat_prgm_dport(struct ixgbe_fdir_filter *input, }; static struct ixgbe_mat_field ixgbe_tcp_fields[] = { - {.off = 0, .mask = 0xffff, .val = ixgbe_mat_prgm_sport, + {.off = 0, .val = ixgbe_mat_prgm_sport, .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, - {.off = 2, .mask = 0xffff, .val = ixgbe_mat_prgm_dport, + {.off = 2, .val = ixgbe_mat_prgm_dport, .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, { .val = NULL } /* terminal node */ }; From b5aea3de88e526a46839b7ccf049fab4f8ad7a4b Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 7 Mar 2016 09:41:49 -0800 Subject: [PATCH 47/73] ixgbe: Fix cls_u32 offload support for L4 ports Fix support for 16 bit source/dest port matches in ixgbe model. u32 uses a single 32-bit key value for both source and destination ports starting at offset 0. So replace the 2 functions with a single function that takes this key value/mask to program both source and dest ports. Verified with the following filter: #tc qdisc add dev p4p1 ingress #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 1: u32 divisor 1 #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 800:0:10 u32 ht 800: link 1: \ offset at 0 mask 0f00 shift 6 plus 0 eat match ip protocol 6 ff #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 1:0:10 u32 ht 1: \ match tcp src 1024 ffff match tcp dst 80 ffff action drop #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 1:0:11 u32 ht 1: \ match tcp src 1025 ffff action drop #tc filter add dev p4p1 parent ffff: protocol ip prio 99 \ handle 1:0:12 u32 ht 1: \ match tcp dst 81 ffff action drop Signed-off-by: Sridhar Samudrala Acked-by: John Fastabend Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_model.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h index 61f729073978..74c53ad9d268 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h @@ -64,28 +64,20 @@ static struct ixgbe_mat_field ixgbe_ipv4_fields[] = { { .val = NULL } /* terminal node */ }; -static inline int ixgbe_mat_prgm_sport(struct ixgbe_fdir_filter *input, +static inline int ixgbe_mat_prgm_ports(struct ixgbe_fdir_filter *input, union ixgbe_atr_input *mask, u32 val, u32 m) { input->filter.formatted.src_port = val & 0xffff; mask->formatted.src_port = m & 0xffff; - return 0; -}; + input->filter.formatted.dst_port = val >> 16; + mask->formatted.dst_port = m >> 16; -static inline int ixgbe_mat_prgm_dport(struct ixgbe_fdir_filter *input, - union ixgbe_atr_input *mask, - u32 val, u32 m) -{ - input->filter.formatted.dst_port = val & 0xffff; - mask->formatted.dst_port = m & 0xffff; return 0; }; static struct ixgbe_mat_field ixgbe_tcp_fields[] = { - {.off = 0, .val = ixgbe_mat_prgm_sport, - .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, - {.off = 2, .val = ixgbe_mat_prgm_dport, + {.off = 0, .val = ixgbe_mat_prgm_ports, .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, { .val = NULL } /* terminal node */ }; From 5e263f712691615fb802f06c98d7638c378f5d11 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Tue, 29 Mar 2016 18:48:08 +0800 Subject: [PATCH 48/73] bridge: Allow set bridge ageing time when switchdev disabled When NET_SWITCHDEV=n, switchdev_port_attr_set will return -EOPNOTSUPP, we should ignore this error code and continue to set the ageing time. Fixes: c62987bbd8a1 ("bridge: push bridge setting ageing_time down to switchdev") Signed-off-by: Haishuang Yan Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- net/bridge/br_stp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index e23449094188..9cb7044d0801 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -582,7 +582,7 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) int err; err = switchdev_port_attr_set(br->dev, &attr); - if (err) + if (err && err != -EOPNOTSUPP) return err; br->ageing_time = t; From 28fd34985bd5edaea6282a0586f91bcddd336f5e Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 29 Mar 2016 10:41:03 -0300 Subject: [PATCH 49/73] sctp: really allow using GFP_KERNEL on sctp_packet_transmit Somehow my patch for commit cea8768f333e ("sctp: allow sctp_transmit_packet and others to use gfp") missed two important chunks, which are now added. Fixes: cea8768f333e ("sctp: allow sctp_transmit_packet and others to use gfp") Signed-off-by: Marcelo Ricardo Leitner Acked-By: Neil Horman Signed-off-by: David S. Miller --- net/sctp/output.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 736c004abfbc..97745351d58c 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -401,7 +401,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sk = chunk->skb->sk; /* Allocate the new skb. */ - nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); + nskb = alloc_skb(packet->size + MAX_HEADER, gfp); if (!nskb) goto nomem; @@ -523,8 +523,8 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) */ if (auth) sctp_auth_calculate_hmac(asoc, nskb, - (struct sctp_auth_chunk *)auth, - GFP_ATOMIC); + (struct sctp_auth_chunk *)auth, + gfp); /* 2) Calculate the Adler-32 checksum of the whole packet, * including the SCTP common header and all the From fea24857bb62990b839feb62090ad72a1c93735a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 29 Mar 2016 18:00:50 +0100 Subject: [PATCH 50/73] qed: initialize return rc to avoid returning garbage in the case where qed_slowpath_irq_req is not called, rc is not assigned and so qed_int_igu_enable will return a garbage value. Fix this by initializing rc to 0. Signed-off-by: Colin Ian King Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index ffd0accc2ec9..2017b0121f5f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -2750,7 +2750,7 @@ void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn, int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_int_mode int_mode) { - int rc; + int rc = 0; /* Configure AEU signal change to produce attentions */ qed_wr(p_hwfn, p_ptt, IGU_REG_ATTENTION_ENABLE, 0); From c3483384ee511ee2af40b4076366cd82a6a47b86 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 29 Mar 2016 14:55:22 -0700 Subject: [PATCH 51/73] gro: Allow tunnel stacking in the case of FOU/GUE This patch should fix the issues seen with a recent fix to prevent tunnel-in-tunnel frames from being generated with GRO. The fix itself is correct for now as long as we do not add any devices that support NETIF_F_GSO_GRE_CSUM. When such a device is added it could have the potential to mess things up due to the fact that the outer transport header points to the outer UDP header and not the GRE header as would be expected. Fixes: fac8e0f579695 ("tunnels: Don't apply GRO to multiple layers of encapsulation.") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fou.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a0586b4a197d..5a94aea280d3 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -195,6 +195,14 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, u8 proto = NAPI_GRO_CB(skb)->proto; const struct net_offload **offloads; + /* We can clear the encap_mark for FOU as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -352,6 +360,14 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, } } + /* We can clear the encap_mark for GUE as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); From cb41c997d4446b39dd03b8a74196125739d7c3ac Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Mar 2016 10:58:39 +0800 Subject: [PATCH 52/73] team: team should sync the port's uc/mc addrs when add a port There is an issue when we use mavtap over team: When we replug nic links from team0, the real nics's mc list will not include the maddr for macvtap any more. then we can't receive pkts to macvtap device, as they are filterred by mc list of nic. In Bonding Driver, it syncs the uc/mc addrs in bond_enslave(). We will fix this issue on team by adding the port's uc/mc addrs sync in team_port_add. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/team/team.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 26c64d2782fa..a0f64cba86ba 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1198,6 +1198,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_dev_open; } + dev_uc_sync_multiple(port_dev, dev); + dev_mc_sync_multiple(port_dev, dev); + err = vlan_vids_add_by_dev(port_dev, dev); if (err) { netdev_err(dev, "Failed to add vlan ids to device %s\n", @@ -1261,6 +1264,8 @@ static int team_port_add(struct team *team, struct net_device *port_dev) vlan_vids_del_by_dev(port_dev, dev); err_vids_add: + dev_uc_unsync(port_dev, dev); + dev_mc_unsync(port_dev, dev); dev_close(port_dev); err_dev_open: From 33e52d888d0c84a0c66f13357a53113fd9710bd6 Mon Sep 17 00:00:00 2001 From: Prashant Sreedharan Date: Mon, 28 Mar 2016 19:46:04 -0400 Subject: [PATCH 53/73] bnxt_en: Initialize CP doorbell value before ring allocation The existing code does the following: allocate completion ring initialize completion ring doorbell disable interrupts on this completion ring by writing to the doorbell We can have a race where firmware sends an asynchronous event to the host after completion ring allocation and before doorbell is initialized. When this happens driver can crash while ringing the doorbell using uninitialized value as part of handling the IRQ/napi request. Signed-off-by: Prashant Sreedharan Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aabbd51db981..c92053c8b2cc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3391,11 +3391,11 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + cpr->cp_doorbell = bp->bar1 + i * 0x80; rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_CMPL, i, INVALID_STATS_CTX_ID); if (rc) goto err_out; - cpr->cp_doorbell = bp->bar1 + i * 0x80; BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id; } From e6ef26991a46e20879bebb8298080eb7ceed4ae8 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 28 Mar 2016 19:46:05 -0400 Subject: [PATCH 54/73] bnxt_en: Implement proper firmware message padding. The size of every padded firmware message is specified in the first HWRM_VER_GET response message. Use this value to pad every message after that. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c92053c8b2cc..4600a05a1d7d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2653,7 +2653,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, /* Write request msg to hwrm channel */ __iowrite32_copy(bp->bar0, data, msg_len / 4); - for (i = msg_len; i < HWRM_MAX_REQ_LEN; i += 4) + for (i = msg_len; i < BNXT_HWRM_MAX_REQ_LEN; i += 4) writel(0, bp->bar0 + i); /* currently supports only one outstanding message */ @@ -3830,6 +3830,7 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) struct hwrm_ver_get_input req = {0}; struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr; + bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VER_GET, -1, -1); req.hwrm_intf_maj = HWRM_VERSION_MAJOR; req.hwrm_intf_min = HWRM_VERSION_MINOR; @@ -3855,6 +3856,9 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) if (!bp->hwrm_cmd_timeout) bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT; + if (resp->hwrm_intf_maj >= 1) + bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len); + hwrm_ver_get_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index ec04c47172b7..709b95b8fcba 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -477,6 +477,7 @@ struct rx_tpa_end_cmp_ext { #define RING_CMP(idx) ((idx) & bp->cp_ring_mask) #define NEXT_CMP(idx) RING_CMP(ADV_RAW_CMP(idx, 1)) +#define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define DFLT_HWRM_CMD_TIMEOUT 500 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) @@ -953,6 +954,7 @@ struct bnxt { dma_addr_t hw_tx_port_stats_map; int hw_port_stats_size; + u16 hwrm_max_req_len; int hwrm_cmd_timeout; struct mutex hwrm_cmd_lock; /* serialize hwrm messages */ struct hwrm_ver_get_output ver_resp; From 49b5c7a125201bb42c25831fda3a50305c29ef50 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 28 Mar 2016 19:46:06 -0400 Subject: [PATCH 55/73] bnxt_en: Fix typo in bnxt_hwrm_set_pause_common(). The typo caused the wrong flow control bit to be set. Reported by: Ajit Khaparde Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4600a05a1d7d..12a009d720cd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4559,7 +4559,7 @@ bnxt_hwrm_set_pause_common(struct bnxt *bp, struct hwrm_port_phy_cfg_input *req) if (bp->link_info.req_flow_ctrl & BNXT_LINK_PAUSE_RX) req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_RX; if (bp->link_info.req_flow_ctrl & BNXT_LINK_PAUSE_TX) - req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_RX; + req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_TX; req->enables |= cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_PAUSE); } else { From 3c02d1bb32347d0674714ee170772d771d513469 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 28 Mar 2016 19:46:07 -0400 Subject: [PATCH 56/73] bnxt_en: Fix ethtool -a reporting. To report flow control tx/rx settings accurately regardless of autoneg setting, we should use link_info->req_flow_ctrl. Before this patch, the reported settings were only correct when autoneg was on. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9ada1662b651..2e472f6dbf2d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -855,10 +855,8 @@ static void bnxt_get_pauseparam(struct net_device *dev, if (BNXT_VF(bp)) return; epause->autoneg = !!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL); - epause->rx_pause = - ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_RX) != 0); - epause->tx_pause = - ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_TX) != 0); + epause->rx_pause = !!(link_info->req_flow_ctrl & BNXT_LINK_PAUSE_RX); + epause->tx_pause = !!(link_info->req_flow_ctrl & BNXT_LINK_PAUSE_TX); } static int bnxt_set_pauseparam(struct net_device *dev, From 2d4212261fdf13e29728ddb5ea9d60c342cc92b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Mar 2016 08:43:41 -0700 Subject: [PATCH 57/73] ipv6: udp: fix UDP_MIB_IGNOREDMULTI updates IPv6 counters updates use a different macro than IPv4. Fixes: 36cbb2452cbaf ("udp: Increment UDP_MIB_IGNOREDMULTI for arriving unmatched multicasts") Signed-off-by: Eric Dumazet Cc: Rick Jones Cc: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fd25e447a5fa..8125931106be 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -843,8 +843,8 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, flush_stack(stack, count, skb, count - 1); } else { if (!inner_flushed) - UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); consume_skb(skb); } return 0; From c0e760c9c66ebd8a5a1ede81868677f4df993dfb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Mar 2016 00:02:00 +0200 Subject: [PATCH 58/73] bpf: make padding in bpf_tunnel_key explicit Make the 2 byte padding in struct bpf_tunnel_key between tunnel_ttl and tunnel_label members explicit. No issue has been observed, and gcc/llvm does padding for the old struct already, where tunnel_label was not yet present, so the current code works, but since it's part of uapi, make sure we don't introduce holes in structs. Therefore, add tunnel_ext that we can use generically in future (f.e. to flag OAM messages for backends, etc). Also add the offset to the compat tests to be sure should some compilers not padd the tail of the old version of bpf_tunnel_key. Fixes: 4018ab1875e0 ("bpf: support flow label for bpf_skb_{set, get}_tunnel_key") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 924f537183fd..23917bb47bf3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -375,6 +375,7 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; + __u16 tunnel_ext; __u32 tunnel_label; }; diff --git a/net/core/filter.c b/net/core/filter.c index b7177d01ecb0..4b81b71171b4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1764,6 +1764,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { case offsetof(struct bpf_tunnel_key, tunnel_label): + case offsetof(struct bpf_tunnel_key, tunnel_ext): goto set_compat; case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have @@ -1849,6 +1850,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { case offsetof(struct bpf_tunnel_key, tunnel_label): + case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have * a common path later on. @@ -1861,7 +1863,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) return -EINVAL; } } - if (unlikely(!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label)) + if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) || + from->tunnel_ext)) return -EINVAL; skb_dst_drop(skb); From 75baacf00fb471c3b01e604357ab53da7cf72dd4 Mon Sep 17 00:00:00 2001 From: Patrick Uiterwijk Date: Wed, 30 Mar 2016 01:39:40 +0000 Subject: [PATCH 59/73] net: dsa: mv88e6xxx: Introduce _mv88e6xxx_phy_page_{read, write} Add versions of the phy_page_read and _write functions to be used in a context where the SMI mutex is held. Tested-by: Vivien Didelot Reviewed-by: Vivien Didelot Signed-off-by: Patrick Uiterwijk Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx.c | 49 +++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index fa086e09d6b7..86a202975c02 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2264,6 +2264,38 @@ static void mv88e6xxx_bridge_work(struct work_struct *work) mutex_unlock(&ps->smi_mutex); } +static int _mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, + int reg, int val) +{ + int ret; + + ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); + if (ret < 0) + goto restore_page_0; + + ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val); +restore_page_0: + _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); + + return ret; +} + +static int _mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, + int reg) +{ + int ret; + + ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); + if (ret < 0) + goto restore_page_0; + + ret = _mv88e6xxx_phy_read_indirect(ds, port, reg); +restore_page_0: + _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); + + return ret; +} + static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -2714,13 +2746,9 @@ int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg) int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); - if (ret < 0) - goto error; - ret = _mv88e6xxx_phy_read_indirect(ds, port, reg); -error: - _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); + ret = _mv88e6xxx_phy_page_read(ds, port, page, reg); mutex_unlock(&ps->smi_mutex); + return ret; } @@ -2731,14 +2759,9 @@ int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, int ret; mutex_lock(&ps->smi_mutex); - ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); - if (ret < 0) - goto error; - - ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val); -error: - _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); + ret = _mv88e6xxx_phy_page_write(ds, port, page, reg, val); mutex_unlock(&ps->smi_mutex); + return ret; } From 13a7ebb38a659254e71a4a95cf39429a9287912b Mon Sep 17 00:00:00 2001 From: Patrick Uiterwijk Date: Wed, 30 Mar 2016 01:39:41 +0000 Subject: [PATCH 60/73] net: dsa: mv88e6xxx: Clear the PDOWN bit on setup Some of the vendor-specific bootloaders set up this part of the initialization for us, so this was never added. However, since upstream bootloaders don't initialize the chip specifically, they leave the fiber MII's PDOWN flag set, which means that the CPU port doesn't connect. This patch checks whether this flag has been clear prior by something else, and if not make us clear it. Reviewed-by: Andrew Lunn Signed-off-by: Patrick Uiterwijk Tested-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/net/dsa/mv88e6xxx.h | 8 ++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 86a202975c02..50454be86570 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2296,6 +2296,25 @@ static int _mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, return ret; } +static int mv88e6xxx_power_on_serdes(struct dsa_switch *ds) +{ + int ret; + + ret = _mv88e6xxx_phy_page_read(ds, REG_FIBER_SERDES, PAGE_FIBER_SERDES, + MII_BMCR); + if (ret < 0) + return ret; + + if (ret & BMCR_PDOWN) { + ret &= ~BMCR_PDOWN; + ret = _mv88e6xxx_phy_page_write(ds, REG_FIBER_SERDES, + PAGE_FIBER_SERDES, MII_BMCR, + ret); + } + + return ret; +} + static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -2399,6 +2418,23 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) goto abort; } + /* If this port is connected to a SerDes, make sure the SerDes is not + * powered down. + */ + if (mv88e6xxx_6352_family(ds)) { + ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); + if (ret < 0) + goto abort; + ret &= PORT_STATUS_CMODE_MASK; + if ((ret == PORT_STATUS_CMODE_100BASE_X) || + (ret == PORT_STATUS_CMODE_1000BASE_X) || + (ret == PORT_STATUS_CMODE_SGMII)) { + ret = mv88e6xxx_power_on_serdes(ds); + if (ret < 0) + goto abort; + } + } + /* Port Control 2: don't force a good FCS, set the maximum frame size to * 10240 bytes, disable 802.1q tags checking, don't discard tagged or * untagged frames on this port, do a destination address lookup on all diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index 9a038aba48fb..26a424acd10f 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -28,6 +28,10 @@ #define SMI_CMD_OP_45_READ_DATA_INC ((3 << 10) | SMI_CMD_BUSY) #define SMI_DATA 0x01 +/* Fiber/SERDES Registers are located at SMI address F, page 1 */ +#define REG_FIBER_SERDES 0x0f +#define PAGE_FIBER_SERDES 0x01 + #define REG_PORT(p) (0x10 + (p)) #define PORT_STATUS 0x00 #define PORT_STATUS_PAUSE_EN BIT(15) @@ -45,6 +49,10 @@ #define PORT_STATUS_MGMII BIT(6) /* 6185 */ #define PORT_STATUS_TX_PAUSED BIT(5) #define PORT_STATUS_FLOW_CTRL BIT(4) +#define PORT_STATUS_CMODE_MASK 0x0f +#define PORT_STATUS_CMODE_100BASE_X 0x8 +#define PORT_STATUS_CMODE_1000BASE_X 0x9 +#define PORT_STATUS_CMODE_SGMII 0xa #define PORT_PCS_CTRL 0x01 #define PORT_PCS_CTRL_RGMII_DELAY_RXCLK BIT(15) #define PORT_PCS_CTRL_RGMII_DELAY_TXCLK BIT(14) From b7854efce20be7c7bcd43424dee027124e9af27f Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 30 Mar 2016 19:53:41 +0800 Subject: [PATCH 61/73] net: mvpp2: replace MVPP2_CPU_D_CACHE_LINE_SIZE with L1_CACHE_BYTES The mvpp2 ip maybe used in SoCs which may have have 64bytes cacheline size. Replace the MVPP2_CPU_D_CACHE_LINE_SIZE with L1_CACHE_BYTES. And since dma_alloc_coherent() is always cacheline size aligned, so remove the align checks. Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index c797971aefab..05f358b81791 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -321,7 +321,6 @@ /* Lbtd 802.3 type */ #define MVPP2_IP_LBDT_TYPE 0xfffa -#define MVPP2_CPU_D_CACHE_LINE_SIZE 32 #define MVPP2_TX_CSUM_MAX_SIZE 9800 /* Timeout constants */ @@ -377,7 +376,7 @@ #define MVPP2_RX_PKT_SIZE(mtu) \ ALIGN((mtu) + MVPP2_MH_SIZE + MVPP2_VLAN_TAG_LEN + \ - ETH_HLEN + ETH_FCS_LEN, MVPP2_CPU_D_CACHE_LINE_SIZE) + ETH_HLEN + ETH_FCS_LEN, L1_CACHE_BYTES) #define MVPP2_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) #define MVPP2_RX_TOTAL_SIZE(buf_size) ((buf_size) + MVPP2_SKB_SHINFO_SIZE) @@ -4493,10 +4492,6 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev, if (!aggr_txq->descs) return -ENOMEM; - /* Make sure descriptor address is cache line size aligned */ - BUG_ON(aggr_txq->descs != - PTR_ALIGN(aggr_txq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE)); - aggr_txq->last_desc = aggr_txq->size - 1; /* Aggr TXQ no reset WA */ @@ -4526,9 +4521,6 @@ static int mvpp2_rxq_init(struct mvpp2_port *port, if (!rxq->descs) return -ENOMEM; - BUG_ON(rxq->descs != - PTR_ALIGN(rxq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE)); - rxq->last_desc = rxq->size - 1; /* Zero occupied and non-occupied counters - direct access */ @@ -4616,10 +4608,6 @@ static int mvpp2_txq_init(struct mvpp2_port *port, if (!txq->descs) return -ENOMEM; - /* Make sure descriptor address is cache line size aligned */ - BUG_ON(txq->descs != - PTR_ALIGN(txq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE)); - txq->last_desc = txq->size - 1; /* Set Tx descriptors queue starting address - indirect access */ From 9bd9ddb7f89edae241d2da78e3119f226b9b0cf6 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 30 Mar 2016 19:55:21 +0800 Subject: [PATCH 62/73] net: mvneta: replace MVNETA_CPU_D_CACHE_LINE_SIZE with L1_CACHE_BYTES The mvneta is also used in some Marvell berlin family SoCs which may have 64bytes cacheline size. Replace the MVNETA_CPU_D_CACHE_LINE_SIZE usage with L1_CACHE_BYTES. And since dma_alloc_coherent() is always cacheline size aligned, so remove the align checks. Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 577f7ca7deba..58808718d114 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -260,7 +260,6 @@ #define MVNETA_VLAN_TAG_LEN 4 -#define MVNETA_CPU_D_CACHE_LINE_SIZE 32 #define MVNETA_TX_CSUM_DEF_SIZE 1600 #define MVNETA_TX_CSUM_MAX_SIZE 9800 #define MVNETA_ACC_MODE_EXT1 1 @@ -300,7 +299,7 @@ #define MVNETA_RX_PKT_SIZE(mtu) \ ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \ ETH_HLEN + ETH_FCS_LEN, \ - MVNETA_CPU_D_CACHE_LINE_SIZE) + L1_CACHE_BYTES) #define IS_TSO_HEADER(txq, addr) \ ((addr >= txq->tso_hdrs_phys) && \ @@ -2764,9 +2763,6 @@ static int mvneta_rxq_init(struct mvneta_port *pp, if (rxq->descs == NULL) return -ENOMEM; - BUG_ON(rxq->descs != - PTR_ALIGN(rxq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE)); - rxq->last_desc = rxq->size - 1; /* Set Rx descriptors queue starting address */ @@ -2837,10 +2833,6 @@ static int mvneta_txq_init(struct mvneta_port *pp, if (txq->descs == NULL) return -ENOMEM; - /* Make sure descriptor address is cache line size aligned */ - BUG_ON(txq->descs != - PTR_ALIGN(txq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE)); - txq->last_desc = txq->size - 1; /* Set maximum bandwidth for enabled TXQs */ From 32867fcc0ef9e56939d5200ad983801bbfd581ad Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 31 Mar 2016 12:05:17 -0300 Subject: [PATCH 63/73] fec: Do not access unexisting register in Coldfire Commit 55cd48c821de ("net: fec: stop the "rcv is not +last, " error messages") introduces a write to a register that does not exist in Coldfire. Move the FEC_FTRL register access inside the FEC_QUIRK_HAS_RACC 'if' block, so that we guarantee it will not be used on Coldfire CPUs. Reported-by: Greg Ungerer Signed-off-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 37c081583084..08243c2ff4b4 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -943,8 +943,8 @@ fec_restart(struct net_device *ndev) else val &= ~FEC_RACC_OPTIONS; writel(val, fep->hwp + FEC_RACC); + writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL); } - writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL); #endif /* From c57c7a95da842807b475b823ed2e5435c42cb3b0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 31 Mar 2016 18:10:31 +0200 Subject: [PATCH 64/73] rtnl: fix msg size calculation in if_nlmsg_size() Size of the attribute IFLA_PHYS_PORT_NAME was missing. Fixes: db24a9044ee1 ("net: add support for phys_port_name") CC: David Ahern Signed-off-by: Nicolas Dichtel Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f2066772d0f3..a75f7e94b445 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -909,6 +909,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } From 79f4223257bfef52b0a26d0d7ad4019e764be6ce Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 31 Mar 2016 15:16:47 +0200 Subject: [PATCH 65/73] net: usb: cdc_ncm: adding Telit LE910 V2 mobile broadband card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telit LE910 V2 is a mobile broadband card with no ARP capabilities: the patch makes this device to use wwan_noarp_info struct Signed-off-by: Daniele Palmas Reviewed-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 86ba30ba35e8..2fb31edab125 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1626,6 +1626,13 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long) &wwan_info, }, + /* Telit LE910 V2 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x0036, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_noarp_info, + }, + /* DW5812 LTE Verizon Mobile Broadband Card * Unlike DW5550 this device requires FLAG_NOARP */ From 5a5abb1fa3b05dd6aa821525832644c1e7d2905f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Mar 2016 02:13:18 +0200 Subject: [PATCH 66/73] tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter Sasha Levin reported a suspicious rcu_dereference_protected() warning found while fuzzing with trinity that is similar to this one: [ 52.765684] net/core/filter.c:2262 suspicious rcu_dereference_protected() usage! [ 52.765688] other info that might help us debug this: [ 52.765695] rcu_scheduler_active = 1, debug_locks = 1 [ 52.765701] 1 lock held by a.out/1525: [ 52.765704] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ 52.765721] stack backtrace: [ 52.765728] CPU: 1 PID: 1525 Comm: a.out Not tainted 4.5.0+ #264 [...] [ 52.765768] Call Trace: [ 52.765775] [] dump_stack+0x85/0xc8 [ 52.765784] [] lockdep_rcu_suspicious+0xd5/0x110 [ 52.765792] [] sk_detach_filter+0x82/0x90 [ 52.765801] [] tun_detach_filter+0x35/0x90 [tun] [ 52.765810] [] __tun_chr_ioctl+0x354/0x1130 [tun] [ 52.765818] [] ? selinux_file_ioctl+0x130/0x210 [ 52.765827] [] tun_chr_ioctl+0x13/0x20 [tun] [ 52.765834] [] do_vfs_ioctl+0x96/0x690 [ 52.765843] [] ? security_file_ioctl+0x43/0x60 [ 52.765850] [] SyS_ioctl+0x79/0x90 [ 52.765858] [] do_syscall_64+0x62/0x140 [ 52.765866] [] entry_SYSCALL64_slow_path+0x25/0x25 Same can be triggered with PROVE_RCU (+ PROVE_RCU_REPEATEDLY) enabled from tun_attach_filter() when user space calls ioctl(tun_fd, TUN{ATTACH, DETACH}FILTER, ...) for adding/removing a BPF filter on tap devices. Since the fix in f91ff5b9ff52 ("net: sk_{detach|attach}_filter() rcu fixes") sk_attach_filter()/sk_detach_filter() now dereferences the filter with rcu_dereference_protected(), checking whether socket lock is held in control path. Since its introduction in 994051625981 ("tun: socket filter support"), tap filters are managed under RTNL lock from __tun_chr_ioctl(). Thus the sock_owned_by_user(sk) doesn't apply in this specific case and therefore triggers the false positive. Extend the BPF API with __sk_attach_filter()/__sk_detach_filter() pair that is used by tap filters and pass in lockdep_rtnl_is_held() for the rcu_dereference_protected() checks instead. Reported-by: Sasha Levin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/net/tun.c | 8 +++++--- include/linux/filter.h | 4 ++++ net/core/filter.c | 33 +++++++++++++++++++++------------ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index afdf950617c3..510e90a6bb26 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -622,7 +622,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { - err = sk_attach_filter(&tun->fprog, tfile->socket.sk); + err = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (!err) goto out; } @@ -1822,7 +1823,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - sk_detach_filter(tfile->socket.sk); + __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held()); } tun->filter_attached = false; @@ -1835,7 +1836,8 @@ static int tun_attach_filter(struct tun_struct *tun) for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); + ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk, + lockdep_rtnl_is_held()); if (ret) { tun_detach_filter(tun, i); return ret; diff --git a/include/linux/filter.h b/include/linux/filter.h index 43aa1f8855c7..a51a5361695f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -465,10 +465,14 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +int __sk_detach_filter(struct sock *sk, bool locked); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/net/core/filter.c b/net/core/filter.c index 4b81b71171b4..ca7f832b2980 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1149,7 +1149,8 @@ void bpf_prog_destroy(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_destroy); -static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) +static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, + bool locked) { struct sk_filter *fp, *old_fp; @@ -1165,10 +1166,8 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; } - old_fp = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + old_fp = rcu_dereference_protected(sk->sk_filter, locked); rcu_assign_pointer(sk->sk_filter, fp); - if (old_fp) sk_filter_uncharge(sk, old_fp); @@ -1247,7 +1246,8 @@ struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) * occurs or there is insufficient memory for the filter a negative * errno code is returned. On success the return is zero. */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked) { struct bpf_prog *prog = __get_filter(fprog, sk); int err; @@ -1255,7 +1255,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, locked); if (err < 0) { __bpf_prog_release(prog); return err; @@ -1263,7 +1263,12 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return 0; } -EXPORT_SYMBOL_GPL(sk_attach_filter); +EXPORT_SYMBOL_GPL(__sk_attach_filter); + +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); +} int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { @@ -1309,7 +1314,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) if (IS_ERR(prog)) return PTR_ERR(prog); - err = __sk_attach_prog(prog, sk); + err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); if (err < 0) { bpf_prog_put(prog); return err; @@ -2250,7 +2255,7 @@ static int __init register_sk_filter_ops(void) } late_initcall(register_sk_filter_ops); -int sk_detach_filter(struct sock *sk) +int __sk_detach_filter(struct sock *sk, bool locked) { int ret = -ENOENT; struct sk_filter *filter; @@ -2258,8 +2263,7 @@ int sk_detach_filter(struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - filter = rcu_dereference_protected(sk->sk_filter, - sock_owned_by_user(sk)); + filter = rcu_dereference_protected(sk->sk_filter, locked); if (filter) { RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); @@ -2268,7 +2272,12 @@ int sk_detach_filter(struct sock *sk) return ret; } -EXPORT_SYMBOL_GPL(sk_detach_filter); +EXPORT_SYMBOL_GPL(__sk_detach_filter); + +int sk_detach_filter(struct sock *sk) +{ + return __sk_detach_filter(sk, sock_owned_by_user(sk)); +} int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) From d82b0c21d44d1cc61a23db56c2b2cf52ea9f3220 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 31 Mar 2016 17:01:23 +0800 Subject: [PATCH 67/73] net: mvpp2: fix maybe-uninitialized warning This is to fix the following maybe-uninitialized warning: drivers/net/ethernet/marvell/mvpp2.c:6007:18: warning: 'err' may be used uninitialized in this function [-Wmaybe-uninitialized] Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 05f358b81791..2750cb978f42 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6047,8 +6047,10 @@ static int mvpp2_port_init(struct mvpp2_port *port) /* Map physical Rx queue to port's logical Rx queue */ rxq = devm_kzalloc(dev, sizeof(*rxq), GFP_KERNEL); - if (!rxq) + if (!rxq) { + err = -ENOMEM; goto err_free_percpu; + } /* Map this Rx queue to a physical queue */ rxq->id = port->first_rxq + queue; rxq->port = port->id; From 4a0a12d27ca5e892579f3c61c18c8dcfebf425d4 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 1 Apr 2016 17:11:05 +0800 Subject: [PATCH 68/73] net: mvpp2: use cache_line_size() to get cacheline size L1_CACHE_BYTES may not be the real cacheline size, use cache_line_size to determine the cacheline size in runtime. Signed-off-by: Jisheng Zhang Suggested-by: Marcin Wojtas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 2750cb978f42..868a957f24bb 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -376,7 +376,7 @@ #define MVPP2_RX_PKT_SIZE(mtu) \ ALIGN((mtu) + MVPP2_MH_SIZE + MVPP2_VLAN_TAG_LEN + \ - ETH_HLEN + ETH_FCS_LEN, L1_CACHE_BYTES) + ETH_HLEN + ETH_FCS_LEN, cache_line_size()) #define MVPP2_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) #define MVPP2_RX_TOTAL_SIZE(buf_size) ((buf_size) + MVPP2_SKB_SHINFO_SIZE) From c66e98c9532bf7b58fe78cc59fb37d3ff651756d Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 1 Apr 2016 17:12:49 +0800 Subject: [PATCH 69/73] net: mvneta: use cache_line_size() to get cacheline size L1_CACHE_BYTES may not be the real cacheline size, use cache_line_size to determine the cacheline size in runtime. Signed-off-by: Jisheng Zhang Suggested-by: Marcin Wojtas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 58808718d114..b1db000f1927 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -299,7 +299,7 @@ #define MVNETA_RX_PKT_SIZE(mtu) \ ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \ ETH_HLEN + ETH_FCS_LEN, \ - L1_CACHE_BYTES) + cache_line_size()) #define IS_TSO_HEADER(txq, addr) \ ((addr >= txq->tso_hdrs_phys) && \ From a00e3ab64be2e482bd25fe30bdc9bfa6e2b0cf1c Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:14 +0200 Subject: [PATCH 70/73] stmmac: fix TX normal DESC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixs a regression raised when test on chips that use the normal descriptor layout. In fact, no len bits were set for the TDES1 and no OWN bit inside the TDES0. Signed-off-by: Giuseppe CAVALLARO Tested-by: Andreas Färber Cc: Fabrice Gasnier Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index e13228f115f0..011386f6f24d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -199,11 +199,6 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, { unsigned int tdes1 = p->des1; - if (mode == STMMAC_CHAIN_MODE) - norm_set_tx_desc_len_on_chain(p, len); - else - norm_set_tx_desc_len_on_ring(p, len); - if (is_fs) tdes1 |= TDES1_FIRST_SEGMENT; else @@ -217,10 +212,15 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, if (ls) tdes1 |= TDES1_LAST_SEGMENT; - if (tx_own) - tdes1 |= TDES0_OWN; - p->des1 = tdes1; + + if (mode == STMMAC_CHAIN_MODE) + norm_set_tx_desc_len_on_chain(p, len); + else + norm_set_tx_desc_len_on_ring(p, len); + + if (tx_own) + p->des0 |= TDES0_OWN; } static void ndesc_set_tx_ic(struct dma_desc *p) From d7e944c8ddc0983640a9a32868fb217485d12ca2 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:15 +0200 Subject: [PATCH 71/73] Revert "stmmac: Fix 'eth0: No PHY found' regression" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 88f8b1bb41c6208f81b6a480244533ded7b59493. due to problems on GeekBox and Banana Pi M1 board when connected to a real transceiver instead of a switch via fixed-link. Signed-off-by: Giuseppe Cavallaro Cc: Gabriel Fernandez Cc: Andreas Färber Cc: Frank Schäfer Cc: Dinh Nguyen Cc: David S. Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 11 ++++++++++- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 9 +-------- include/linux/stmmac.h | 1 - 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index ea76129dafc2..af09ced53bd8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -199,12 +199,21 @@ int stmmac_mdio_register(struct net_device *ndev) struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; int addr, found; - struct device_node *mdio_node = priv->plat->mdio_node; + struct device_node *mdio_node = NULL; + struct device_node *child_node = NULL; if (!mdio_bus_data) return 0; if (IS_ENABLED(CONFIG_OF)) { + for_each_child_of_node(priv->device->of_node, child_node) { + if (of_device_is_compatible(child_node, + "snps,dwmac-mdio")) { + mdio_node = child_node; + break; + } + } + if (mdio_node) { netdev_dbg(ndev, "FOUND MDIO subnode\n"); } else { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index dcbd2a1601e8..9cf181f839fd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -146,7 +146,6 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; struct stmmac_dma_cfg *dma_cfg; - struct device_node *child_node = NULL; plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); if (!plat) @@ -177,19 +176,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->phy_node = of_node_get(np); } - for_each_child_of_node(np, child_node) - if (of_device_is_compatible(child_node, "snps,dwmac-mdio")) { - plat->mdio_node = child_node; - break; - } - /* "snps,phy-addr" is not a standard property. Mark it as deprecated * and warn of its use. Remove this when phy node support is added. */ if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - if ((plat->phy_node && !of_phy_is_fixed_link(np)) || !plat->mdio_node) + if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name) plat->mdio_bus_data = NULL; else plat->mdio_bus_data = diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 4bcf5a61aada..6e53fa8942a4 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -114,7 +114,6 @@ struct plat_stmmacenet_data { int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; From a7657f128c279ae5796ab2ca7d04a7819f4259f0 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:16 +0200 Subject: [PATCH 72/73] stmmac: fix MDIO settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially the phy_bus_name was added to manipulate the driver name but it was recently just used to manage the fixed-link and then to take some decision at run-time. So the patch uses the is_pseudo_fixed_link and removes the phy_bus_name variable not necessary anymore. The driver can manage the mdio registration by using phy-handle, dwmac-mdio and own parameter e.g. snps,phy-addr. This patch takes care about all these possible configurations and fixes the mdio registration in case of there is a real transceiver or a switch (that needs to be managed by using fixed-link). Signed-off-by: Giuseppe Cavallaro Reviewed-by: Andreas Färber Tested-by: Frank Schäfer Cc: Gabriel Fernandez Cc: Dinh Nguyen Cc: David S. Miller Cc: Phil Reid Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++-- .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 19 +---- .../ethernet/stmicro/stmmac/stmmac_platform.c | 84 +++++++++++++++---- include/linux/stmmac.h | 2 +- 4 files changed, 73 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4c5ce9848ca9..78464fa7fe1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -278,7 +278,6 @@ static void stmmac_eee_ctrl_timer(unsigned long arg) */ bool stmmac_eee_init(struct stmmac_priv *priv) { - char *phy_bus_name = priv->plat->phy_bus_name; unsigned long flags; bool ret = false; @@ -290,7 +289,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) goto out; /* Never init EEE in case of a switch is attached */ - if (phy_bus_name && (!strcmp(phy_bus_name, "fixed"))) + if (priv->phydev->is_pseudo_fixed_link) goto out; /* MAC core supports the EEE feature. */ @@ -827,12 +826,8 @@ static int stmmac_init_phy(struct net_device *dev) phydev = of_phy_connect(dev, priv->plat->phy_node, &stmmac_adjust_link, 0, interface); } else { - if (priv->plat->phy_bus_name) - snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", - priv->plat->phy_bus_name, priv->plat->bus_id); - else - snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", - priv->plat->bus_id); + snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", + priv->plat->bus_id); snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, priv->plat->phy_addr); @@ -871,9 +866,8 @@ static int stmmac_init_phy(struct net_device *dev) } /* If attached to a switch, there is no reason to poll phy handler */ - if (priv->plat->phy_bus_name) - if (!strcmp(priv->plat->phy_bus_name, "fixed")) - phydev->irq = PHY_IGNORE_INTERRUPT; + if (phydev->is_pseudo_fixed_link) + phydev->irq = PHY_IGNORE_INTERRUPT; pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" " Link = %d\n", dev->name, phydev->phy_id, phydev->link); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index af09ced53bd8..06704ca6f9ca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -198,29 +198,12 @@ int stmmac_mdio_register(struct net_device *ndev) struct mii_bus *new_bus; struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; + struct device_node *mdio_node = priv->plat->mdio_node; int addr, found; - struct device_node *mdio_node = NULL; - struct device_node *child_node = NULL; if (!mdio_bus_data) return 0; - if (IS_ENABLED(CONFIG_OF)) { - for_each_child_of_node(priv->device->of_node, child_node) { - if (of_device_is_compatible(child_node, - "snps,dwmac-mdio")) { - mdio_node = child_node; - break; - } - } - - if (mdio_node) { - netdev_dbg(ndev, "FOUND MDIO subnode\n"); - } else { - netdev_warn(ndev, "No MDIO subnode found\n"); - } - } - new_bus = mdiobus_alloc(); if (new_bus == NULL) return -ENOMEM; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 9cf181f839fd..cf37ea558ecc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -131,6 +131,69 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) return axi; } +/** + * stmmac_dt_phy - parse device-tree driver parameters to allocate PHY resources + * @plat: driver data platform structure + * @np: device tree node + * @dev: device pointer + * Description: + * The mdio bus will be allocated in case of a phy transceiver is on board; + * it will be NULL if the fixed-link is configured. + * If there is the "snps,dwmac-mdio" sub-node the mdio will be allocated + * in any case (for DSA, mdio must be registered even if fixed-link). + * The table below sums the supported configurations: + * ------------------------------- + * snps,phy-addr | Y + * ------------------------------- + * phy-handle | Y + * ------------------------------- + * fixed-link | N + * ------------------------------- + * snps,dwmac-mdio | + * even if | Y + * fixed-link | + * ------------------------------- + * + * It returns 0 in case of success otherwise -ENODEV. + */ +static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, + struct device_node *np, struct device *dev) +{ + bool mdio = true; + + /* If phy-handle property is passed from DT, use it as the PHY */ + plat->phy_node = of_parse_phandle(np, "phy-handle", 0); + if (plat->phy_node) + dev_dbg(dev, "Found phy-handle subnode\n"); + + /* If phy-handle is not specified, check if we have a fixed-phy */ + if (!plat->phy_node && of_phy_is_fixed_link(np)) { + if ((of_phy_register_fixed_link(np) < 0)) + return -ENODEV; + + dev_dbg(dev, "Found fixed-link subnode\n"); + plat->phy_node = of_node_get(np); + mdio = false; + } + + /* If snps,dwmac-mdio is passed from DT, always register the MDIO */ + for_each_child_of_node(np, plat->mdio_node) { + if (of_device_is_compatible(plat->mdio_node, "snps,dwmac-mdio")) + break; + } + + if (plat->mdio_node) { + dev_dbg(dev, "Found MDIO subnode\n"); + mdio = true; + } + + if (mdio) + plat->mdio_bus_data = + devm_kzalloc(dev, sizeof(struct stmmac_mdio_bus_data), + GFP_KERNEL); + return 0; +} + /** * stmmac_probe_config_dt - parse device-tree driver parameters * @pdev: platform_device structure @@ -165,30 +228,15 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) /* Default to phy auto-detection */ plat->phy_addr = -1; - /* If we find a phy-handle property, use it as the PHY */ - plat->phy_node = of_parse_phandle(np, "phy-handle", 0); - - /* If phy-handle is not specified, check if we have a fixed-phy */ - if (!plat->phy_node && of_phy_is_fixed_link(np)) { - if ((of_phy_register_fixed_link(np) < 0)) - return ERR_PTR(-ENODEV); - - plat->phy_node = of_node_get(np); - } - /* "snps,phy-addr" is not a standard property. Mark it as deprecated * and warn of its use. Remove this when phy node support is added. */ if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name) - plat->mdio_bus_data = NULL; - else - plat->mdio_bus_data = - devm_kzalloc(&pdev->dev, - sizeof(struct stmmac_mdio_bus_data), - GFP_KERNEL); + /* To Configure PHY by using all device-tree supported properties */ + if (stmmac_dt_phy(plat, np, &pdev->dev)) + return ERR_PTR(-ENODEV); of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 6e53fa8942a4..e6bc30a42a74 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -108,12 +108,12 @@ struct stmmac_axi { }; struct plat_stmmacenet_data { - char *phy_bus_name; int bus_id; int phy_addr; int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; + struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; From db5dd0db2d8352bb7fd5e9d16e17b79d66c7e4e3 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Fri, 1 Apr 2016 15:21:18 +0200 Subject: [PATCH 73/73] net: mvneta: fix changing MTU when using per-cpu processing After enabling per-cpu processing it appeared that under heavy load changing MTU can result in blocking all port's interrupts and transmitting data is not possible after the change. This commit fixes above issue by disabling percpu interrupts for the time, when TXQs and RXQs are reconfigured. Signed-off-by: Marcin Wojtas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 30 ++++++++++++++------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index b1db000f1927..7fc490225da5 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3042,6 +3042,20 @@ static int mvneta_check_mtu_valid(struct net_device *dev, int mtu) return mtu; } +static void mvneta_percpu_enable(void *arg) +{ + struct mvneta_port *pp = arg; + + enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE); +} + +static void mvneta_percpu_disable(void *arg) +{ + struct mvneta_port *pp = arg; + + disable_percpu_irq(pp->dev->irq); +} + /* Change the device mtu */ static int mvneta_change_mtu(struct net_device *dev, int mtu) { @@ -3066,6 +3080,7 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) * reallocation of the queues */ mvneta_stop_dev(pp); + on_each_cpu(mvneta_percpu_disable, pp, true); mvneta_cleanup_txqs(pp); mvneta_cleanup_rxqs(pp); @@ -3089,6 +3104,7 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) return ret; } + on_each_cpu(mvneta_percpu_enable, pp, true); mvneta_start_dev(pp); mvneta_port_up(pp); @@ -3242,20 +3258,6 @@ static void mvneta_mdio_remove(struct mvneta_port *pp) pp->phy_dev = NULL; } -static void mvneta_percpu_enable(void *arg) -{ - struct mvneta_port *pp = arg; - - enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE); -} - -static void mvneta_percpu_disable(void *arg) -{ - struct mvneta_port *pp = arg; - - disable_percpu_irq(pp->dev->irq); -} - /* Electing a CPU must be done in an atomic way: it should be done * after or before the removal/insertion of a CPU and this function is * not reentrant.