From 042cb56478152b31c50bea8a784fc826891eb38e Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Mon, 5 Nov 2018 14:35:40 -0800 Subject: [PATCH 01/41] net: phy: Allow BCM54616S PHY to setup internal TX/RX clock delay This patch allows users to enable/disable internal TX and/or RX clock delay for BCM54616S PHYs so as to satisfy RGMII timing specifications. On a particular platform, whether TX and/or RX clock delay is required depends on how PHY connected to the MAC IP. This requirement can be specified through "phy-mode" property in the platform device tree. The patch is inspired by commit 733336262b28 ("net: phy: Allow BCM5481x PHYs to setup internal TX/RX clock delay"). Signed-off-by: Tao Ren Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index e86ea105c802..704537010453 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -92,7 +92,7 @@ static int bcm54612e_config_init(struct phy_device *phydev) return 0; } -static int bcm5481x_config(struct phy_device *phydev) +static int bcm54xx_config_clock_delay(struct phy_device *phydev) { int rc, val; @@ -429,7 +429,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) ret = genphy_config_aneg(phydev); /* Then we can set up the delay. */ - bcm5481x_config(phydev); + bcm54xx_config_clock_delay(phydev); if (of_property_read_bool(np, "enet-phy-lane-swap")) { /* Lane Swap - Undocumented register...magic! */ @@ -442,6 +442,19 @@ static int bcm5481_config_aneg(struct phy_device *phydev) return ret; } +static int bcm54616s_config_aneg(struct phy_device *phydev) +{ + int ret; + + /* Aneg firsly. */ + ret = genphy_config_aneg(phydev); + + /* Then we can set up the delay. */ + bcm54xx_config_clock_delay(phydev); + + return ret; +} + static int brcm_phy_setbits(struct phy_device *phydev, int reg, int set) { int val; @@ -636,6 +649,7 @@ static struct phy_driver broadcom_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = bcm54xx_config_init, + .config_aneg = bcm54616s_config_aneg, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { From ce317dd9f809c8da9656c88761e30f0a82a8c2e6 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Fri, 26 Oct 2018 10:40:50 -0700 Subject: [PATCH 02/41] ice: Set carrier state and start/stop queues in rebuild Set the carrier state post rebuild by querying the link status. Also start/stop queues based on link status. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 05993451147a..6d31ffb64940 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3296,7 +3296,7 @@ static void ice_rebuild(struct ice_pf *pf) struct device *dev = &pf->pdev->dev; struct ice_hw *hw = &pf->hw; enum ice_status ret; - int err; + int err, i; if (test_bit(__ICE_DOWN, pf->state)) goto clear_recovery; @@ -3370,6 +3370,22 @@ static void ice_rebuild(struct ice_pf *pf) } ice_reset_all_vfs(pf, true); + + for (i = 0; i < pf->num_alloc_vsi; i++) { + bool link_up; + + if (!pf->vsi[i] || pf->vsi[i]->type != ICE_VSI_PF) + continue; + ice_get_link_status(pf->vsi[i]->port_info, &link_up); + if (link_up) { + netif_carrier_on(pf->vsi[i]->netdev); + netif_tx_wake_all_queues(pf->vsi[i]->netdev); + } else { + netif_carrier_off(pf->vsi[i]->netdev); + netif_tx_stop_all_queues(pf->vsi[i]->netdev); + } + } + /* if we get here, reset flow is successful */ clear_bit(__ICE_RESET_FAILED, pf->state); return; From afd9d4ab58db20029a75cf82f23b6a5641cd7d6f Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Fri, 26 Oct 2018 10:40:51 -0700 Subject: [PATCH 03/41] ice: Check for reset in progress during remove The remove path does not currently check to see if a reset is in progress before proceeding. This can cause a resource collision resulting in various types of errors. Check for reset in progress and wait for a reasonable amount of time before allowing the remove to progress. Signed-off-by: Dave Ertman Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4c4b5717a627..e5b37fa60884 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -76,6 +76,8 @@ extern const char ice_drv_ver[]; #define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) +#define ICE_MAX_RESET_WAIT 20 + #define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6d31ffb64940..aee22f11a41a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2182,6 +2182,12 @@ static void ice_remove(struct pci_dev *pdev) if (!pf) return; + for (i = 0; i < ICE_MAX_RESET_WAIT; i++) { + if (!ice_is_reset_in_progress(pf->state)) + break; + msleep(100); + } + set_bit(__ICE_DOWN, pf->state); ice_service_task_stop(pf); From 0f5d4c21a50716f8bd4e220544b82dca7408d113 Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Fri, 26 Oct 2018 10:40:52 -0700 Subject: [PATCH 04/41] ice: Fix dead device link issue with flow control Setting Rx or Tx pause parameter currently results in link loss on the interface, requiring the platform/host to be cold power cycled. Fix it. Signed-off-by: Akeem G Abodunrin Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 96923580f2a6..648acdb4c644 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1517,10 +1517,15 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) } if (!test_bit(__ICE_DOWN, pf->state)) { - /* Give it a little more time to try to come back */ + /* Give it a little more time to try to come back. If still + * down, restart autoneg link or reinitialize the interface. + */ msleep(75); if (!test_bit(__ICE_DOWN, pf->state)) return ice_nway_reset(netdev); + + ice_down(vsi); + ice_up(vsi); } return err; From 33e055fcc26909b1d66b5d1f334aee38356068d7 Mon Sep 17 00:00:00 2001 From: Victor Raj Date: Fri, 26 Oct 2018 10:40:53 -0700 Subject: [PATCH 05/41] ice: Free VSI contexts during for unload In the unload path, all VSIs are freed. Also free the related VSI contexts to prevent memory leaks. Signed-off-by: Victor Raj Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 3 +++ drivers/net/ethernet/intel/ice/ice_switch.c | 12 ++++++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 2 ++ 3 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 8cd6a2401fd9..554fd707a6d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -811,6 +811,9 @@ void ice_deinit_hw(struct ice_hw *hw) /* Attempt to disable FW logging before shutting down control queues */ ice_cfg_fw_log(hw, false); ice_shutdown_all_ctrlq(hw); + + /* Clear VSI contexts if not already cleared */ + ice_clear_all_vsi_ctx(hw); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 33403f39f1b3..40c9c6558956 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -347,6 +347,18 @@ static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle) } } +/** + * ice_clear_all_vsi_ctx - clear all the VSI context entries + * @hw: pointer to the hw struct + */ +void ice_clear_all_vsi_ctx(struct ice_hw *hw) +{ + u16 i; + + for (i = 0; i < ICE_MAX_VSI; i++) + ice_clear_vsi_ctx(hw, i); +} + /** * ice_add_vsi - add VSI context to the hardware and VSI handle list * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index b88d96a1ef69..d5ef0bd58bf9 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -190,6 +190,8 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, struct ice_sq_cd *cd); bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle); struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle); +void ice_clear_all_vsi_ctx(struct ice_hw *hw); +/* Switch config */ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw); /* Switch/bridge related commands */ From 9ecd25c26810a61b9c3abc6c73de32dca6da96e1 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Fri, 26 Oct 2018 10:40:54 -0700 Subject: [PATCH 06/41] ice: Remove duplicate addition of VLANs in replay path ice_restore_vlan and active_vlans were originally put in place to reprogram VLAN filters in the replay path. This is now done as part of the much broader VSI rebuild/replay framework. So remove both ice_restore_vlan and active_vlans Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 1 - drivers/net/ethernet/intel/ice/ice_main.c | 42 +++---------------- .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 - 3 files changed, 6 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index e5b37fa60884..1639e955f158 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -191,7 +191,6 @@ struct ice_vsi { u64 tx_linearize; DECLARE_BITMAP(state, __ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; unsigned int current_netdev_flags; u32 tx_restart; u32 tx_busy; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index aee22f11a41a..338abb1b9233 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1622,7 +1622,6 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - int ret; if (vid >= VLAN_N_VID) { netdev_err(netdev, "VLAN id requested %d is out of range %d\n", @@ -1635,7 +1634,8 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, /* Enable VLAN pruning when VLAN 0 is added */ if (unlikely(!vid)) { - ret = ice_cfg_vlan_pruning(vsi, true); + int ret = ice_cfg_vlan_pruning(vsi, true); + if (ret) return ret; } @@ -1644,12 +1644,7 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, * needed to continue allowing all untagged packets since VLAN prune * list is applied to all packets by the switch */ - ret = ice_vsi_add_vlan(vsi, vid); - - if (!ret) - set_bit(vid, vsi->active_vlans); - - return ret; + return ice_vsi_add_vlan(vsi, vid); } /** @@ -1677,8 +1672,6 @@ static int ice_vlan_rx_kill_vid(struct net_device *netdev, if (status) return status; - clear_bit(vid, vsi->active_vlans); - /* Disable VLAN pruning when VLAN 0 is removed */ if (unlikely(!vid)) status = ice_cfg_vlan_pruning(vsi, false); @@ -2515,31 +2508,6 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi) return ret; } -/** - * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up - * @vsi: the VSI being brought back up - */ -static int ice_restore_vlan(struct ice_vsi *vsi) -{ - int err; - u16 vid; - - if (!vsi->netdev) - return -EINVAL; - - err = ice_vsi_vlan_setup(vsi); - if (err) - return err; - - for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) { - err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid); - if (err) - break; - } - - return err; -} - /** * ice_vsi_cfg - Setup the VSI * @vsi: the VSI being configured @@ -2552,7 +2520,9 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) if (vsi->netdev) { ice_set_rx_mode(vsi->netdev); - err = ice_restore_vlan(vsi); + + err = ice_vsi_vlan_setup(vsi); + if (err) return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 45f10f8f01dc..9576b958622b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2171,7 +2171,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (!ice_vsi_add_vlan(vsi, vid)) { vf->num_vlan++; - set_bit(vid, vsi->active_vlans); /* Enable VLAN pruning when VLAN 0 is added */ if (unlikely(!vid)) @@ -2190,7 +2189,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) */ if (!ice_vsi_kill_vlan(vsi, vid)) { vf->num_vlan--; - clear_bit(vid, vsi->active_vlans); /* Disable VLAN pruning when removing VLAN 0 */ if (unlikely(!vid)) From 58297dd133f64ea028e7d52dd00cd9ff8aa4479f Mon Sep 17 00:00:00 2001 From: Md Fahad Iqbal Polash Date: Fri, 26 Oct 2018 10:40:55 -0700 Subject: [PATCH 07/41] ice: Fix flags for port VLAN According to the spec, whenever insert PVID field is set, the VLAN driver insertion mode should be set to 01b which isn't done currently. Fix it. Signed-off-by: Md Fahad Iqbal Polash Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 9576b958622b..e71065f9d391 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -348,7 +348,7 @@ static int ice_vsi_set_pvid(struct ice_vsi *vsi, u16 vid) struct ice_vsi_ctx ctxt = { 0 }; enum ice_status status; - ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_TAGGED | + ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_UNTAGGED | ICE_AQ_VSI_PVLAN_INSERT_PVID | ICE_AQ_VSI_VLAN_EMOD_STR; ctxt.info.pvid = cpu_to_le16(vid); From 31082519c11b01fe1fb6dd512055f252812c1508 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Fri, 26 Oct 2018 10:40:56 -0700 Subject: [PATCH 08/41] ice: Fix typo in error message Print should say "Enabling" instead of "Enaabling" Signed-off-by: Akeem G Abodunrin Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 5bacad01f0c9..c604a44c8cfb 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1997,7 +1997,7 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena) status = ice_update_vsi(&vsi->back->hw, vsi->idx, ctxt, NULL); if (status) { netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %d\n", - ena ? "Ena" : "Dis", vsi->idx, vsi->vsi_num, status, + ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status, vsi->back->hw.adminq.sq_last_status); goto err_out; } From 25525b69bb44a628841492f44a5a8e74f34724f4 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Fri, 26 Oct 2018 10:40:57 -0700 Subject: [PATCH 09/41] ice: Fix napi delete calls for remove In the remove path, the vsi->netdev is being set to NULL before the call to free vectors. This is causing the netif_napi_del call to never be made. Add a call to ice_napi_del to the same location as the calls to unregister_netdev and just prior to them. This will use the reverse flow as the register and netif_napi_add calls. Signed-off-by: Dave Ertman Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_lib.c | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 1639e955f158..b8548370f1c7 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -370,5 +370,6 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); +void ice_napi_del(struct ice_vsi *vsi); #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c604a44c8cfb..1041fa2a7767 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2458,6 +2458,7 @@ int ice_vsi_release(struct ice_vsi *vsi) * on this wq */ if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { + ice_napi_del(vsi); unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 338abb1b9233..82f49dbd762c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1465,7 +1465,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) * ice_napi_del - Remove NAPI handler for the VSI * @vsi: VSI for which NAPI handler is to be removed */ -static void ice_napi_del(struct ice_vsi *vsi) +void ice_napi_del(struct ice_vsi *vsi) { int v_idx; From c585ea42ec75e8d3afa278b7095d9f0dd6ee515b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Fri, 26 Oct 2018 10:40:58 -0700 Subject: [PATCH 10/41] ice: Fix tx_timeout in PF driver Prior to this commit the driver was running into tx_timeouts when a queue was stressed enough. This was happening because the HW tail and SW tail (NTU) were incorrectly out of sync. Consequently this was causing the HW head to collide with the HW tail, which to the hardware means that all descriptors posted for Tx have been processed. Due to the Tx logic used in the driver SW tail and HW tail are allowed to be out of sync. This is done as an optimization because it allows the driver to write HW tail as infrequently as possible, while still updating the SW tail index to keep track. However, there are situations where this results in the tail never getting updated, resulting in Tx timeouts. Tx HW tail write condition: if (netif_xmit_stopped(txring_txq(tx_ring) || !skb->xmit_more) writel(sw_tail, tx_ring->tail); An issue was found in the Tx logic that was causing the afore mentioned condition for updating HW tail to never happen, causing tx_timeouts. In ice_xmit_frame_ring we calculate how many descriptors we need for the Tx transaction based on the skb the kernel hands us. This is then passed into ice_maybe_stop_tx along with some extra padding to determine if we have enough descriptors available for this transaction. If we don't then we return -EBUSY to the stack, otherwise we move on and eventually prepare the Tx descriptors accordingly in ice_tx_map and set next_to_watch. In ice_tx_map we make another call to ice_maybe_stop_tx with a value of MAX_SKB_FRAGS + 4. The key here is that this value is possibly less than the value we sent in the first call to ice_maybe_stop_tx in ice_xmit_frame_ring. Now, if the number of unused descriptors is between MAX_SKB_FRAGS + 4 and the value used in the first call to ice_maybe_stop_tx in ice_xmit_frame_ring then we do not update the HW tail because of the "Tx HW tail write condition" above. This is because in ice_maybe_stop_tx we return success from ice_maybe_stop_tx instead of calling __ice_maybe_stop_tx and subsequently calling netif_stop_subqueue, which sets the __QUEUE_STATE_DEV_XOFF bit. This bit is then checked in the "Tx HW tail write condition" by calling netif_xmit_stopped and subsequently updating HW tail if the afore mentioned bit is set. In ice_clean_tx_irq, if next_to_watch is not NULL, we end up cleaning the descriptors that HW sets the DD bit on and we have the budget. The HW head will eventually run into the HW tail in response to the description in the paragraph above. The next time through ice_xmit_frame_ring we make the initial call to ice_maybe_stop_tx with another skb from the stack. This time we do not have enough descriptors available and we return NETDEV_TX_BUSY to the stack and end up setting next_to_watch to NULL. This is where we are stuck. In ice_clean_tx_irq we never clean anything because next_to_watch is always NULL and in ice_xmit_frame_ring we never update HW tail because we already return NETDEV_TX_BUSY to the stack and eventually we hit a tx_timeout. This issue was fixed by making sure that the second call to ice_maybe_stop_tx in ice_tx_map is passed a value that is >= the value that was used on the initial call to ice_maybe_stop_tx in ice_xmit_frame_ring. This was done by adding the following defines to make the logic more clear and to reduce the chance of mucking this up again: ICE_CACHE_LINE_BYTES 64 ICE_DESCS_PER_CACHE_LINE (ICE_CACHE_LINE_BYTES / \ sizeof(struct ice_tx_desc)) ICE_DESCS_FOR_CTX_DESC 1 ICE_DESCS_FOR_SKB_DATA_PTR 1 The ICE_CACHE_LINE_BYTES being 64 is an assumption being made so we don't have to figure this out on every pass through the Tx path. Instead I added a sanity check in ice_probe to verify cache line size and print a message if it's not 64 Bytes. This will make it easier to file issues if they are seen when the cache line size is not 64 Bytes when reading from the GLPCI_CNF2 register. Signed-off-by: Brett Creeley Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ice/ice_hw_autogen.h | 2 ++ drivers/net/ethernet/intel/ice/ice_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 9 +++++---- drivers/net/ethernet/intel/ice/ice_txrx.h | 17 +++++++++++++++-- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 5fdea6ec7675..596b9fb1c510 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -242,6 +242,8 @@ #define GLNVM_ULD 0x000B6008 #define GLNVM_ULD_CORER_DONE_M BIT(3) #define GLNVM_ULD_GLOBR_DONE_M BIT(4) +#define GLPCI_CNF2 0x000BE004 +#define GLPCI_CNF2_CACHELINE_SIZE_M BIT(1) #define PF_FUNC_RID 0x0009E880 #define PF_FUNC_RID_FUNC_NUM_S 0 #define PF_FUNC_RID_FUNC_NUM_M ICE_M(0x7, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 82f49dbd762c..333312a1d595 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1994,6 +1994,22 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) return 0; } +/** + * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines + * @pf: pointer to the PF structure + * + * There is no error returned here because the driver should be able to handle + * 128 Byte cache lines, so we only print a warning in case issues are seen, + * specifically with Tx. + */ +static void ice_verify_cacheline_size(struct ice_pf *pf) +{ + if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M) + dev_warn(&pf->pdev->dev, + "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", + ICE_CACHE_LINE_BYTES); +} + /** * ice_probe - Device initialization routine * @pdev: PCI device information struct @@ -2144,6 +2160,8 @@ static int ice_probe(struct pci_dev *pdev, /* since everything is good, start the service timer */ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); + ice_verify_cacheline_size(pf); + return 0; err_alloc_sw_unroll: diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5dae968d853e..3387c67c848d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1556,15 +1556,15 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) * magnitude greater than our largest possible GSO size. * * This would then be implemented as: - * return (((size >> 12) * 85) >> 8) + 1; + * return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR; * * Since multiplication and division are commutative, we can reorder * operations into: - * return ((size * 85) >> 20) + 1; + * return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR; */ static unsigned int ice_txd_use_count(unsigned int size) { - return ((size * 85) >> 20) + 1; + return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR; } /** @@ -1706,7 +1706,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) * + 1 desc for context descriptor, * otherwise try next time */ - if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) { + if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + + ICE_DESCS_FOR_CTX_DESC)) { tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 1d0f58bd389b..75d0eaf6c9dd 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -22,8 +22,21 @@ #define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ #define ICE_MAX_TXQ_PER_TXQG 128 -/* Tx Descriptors needed, worst case */ -#define DESC_NEEDED (MAX_SKB_FRAGS + 4) +/* We are assuming that the cache line is always 64 Bytes here for ice. + * In order to make sure that is a correct assumption there is a check in probe + * to print a warning if the read from GLPCI_CNF2 tells us that the cache line + * size is 128 bytes. We do it this way because we do not want to read the + * GLPCI_CNF2 register or a variable containing the value on every pass through + * the Tx path. + */ +#define ICE_CACHE_LINE_BYTES 64 +#define ICE_DESCS_PER_CACHE_LINE (ICE_CACHE_LINE_BYTES / \ + sizeof(struct ice_tx_desc)) +#define ICE_DESCS_FOR_CTX_DESC 1 +#define ICE_DESCS_FOR_SKB_DATA_PTR 1 +/* Tx descriptors needed, worst case */ +#define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \ + ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR) #define ICE_DESC_UNUSED(R) \ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) From d944b46992f8e99b6bdc721e44b02e5ca294fa2b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Fri, 26 Oct 2018 10:40:59 -0700 Subject: [PATCH 11/41] ice: Fix the bytecount sent to netdev_tx_sent_queue Currently if the driver does a TSO offload the bytecount sent to netdev_tx_sent_queue will be incorrect. This is because in ice_tso we overwrite the initial value that we set in ice_tx_map. This creates a mismatch between the Tx and Tx clean flow. In the Tx clean flow we calculate the bytecount (called total_bytes) as we clean the descriptors so the value used in the Tx clean path is correct. Fix this by using += in ice_tso instead of =. This fixes the mismatch in bytecount mentioned above. Signed-off-by: Brett Creeley Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3387c67c848d..fe5bbabbb41e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1520,7 +1520,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) /* update gso_segs and bytecount */ first->gso_segs = skb_shinfo(skb)->gso_segs; - first->bytecount = (first->gso_segs - 1) * off->header_len; + first->bytecount += (first->gso_segs - 1) * off->header_len; cd_tso_len = skb->len - off->header_len; cd_mss = skb_shinfo(skb)->gso_size; From 4c9b658eeaefedd402a59e858d8ac3bfdf6153e3 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 26 Oct 2018 19:13:00 +0200 Subject: [PATCH 12/41] igb: shorten maximum PHC timecounter update interval The timecounter needs to be updated at least once per ~550 seconds in order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old timestamp. Since commit 500462a9de65 ("timers: Switch to a non-cascading wheel"), scheduling of delayed work seems to be less accurate and a requested delay of 540 seconds may actually be longer than 550 seconds. Also, the PHC may be adjusted to run up to 6% faster than real time and the system clock up to 10% slower. Shorten the delay to 360 seconds to be sure the timecounter is updated in time. This fixes an issue with HW timestamps on 82580/I350/I354 being off by ~1100 seconds for few seconds every ~9 minutes. Cc: Thomas Gleixner Signed-off-by: Miroslav Lichvar Acked-by: Jacob Keller Acked-by: Richard Cochran Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ptp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 29ced6b74d36..2b95dc9c7a6a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -53,13 +53,15 @@ * 2^40 * 10^-9 / 60 = 18.3 minutes. * * SYSTIM is converted to real time using a timecounter. As - * timecounter_cyc2time() allows old timestamps, the timecounter - * needs to be updated at least once per half of the SYSTIM interval. - * Scheduling of delayed work is not very accurate, so we aim for 8 - * minutes to be sure the actual interval is shorter than 9.16 minutes. + * timecounter_cyc2time() allows old timestamps, the timecounter needs + * to be updated at least once per half of the SYSTIM interval. + * Scheduling of delayed work is not very accurate, and also the NIC + * clock can be adjusted to run up to 6% faster and the system clock + * up to 10% slower, so we aim for 6 minutes to be sure the actual + * interval in the NIC time is shorter than 9.16 minutes. */ -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 8) +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 6) #define IGB_PTP_TX_TIMEOUT (HZ * 15) #define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT) #define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0) From ffe498237b36ee42624e139b21efa05da4ff1f48 Mon Sep 17 00:00:00 2001 From: Chinh T Cao Date: Mon, 5 Nov 2018 12:18:35 -0800 Subject: [PATCH 13/41] ice: Change req_speeds to be u16 Since the req_speeds field in struct ice_link_status is a u8, req_speeds & ICE_AQ_LINK_SPEED_40GB always returns 0. This was caught by a coverity scan. Fix this by changing req_speeds to be u16. Reported-by: Bruce Allan Signed-off-by: Chinh T Cao Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 12f9432abf11..f4dbc81c1988 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -92,12 +92,12 @@ struct ice_link_status { u64 phy_type_low; u16 max_frame_size; u16 link_speed; + u16 req_speeds; u8 lse_ena; /* Link Status Event notification */ u8 link_info; u8 an_info; u8 ext_info; u8 pacing; - u8 req_speeds; /* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of * ice_aqc_get_phy_caps structure */ From ba766b8b99c30ad3c55ed8cf224d1185ecff1476 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 29 Oct 2018 10:52:42 -0700 Subject: [PATCH 14/41] i40e: restore NETIF_F_GSO_IPXIP[46] to netdev features Since commit bacd75cfac8a ("i40e/i40evf: Add capability exchange for outer checksum", 2017-04-06) the i40e driver has not reported support for IP-in-IP offloads. This likely occurred due to a bad rebase, as the commit extracts hw_enc_features into its own variable. As part of this change, it dropped the NETIF_F_FSO_IPXIP flags from the netdev->hw_enc_features. This was unfortunately not caught during code review. Fix this by adding back the missing feature flags. For reference, NETIF_F_GSO_IPXIP4 was added in commit 7e13318daa4a ("net: define gso types for IPx over IPv4 and IPv6", 2016-05-20), replacing NETIF_F_GSO_IPIP and NETIF_F_GSO_SIT. NETIF_F_GSO_IPXIP6 was added in commit bf2d1df39502 ("intel: Add support for IPv6 IP-in-IP offload", 2016-05-20). Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bc71a21c1dc2..3ff5ee49818b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12249,6 +12249,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_SCTP_CRC | From d5596fd46770550873ab4c02bcb69f83d3d63f63 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 29 Oct 2018 10:52:43 -0700 Subject: [PATCH 15/41] i40e: enable NETIF_F_NTUPLE and NETIF_F_HW_TC at driver load The assignment of the feature flag NETIF_F_NTUPLE and NETIF_F_HW_TC occurs prior to the initial setup of the local hw_features variable. This means the features are set as user-changeable, but are not set in the currently active feature list. This results in the features being disabled at the driver's initial load. Move the assignment after the initial assignment of hw_features, and assign to the local variable. This ensures that NETIF_F_NTUPLE and NETIF_F_HW_TC are marked as user-changeable, and also enables them by default when the driver loads. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3ff5ee49818b..21c2688d6308 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12268,13 +12268,13 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; - hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + netdev->hw_features |= hw_features; netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; From 96ed82cc1f515a2329f93506129ab1de35429b89 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 7 Nov 2018 12:06:52 +0000 Subject: [PATCH 16/41] FDDI: defza: Fix SPDX annotation The SPDX annotation for this driver does not match the license text, which specifies GNU GPL 2 or later. Make the two match by correcting the SPDX tag. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defza.c | 2 +- drivers/net/fddi/defza.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 3b7f10a5f06a..79980eeb0e43 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0+ /* FDDI network adapter driver for DEC FDDIcontroller 700/700-C devices. * * Copyright (c) 2018 Maciej W. Rozycki diff --git a/drivers/net/fddi/defza.h b/drivers/net/fddi/defza.h index b06acf32738e..ed659ed17215 100644 --- a/drivers/net/fddi/defza.h +++ b/drivers/net/fddi/defza.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0+ */ /* FDDI network adapter driver for DEC FDDIcontroller 700/700-C devices. * * Copyright (c) 2018 Maciej W. Rozycki From 5f5fae37dbcf37feae48c40d2580dbd67c5df131 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 7 Nov 2018 12:06:59 +0000 Subject: [PATCH 17/41] FDDI: defza: Add missing comment closing Fix: drivers/net/fddi/defza.h:238:1: warning: "/*" within comment [-Wcomment] by adding a missing comment closing. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defza.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/fddi/defza.h b/drivers/net/fddi/defza.h index ed659ed17215..93bda61be8e3 100644 --- a/drivers/net/fddi/defza.h +++ b/drivers/net/fddi/defza.h @@ -235,6 +235,7 @@ struct fza_ring_cmd { #define FZA_RING_CMD 0x200400 /* command ring address */ #define FZA_RING_CMD_SIZE 0x40 /* command descriptor ring * size + */ /* Command constants. */ #define FZA_RING_CMD_MASK 0x7fffffff #define FZA_RING_CMD_NOP 0x00000000 /* nop */ From 04453b6b241913f85af013eec187bab776428af5 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 7 Nov 2018 12:07:05 +0000 Subject: [PATCH 18/41] FDDI: defza: Move SMT Tx data buffer declaration next to its skb Move the temporary data buffer used when tapping into the SMT Tx queue from the outer function level into the conditional block it's actually used in and its containing skb is also declared, making the structure of code better. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defza.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 79980eeb0e43..bebd0d6d2320 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -784,7 +784,7 @@ static void fza_rx(struct net_device *dev) static void fza_tx_smt(struct net_device *dev) { struct fza_private *fp = netdev_priv(dev); - struct fza_buffer_tx __iomem *smt_tx_ptr, *skb_data_ptr; + struct fza_buffer_tx __iomem *smt_tx_ptr; int i, len; u32 own; @@ -799,6 +799,7 @@ static void fza_tx_smt(struct net_device *dev) if (!netif_queue_stopped(dev)) { if (dev_nit_active(dev)) { + struct fza_buffer_tx *skb_data_ptr; struct sk_buff *skb; /* Length must be a multiple of 4 as only word From 8f5365ebf7b17c35dddbb694b4f0ffd1293a947f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 7 Nov 2018 12:07:10 +0000 Subject: [PATCH 19/41] FDDI: defza: Make the driver version string constant The driver version string is obviously not meant to be changed at run time, so mark it `const'. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defza.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index bebd0d6d2320..c5cae8e74dc4 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -56,7 +56,7 @@ #define DRV_VERSION "v.1.1.4" #define DRV_RELDATE "Oct 6 2018" -static char version[] = +static const char version[] = DRV_NAME ": " DRV_VERSION " " DRV_RELDATE " Maciej W. Rozycki\n"; MODULE_AUTHOR("Maciej W. Rozycki "); From e84b47941e15e6666afb8ee8b21d1c3fc1a013af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 7 Nov 2018 17:50:52 +0100 Subject: [PATCH 20/41] ibmvnic: fix accelerated VLAN handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't request tag insertion when it isn't present in outgoing skb. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 7893beffcc71..c9d5d0a7fbf1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1545,7 +1545,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); - if (adapter->vlan_header_insertion) { + if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) { tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT; tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci); } From b25ddb00bc1b96613edcb525f19203a7d1405fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 7 Nov 2018 17:50:53 +0100 Subject: [PATCH 21/41] qlcnic: remove assumption that vlan_tci != 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VLAN.TCI == 0 is perfectly valid (802.1p), so allow it to be accelerated. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 9647578cbe6a..14f26bf3b388 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -459,7 +459,7 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, struct sk_buff *skb, struct qlcnic_host_tx_ring *tx_ring) { - u8 l4proto, opcode = 0, hdr_len = 0; + u8 l4proto, opcode = 0, hdr_len = 0, tag_vlan = 0; u16 flags = 0, vlan_tci = 0; int copied, offset, copy_len, size; struct cmd_desc_type0 *hwdesc; @@ -472,14 +472,16 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter, flags = QLCNIC_FLAGS_VLAN_TAGGED; vlan_tci = ntohs(vh->h_vlan_TCI); protocol = ntohs(vh->h_vlan_encapsulated_proto); + tag_vlan = 1; } else if (skb_vlan_tag_present(skb)) { flags = QLCNIC_FLAGS_VLAN_OOB; vlan_tci = skb_vlan_tag_get(skb); + tag_vlan = 1; } if (unlikely(adapter->tx_pvid)) { - if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) + if (tag_vlan && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) return -EIO; - if (vlan_tci && (adapter->flags & QLCNIC_TAGGING_ENABLED)) + if (tag_vlan && (adapter->flags & QLCNIC_TAGGING_ENABLED)) goto set_flags; flags = QLCNIC_FLAGS_VLAN_OOB; From e12c225258f2584906765234ca6db4ad4c618192 Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 8 Nov 2018 10:13:19 +0800 Subject: [PATCH 22/41] net: hns3: bugfix for not checking return value hns3_reset_notify_init_enet() only return error early if the return value of hns3_restore_vlan() is not 0. This patch adds checking for the return value of hns3_restore_vlan. Fixes: 7fa6be4fd2f6 ("net: hns3: fix incorrect return value/type of some functions") Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 3f96aa30068e..20fcf0d1c2ce 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3760,7 +3760,8 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) /* Hardware table is only clear when pf resets */ if (!(handle->flags & HNAE3_SUPPORT_VF)) { ret = hns3_restore_vlan(netdev); - return ret; + if (ret) + return ret; } ret = hns3_restore_fd_rules(netdev); From 0d5b9311baf27bb545f187f12ecfd558220c607d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Nov 2018 17:34:27 -0800 Subject: [PATCH 23/41] inet: frags: better deal with smp races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple cpus might attempt to insert a new fragment in rhashtable, if for example RPS is buggy, as reported by 배석진 in https://patchwork.ozlabs.org/patch/994601/ We use rhashtable_lookup_get_insert_key() instead of rhashtable_insert_fast() to let cpus losing the race free their own inet_frag_queue and use the one that was inserted by another cpu. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Eric Dumazet Reported-by: 배석진 Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index bcb11f3a27c0..760a9e52e02b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -178,21 +178,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - void *arg) + void *arg, + struct inet_frag_queue **prev) { struct inet_frags *f = nf->f; struct inet_frag_queue *q; - int err; q = inet_frag_alloc(nf, f, arg); - if (!q) + if (!q) { + *prev = ERR_PTR(-ENOMEM); return NULL; - + } mod_timer(&q->timer, jiffies + nf->timeout); - err = rhashtable_insert_fast(&nf->rhashtable, &q->node, - f->rhash_params); - if (err < 0) { + *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, + &q->node, f->rhash_params); + if (*prev) { q->flags |= INET_FRAG_COMPLETE; inet_frag_kill(q); inet_frag_destroy(q); @@ -204,22 +205,22 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) { - struct inet_frag_queue *fq; + struct inet_frag_queue *fq = NULL, *prev; if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) return NULL; rcu_read_lock(); - fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); - if (fq) { + prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); + if (!prev) + fq = inet_frag_create(nf, key, &prev); + if (prev && !IS_ERR(prev)) { + fq = prev; if (!refcount_inc_not_zero(&fq->refcnt)) fq = NULL; - rcu_read_unlock(); - return fq; } rcu_read_unlock(); - - return inet_frag_create(nf, key); + return fq; } EXPORT_SYMBOL(inet_frag_find); From 39477551df940ddb1339203817de04f5caaacf7a Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 8 Nov 2018 16:46:08 +0200 Subject: [PATCH 24/41] qed: Fix memory/entry leak in qed_init_sp_request() Free the allocated SPQ entry or return the acquired SPQ entry to the free list in error flows. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qed/qed_sp_commands.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 77b6248ad3b9..e86a1ea23613 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -80,7 +80,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, case QED_SPQ_MODE_BLOCK: if (!p_data->p_comp_data) - return -EINVAL; + goto err; p_ent->comp_cb.cookie = p_data->p_comp_data->cookie; break; @@ -95,7 +95,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, default: DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n", p_ent->comp_mode); - return -EINVAL; + goto err; } DP_VERBOSE(p_hwfn, QED_MSG_SPQ, @@ -109,6 +109,18 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod)); return 0; + +err: + /* qed_spq_get_entry() can either get an entry from the free_pool, + * or, if no entries are left, allocate a new entry and add it to + * the unlimited_pending list. + */ + if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) + kfree(p_ent); + else + qed_spq_return_entry(p_hwfn, p_ent); + + return -EINVAL; } static enum tunnel_clss qed_tunn_clss_to_fw_clss(u8 type) From 2632f22ebd08da249c2017962a199a0cfb2324bf Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 8 Nov 2018 16:46:09 +0200 Subject: [PATCH 25/41] qed: Fix blocking/unlimited SPQ entries leak When there are no SPQ entries left in the free_pool, new entries are allocated and are added to the unlimited list. When an entry in the pool is available, the content is copied from the original entry, and the new entry is sent to the device. qed_spq_post() is not aware of that, so the additional entry is stored in the original entry as p_post_ent, which can later be returned to the pool. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 3 ++ drivers/net/ethernet/qlogic/qed/qed_spq.c | 57 ++++++++++++----------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index e95431f6acd4..04259df8a5c2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -167,6 +167,9 @@ struct qed_spq_entry { enum spq_mode comp_mode; struct qed_spq_comp_cb comp_cb; struct qed_spq_comp_done comp_done; /* SPQ_MODE_EBLOCK */ + + /* Posted entry for unlimited list entry in EBLOCK mode */ + struct qed_spq_entry *post_ent; }; struct qed_eq { diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index c4a6274dd625..c1a81ec0524b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -685,6 +685,8 @@ static int qed_spq_add_entry(struct qed_hwfn *p_hwfn, /* EBLOCK responsible to free the allocated p_ent */ if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK) kfree(p_ent); + else + p_ent->post_ent = p_en2; p_ent = p_en2; } @@ -767,6 +769,25 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) SPQ_HIGH_PRI_RESERVE_DEFAULT); } +/* Avoid overriding of SPQ entries when getting out-of-order completions, by + * marking the completions in a bitmap and increasing the chain consumer only + * for the first successive completed entries. + */ +static void qed_spq_comp_bmap_update(struct qed_hwfn *p_hwfn, __le16 echo) +{ + u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; + struct qed_spq *p_spq = p_hwfn->p_spq; + + __set_bit(pos, p_spq->p_comp_bitmap); + while (test_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap)) { + __clear_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap); + p_spq->comp_bitmap_idx++; + qed_chain_return_produced(&p_spq->chain); + } +} + int qed_spq_post(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent, u8 *fw_return_code) { @@ -824,11 +845,12 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, p_ent->queue == &p_spq->unlimited_pending); if (p_ent->queue == &p_spq->unlimited_pending) { - /* This is an allocated p_ent which does not need to - * return to pool. - */ + struct qed_spq_entry *p_post_ent = p_ent->post_ent; + kfree(p_ent); - return rc; + + /* Return the entry which was actually posted */ + p_ent = p_post_ent; } if (rc) @@ -842,7 +864,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, spq_post_fail2: spin_lock_bh(&p_spq->lock); list_del(&p_ent->list); - qed_chain_return_produced(&p_spq->chain); + qed_spq_comp_bmap_update(p_hwfn, p_ent->elem.hdr.echo); spq_post_fail: /* return to the free pool */ @@ -874,25 +896,8 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_spq->lock); list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) { if (p_ent->elem.hdr.echo == echo) { - u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; - list_del(&p_ent->list); - - /* Avoid overriding of SPQ entries when getting - * out-of-order completions, by marking the completions - * in a bitmap and increasing the chain consumer only - * for the first successive completed entries. - */ - __set_bit(pos, p_spq->p_comp_bitmap); - - while (test_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap)) { - __clear_bit(p_spq->comp_bitmap_idx, - p_spq->p_comp_bitmap); - p_spq->comp_bitmap_idx++; - qed_chain_return_produced(&p_spq->chain); - } - + qed_spq_comp_bmap_update(p_hwfn, echo); p_spq->comp_count++; found = p_ent; break; @@ -931,11 +936,9 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, QED_MSG_SPQ, "Got a completion without a callback function\n"); - if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || - (found->queue == &p_spq->unlimited_pending)) + if (found->comp_mode != QED_SPQ_MODE_EBLOCK) /* EBLOCK is responsible for returning its own entry into the - * free list, unless it originally added the entry into the - * unlimited pending list. + * free list. */ qed_spq_return_entry(p_hwfn, found); From fb5e7438e7a3c8966e04ccb0760170e9e06f3699 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Thu, 8 Nov 2018 16:46:10 +0200 Subject: [PATCH 26/41] qed: Fix SPQ entries not returned to pool in error flows qed_sp_destroy_request() API was added for SPQ users that need to free/return the entry they acquired in their error flows. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_fcoe.c | 11 +++++++--- drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 1 + drivers/net/ethernet/qlogic/qed/qed_l2.c | 12 ++++++---- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 1 + drivers/net/ethernet/qlogic/qed/qed_roce.c | 1 + drivers/net/ethernet/qlogic/qed/qed_sp.h | 11 ++++++++++ .../net/ethernet/qlogic/qed/qed_sp_commands.c | 22 ++++++++++++------- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 1 + 8 files changed, 45 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c index cc1b373c0ace..46dc93d3b9b5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c @@ -147,7 +147,8 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn, "Cannot satisfy CQ amount. CQs requested %d, CQs available %d. Aborting function start\n", fcoe_pf_params->num_cqs, p_hwfn->hw_info.feat_num[QED_FCOE_CQ]); - return -EINVAL; + rc = -EINVAL; + goto err; } p_data->mtu = cpu_to_le16(fcoe_pf_params->mtu); @@ -156,14 +157,14 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn, rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_FCOE, &dummy_cid); if (rc) - return rc; + goto err; cxt_info.iid = dummy_cid; rc = qed_cxt_get_cid_info(p_hwfn, &cxt_info); if (rc) { DP_NOTICE(p_hwfn, "Cannot find context info for dummy cid=%d\n", dummy_cid); - return rc; + goto err; } p_cxt = cxt_info.p_cxt; SET_FIELD(p_cxt->tstorm_ag_context.flags3, @@ -240,6 +241,10 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn, rc = qed_spq_post(p_hwfn, p_ent, NULL); return rc; + +err: + qed_sp_destroy_request(p_hwfn, p_ent); + return rc; } static int diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 1135387bd99d..4f8a685d1a55 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -200,6 +200,7 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn, "Cannot satisfy CQ amount. Queues requested %d, CQs available %d. Aborting function start\n", p_params->num_queues, p_hwfn->hw_info.feat_num[QED_ISCSI_CQ]); + qed_sp_destroy_request(p_hwfn, p_ent); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 82a1bd1f8a8c..67c02ea93906 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -740,8 +740,7 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params); if (rc) { - /* Return spq entry which is taken in qed_sp_init_request()*/ - qed_spq_return_entry(p_hwfn, p_ent); + qed_sp_destroy_request(p_hwfn, p_ent); return rc; } @@ -1355,6 +1354,7 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "%d is not supported yet\n", p_filter_cmd->opcode); + qed_sp_destroy_request(p_hwfn, *pp_ent); return -EINVAL; } @@ -2056,13 +2056,13 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, } else { rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); if (rc) - return rc; + goto err; if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) { rc = qed_fw_l2_queue(p_hwfn, p_params->qid, &abs_rx_q_id); if (rc) - return rc; + goto err; p_ramrod->rx_qid_valid = 1; p_ramrod->rx_qid = cpu_to_le16(abs_rx_q_id); @@ -2083,6 +2083,10 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, (u64)p_params->addr, p_params->length); return qed_spq_post(p_hwfn, p_ent, NULL); + +err: + qed_sp_destroy_request(p_hwfn, p_ent); + return rc; } int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index c71391b9c757..62113438c880 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1514,6 +1514,7 @@ qed_rdma_register_tid(void *rdma_cxt, default: rc = -EINVAL; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + qed_sp_destroy_request(p_hwfn, p_ent); return rc; } SET_FIELD(p_ramrod->flags1, diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index f9167d1354bb..e49fada85410 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -745,6 +745,7 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "qed destroy responder failed: cannot allocate memory (ramrod). rc = %d\n", rc); + qed_sp_destroy_request(p_hwfn, p_ent); return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 04259df8a5c2..3157c0d99441 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -399,6 +399,17 @@ struct qed_sp_init_data { struct qed_spq_comp_cb *p_comp_data; }; +/** + * @brief Returns a SPQ entry to the pool / frees the entry if allocated. + * Should be called on in error flows after initializing the SPQ entry + * and before posting it. + * + * @param p_hwfn + * @param p_ent + */ +void qed_sp_destroy_request(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent); + int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, u8 cmd, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index e86a1ea23613..888274fa208b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -47,6 +47,19 @@ #include "qed_sp.h" #include "qed_sriov.h" +void qed_sp_destroy_request(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent) +{ + /* qed_spq_get_entry() can either get an entry from the free_pool, + * or, if no entries are left, allocate a new entry and add it to + * the unlimited_pending list. + */ + if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) + kfree(p_ent); + else + qed_spq_return_entry(p_hwfn, p_ent); +} + int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, u8 cmd, u8 protocol, struct qed_sp_init_data *p_data) @@ -111,14 +124,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, return 0; err: - /* qed_spq_get_entry() can either get an entry from the free_pool, - * or, if no entries are left, allocate a new entry and add it to - * the unlimited_pending list. - */ - if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending) - kfree(p_ent); - else - qed_spq_return_entry(p_hwfn, p_ent); + qed_sp_destroy_request(p_hwfn, p_ent); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 9b08a9d9e151..ca6290fa0f30 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -101,6 +101,7 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf) default: DP_NOTICE(p_hwfn, "Unknown VF personality %d\n", p_hwfn->hw_info.personality); + qed_sp_destroy_request(p_hwfn, p_ent); return -EINVAL; } From fa5c448d98f0df660bfcad3dd5facc027ef84cd3 Mon Sep 17 00:00:00 2001 From: Sagiv Ozeri Date: Thu, 8 Nov 2018 16:46:11 +0200 Subject: [PATCH 27/41] qed: Fix potential memory corruption A stuck ramrod should be deleted from the completion_pending list, otherwise it will be added again in the future and corrupt the list. Return error value to inform that ramrod is stuck and should be deleted. Signed-off-by: Sagiv Ozeri Signed-off-by: Denis Bolotin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index c1a81ec0524b..0a9c5bb0fa48 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -142,6 +142,7 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n"); rc = qed_mcp_drain(p_hwfn, p_ptt); + qed_ptt_release(p_hwfn, p_ptt); if (rc) { DP_NOTICE(p_hwfn, "MCP drain failed\n"); goto err; @@ -150,18 +151,15 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, /* Retry after drain */ rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true); if (!rc) - goto out; + return 0; comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie; - if (comp_done->done == 1) + if (comp_done->done == 1) { if (p_fw_ret) *p_fw_ret = comp_done->fw_return_code; -out: - qed_ptt_release(p_hwfn, p_ptt); - return 0; - + return 0; + } err: - qed_ptt_release(p_hwfn, p_ptt); DP_NOTICE(p_hwfn, "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n", le32_to_cpu(p_ent->elem.hdr.cid), From 8137b6ef0ce469154e5cf19f8e7fe04d9a72ac5e Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 8 Nov 2018 11:42:14 -0600 Subject: [PATCH 28/41] net: stmmac: Fix RX packet size > 8191 Ping problems with packets > 8191 as shown: PING 192.168.1.99 (192.168.1.99) 8150(8178) bytes of data. 8158 bytes from 192.168.1.99: icmp_seq=1 ttl=64 time=0.669 ms wrong data byte 8144 should be 0xd0 but was 0x0 16 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f %< ---------------snip-------------------------------------- 8112 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf 8144 0 0 0 0 d0 d1 ^^^^^^^ Notice the 4 bytes of 0 before the expected byte of d0. Databook notes that the RX buffer must be a multiple of 4/8/16 bytes [1]. Update the DMA Buffer size define to 8188 instead of 8192. Remove the -1 from the RX buffer size allocations and use the new DMA Buffer size directly. [1] Synopsys DesignWare Cores Ethernet MAC Universal v3.70a [section 8.4.2 - Table 8-24] Tested on SoCFPGA Stratix10 with ping sweep from 100 to 8300 byte packets. Fixes: 286a83721720 ("stmmac: add CHAINED descriptor mode support (V4)") Suggested-by: Jose Abreu Signed-off-by: Thor Thayer Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 3 ++- drivers/net/ethernet/stmicro/stmmac/descs_com.h | 2 +- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 2 +- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index b1b305f8f414..272b9ca66314 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -365,7 +365,8 @@ struct dma_features { /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ #define BUF_SIZE_16KiB 16384 -#define BUF_SIZE_8KiB 8192 +/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */ +#define BUF_SIZE_8KiB 8188 #define BUF_SIZE_4KiB 4096 #define BUF_SIZE_2KiB 2048 diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index ca9d7e48034c..40d6356a7e73 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -31,7 +31,7 @@ /* Enhanced descriptors */ static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) { - p->des1 |= cpu_to_le32(((BUF_SIZE_8KiB - 1) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB << ERDES1_BUFFER2_SIZE_SHIFT) & ERDES1_BUFFER2_SIZE_MASK); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 77914c89d749..5ef91a790f9d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -262,7 +262,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, int end) { p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK); + p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index abc3f85270cd..d8c5bc412219 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -140,7 +140,7 @@ static void clean_desc3(void *priv_ptr, struct dma_desc *p) static int set_16kib_bfsize(int mtu) { int ret = 0; - if (unlikely(mtu >= BUF_SIZE_8KiB)) + if (unlikely(mtu > BUF_SIZE_8KiB)) ret = BUF_SIZE_16KiB; return ret; } From 85b18b0237ce9986a81a1b9534b5e2ee116f5504 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 8 Nov 2018 20:38:26 +0100 Subject: [PATCH 29/41] net: smsc95xx: Fix MTU range The commit f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") introduce a common MTU handling for usbnet. But it's missing the necessary changes for smsc95xx. So set the MTU range accordingly. This patch has been tested on a Raspberry Pi 3. Fixes: f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 2d17f3b9bb16..f2d01cb6f958 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1321,6 +1321,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &smsc95xx_ethtool_ops; dev->net->flags |= IFF_MULTICAST; dev->net->hard_header_len += SMSC95XX_TX_OVERHEAD_CSUM; + dev->net->min_mtu = ETH_MIN_MTU; + dev->net->max_mtu = ETH_DATA_LEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; pdata->dev = dev; From 35e8e8b45d31bec34379dd36e7b71448e003efb2 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 9 Nov 2018 11:53:56 +0000 Subject: [PATCH 30/41] net: aquantia: synchronized flow control between mac/phy Flow control statuses were not synchronized between blocks, that caused packets/link drop on some corner cases, when MAC sent PFC although Phy was not expecting these to come. Driver should readout the negotiated FC from phy and configure RX block accordigly. This is done on each link change event with information from FW. Fixes: 288551de45aa ("net: aquantia: Implement rx/tx flow control ethtools callback") Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/aq_ethtool.c | 8 +++---- .../net/ethernet/aquantia/atlantic/aq_hw.h | 3 +++ .../net/ethernet/aquantia/atlantic/aq_nic.c | 14 ++++++++++++- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 12 ++++++++--- .../atlantic/hw_atl/hw_atl_utils_fw2x.c | 21 +++++++++++++++++++ 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 6a633c70f603..99ef1daaa4d8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -407,13 +407,13 @@ static void aq_ethtool_get_pauseparam(struct net_device *ndev, struct ethtool_pauseparam *pause) { struct aq_nic_s *aq_nic = netdev_priv(ndev); + u32 fc = aq_nic->aq_nic_cfg.flow_control; pause->autoneg = 0; - if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX) - pause->rx_pause = 1; - if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_TX) - pause->tx_pause = 1; + pause->rx_pause = !!(fc & AQ_NIC_FC_RX); + pause->tx_pause = !!(fc & AQ_NIC_FC_TX); + } static int aq_ethtool_set_pauseparam(struct net_device *ndev, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index e8689241204e..7ec8d24b2b0b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -204,6 +204,7 @@ struct aq_hw_ops { int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version); + int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc); }; struct aq_fw_ops { @@ -226,6 +227,8 @@ struct aq_fw_ops { int (*update_stats)(struct aq_hw_s *self); + u32 (*get_flow_control)(struct aq_hw_s *self, u32 *fcmode); + int (*set_flow_control)(struct aq_hw_s *self); int (*set_power)(struct aq_hw_s *self, unsigned int power_state, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 5fed24446687..0011a3f2f672 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -124,6 +124,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) static int aq_nic_update_link_status(struct aq_nic_s *self) { int err = self->aq_fw_ops->update_link_status(self->aq_hw); + u32 fc = 0; if (err) return err; @@ -133,6 +134,15 @@ static int aq_nic_update_link_status(struct aq_nic_s *self) AQ_CFG_DRV_NAME, self->link_status.mbps, self->aq_hw->aq_link_status.mbps); aq_nic_update_interrupt_moderation_settings(self); + + /* Driver has to update flow control settings on RX block + * on any link event. + * We should query FW whether it negotiated FC. + */ + if (self->aq_fw_ops->get_flow_control) + self->aq_fw_ops->get_flow_control(self->aq_hw, &fc); + if (self->aq_hw_ops->hw_set_fc) + self->aq_hw_ops->hw_set_fc(self->aq_hw, fc, 0); } self->link_status = self->aq_hw->aq_link_status; @@ -772,7 +782,9 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self, ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); - if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX) + /* Asym is when either RX or TX, but not both */ + if (!!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX) ^ + !!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX)) ethtool_link_ksettings_add_link_mode(cmd, advertising, Asym_Pause); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 76d25d594a0f..119265762b0c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -100,12 +100,17 @@ static int hw_atl_b0_hw_reset(struct aq_hw_s *self) return err; } +static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc) +{ + hw_atl_rpb_rx_xoff_en_per_tc_set(self, !!(fc & AQ_NIC_FC_RX), tc); + return 0; +} + static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) { u32 tc = 0U; u32 buff_size = 0U; unsigned int i_priority = 0U; - bool is_rx_flow_control = false; /* TPS Descriptor rate init */ hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U); @@ -138,7 +143,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) /* QoS Rx buf size per TC */ tc = 0; - is_rx_flow_control = (AQ_NIC_FC_RX & self->aq_nic_cfg->flow_control); buff_size = HW_ATL_B0_RXBUF_MAX; hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc); @@ -150,7 +154,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) (buff_size * (1024U / 32U) * 50U) / 100U, tc); - hw_atl_rpb_rx_xoff_en_per_tc_set(self, is_rx_flow_control ? 1U : 0U, tc); + + hw_atl_b0_set_fc(self, self->aq_nic_cfg->flow_control, tc); /* QoS 802.1p priority -> TC mapping */ for (i_priority = 8U; i_priority--;) @@ -963,4 +968,5 @@ const struct aq_hw_ops hw_atl_ops_b0 = { .hw_get_regs = hw_atl_utils_hw_get_regs, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, + .hw_set_fc = hw_atl_b0_set_fc, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 096ca5730887..7de3220d9cab 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -30,6 +30,8 @@ #define HW_ATL_FW2X_MPI_STATE_ADDR 0x370 #define HW_ATL_FW2X_MPI_STATE2_ADDR 0x374 +#define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE) +#define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE) #define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY) #define HW_ATL_FW2X_CAP_WOL BIT(CAPS_HI_WOL) @@ -451,6 +453,24 @@ static int aq_fw2x_set_flow_control(struct aq_hw_s *self) return 0; } +static u32 aq_fw2x_get_flow_control(struct aq_hw_s *self, u32 *fcmode) +{ + u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR); + + if (mpi_state & HW_ATL_FW2X_CAP_PAUSE) + if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE) + *fcmode = AQ_NIC_FC_RX; + else + *fcmode = AQ_NIC_FC_RX | AQ_NIC_FC_TX; + else + if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE) + *fcmode = AQ_NIC_FC_TX; + else + *fcmode = 0; + + return 0; +} + const struct aq_fw_ops aq_fw_2x_ops = { .init = aq_fw2x_init, .deinit = aq_fw2x_deinit, @@ -465,4 +485,5 @@ const struct aq_fw_ops aq_fw_2x_ops = { .set_eee_rate = aq_fw2x_set_eee_rate, .get_eee_rate = aq_fw2x_get_eee_rate, .set_flow_control = aq_fw2x_set_flow_control, + .get_flow_control = aq_fw2x_get_flow_control }; From 7a1bb49461b12b2e6332a4d054256835f45203f3 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 9 Nov 2018 11:53:57 +0000 Subject: [PATCH 31/41] net: aquantia: fix potential IOMMU fault after driver unbind IOMMU fault may occurr on unbind/bind or if_down/if_up sequence. Although driver disables the rings on down, this is not enough. Due to internal HW design, during subsequent initialization NIC sometimes may reuse RX descriptors cache and write to the host memory from the descriptor cache. That's get catched by IOMMU on host. This patch invalidates the descriptor cache in NIC on interface down to prevent writing to the cached descriptors and to the memory pointed in those descriptors. Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 6 ++++++ .../aquantia/atlantic/hw_atl/hw_atl_llh.c | 8 ++++++++ .../aquantia/atlantic/hw_atl/hw_atl_llh.h | 3 +++ .../atlantic/hw_atl/hw_atl_llh_internal.h | 18 ++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 119265762b0c..3aec56623bf5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -920,6 +920,12 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) static int hw_atl_b0_hw_stop(struct aq_hw_s *self) { hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK); + + /* Invalidate Descriptor Cache to prevent writing to the cached + * descriptors and to the data pointer of those descriptors + */ + hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1); + return aq_hw_err_from_flags(self); } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index be0a3a90dfad..5502ec5f0f69 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -619,6 +619,14 @@ void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode); } +void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT, + init); +} + void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_pkt_buff_size_per_tc, u32 buffer) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index 7056c7342afc..41f239928c15 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -325,6 +325,9 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_pkt_buff_size_per_tc, u32 buffer); +/* set rdm rx dma descriptor cache init */ +void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init); + /* set rx xoff enable (per tc) */ void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc, u32 buffer); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index 716674a9b729..a715fa317b1c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -293,6 +293,24 @@ /* default value of bitfield desc{d}_reset */ #define HW_ATL_RDM_DESCDRESET_DEFAULT 0x0 +/* rdm_desc_init_i bitfield definitions + * preprocessor definitions for the bitfield rdm_desc_init_i. + * port="pif_rdm_desc_init_i" + */ + +/* register address for bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR 0x00005a00 +/* bitmask for bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK 0xffffffff +/* inverted bitmask for bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSKN 0x00000000 +/* lower bit position of bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT 0 +/* width of bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_WIDTH 32 +/* default value of bitfield rdm_desc_init_i */ +#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0 + /* rx int_desc_wrb_en bitfield definitions * preprocessor definitions for the bitfield "int_desc_wrb_en". * port="pif_rdm_int_desc_wrb_en_i" From bfaa9f8553d5c20703781e63f4fc8cb4792f18fd Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 9 Nov 2018 11:53:59 +0000 Subject: [PATCH 32/41] net: aquantia: fixed enable unicast on 32 macvlan Fixed a condition mistake due to which macvlans unicast item number 32 was not added in the unicast filter. The consequence is that when exactly 32 macvlans are created on NIC, the last created macvlan receives no traffic because its MAC was not registered in HW. Fixes: 94b3b542303f ("net: aquantia: vlan unicast address list correct handling") Signed-off-by: Igor Russkikh Tested-by: Nikita Danilov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0011a3f2f672..b5e7c98f424c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -600,7 +600,7 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) } } - if (i > 0 && i < AQ_HW_MULTICAST_ADDRESS_MAX) { + if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) { packet_filter |= IFF_MULTICAST; self->mc_list.count = i; self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, From ad703c2b9127f9acdef697ec4755f6da4beaa266 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 9 Nov 2018 11:54:01 +0000 Subject: [PATCH 33/41] net: aquantia: invalid checksumm offload implementation Packets with marked invalid IP/UDP/TCP checksums were considered as good by the driver. The error was in a logic, processing offload bits in RX descriptor. Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_ring.c | 35 +++++++++++------- .../aquantia/atlantic/hw_atl/hw_atl_b0.c | 36 +++++++++---------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 3db91446cc67..74550ccc7a20 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -172,6 +172,27 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) return !!budget; } +static void aq_rx_checksum(struct aq_ring_s *self, + struct aq_ring_buff_s *buff, + struct sk_buff *skb) +{ + if (!(self->aq_nic->ndev->features & NETIF_F_RXCSUM)) + return; + + if (unlikely(buff->is_cso_err)) { + ++self->stats.rx.errors; + skb->ip_summed = CHECKSUM_NONE; + return; + } + if (buff->is_ip_cso) { + __skb_incr_checksum_unnecessary(skb); + if (buff->is_udp_cso || buff->is_tcp_cso) + __skb_incr_checksum_unnecessary(skb); + } else { + skb->ip_summed = CHECKSUM_NONE; + } +} + #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) int aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, @@ -267,18 +288,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } skb->protocol = eth_type_trans(skb, ndev); - if (unlikely(buff->is_cso_err)) { - ++self->stats.rx.errors; - skb->ip_summed = CHECKSUM_NONE; - } else { - if (buff->is_ip_cso) { - __skb_incr_checksum_unnecessary(skb); - if (buff->is_udp_cso || buff->is_tcp_cso) - __skb_incr_checksum_unnecessary(skb); - } else { - skb->ip_summed = CHECKSUM_NONE; - } - } + + aq_rx_checksum(self, buff, skb); skb_set_hash(skb, buff->rss_hash, buff->is_hash_l4 ? PKT_HASH_TYPE_L4 : diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 3aec56623bf5..179ce12fe4d8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -660,9 +660,9 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, struct hw_atl_rxd_wb_s *rxd_wb = (struct hw_atl_rxd_wb_s *) &ring->dx_ring[ring->hw_head * HW_ATL_B0_RXD_SIZE]; - unsigned int is_err = 1U; unsigned int is_rx_check_sum_enabled = 0U; unsigned int pkt_type = 0U; + u8 rx_stat = 0U; if (!(rxd_wb->status & 0x1U)) { /* RxD is not done */ break; @@ -670,35 +670,35 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, buff = &ring->buff_ring[ring->hw_head]; - is_err = (0x0000003CU & rxd_wb->status); + rx_stat = (0x0000003CU & rxd_wb->status) >> 2; is_rx_check_sum_enabled = (rxd_wb->type) & (0x3U << 19); - is_err &= ~0x20U; /* exclude validity bit */ pkt_type = 0xFFU & (rxd_wb->type >> 4); - if (is_rx_check_sum_enabled) { - if (0x0U == (pkt_type & 0x3U)) - buff->is_ip_cso = (is_err & 0x08U) ? 0U : 1U; + if (is_rx_check_sum_enabled & BIT(0) && + (0x0U == (pkt_type & 0x3U))) + buff->is_ip_cso = (rx_stat & BIT(1)) ? 0U : 1U; + if (is_rx_check_sum_enabled & BIT(1)) { if (0x4U == (pkt_type & 0x1CU)) - buff->is_udp_cso = buff->is_cso_err ? 0U : 1U; + buff->is_udp_cso = (rx_stat & BIT(2)) ? 0U : + !!(rx_stat & BIT(3)); else if (0x0U == (pkt_type & 0x1CU)) - buff->is_tcp_cso = buff->is_cso_err ? 0U : 1U; - - /* Checksum offload workaround for small packets */ - if (rxd_wb->pkt_len <= 60) { - buff->is_ip_cso = 0U; - buff->is_cso_err = 0U; - } + buff->is_tcp_cso = (rx_stat & BIT(2)) ? 0U : + !!(rx_stat & BIT(3)); + } + buff->is_cso_err = !!(rx_stat & 0x6); + /* Checksum offload workaround for small packets */ + if (unlikely(rxd_wb->pkt_len <= 60)) { + buff->is_ip_cso = 0U; + buff->is_cso_err = 0U; } - - is_err &= ~0x18U; dma_unmap_page(ndev, buff->pa, buff->len, DMA_FROM_DEVICE); - if (is_err || rxd_wb->type & 0x1000U) { - /* status error or DMA error */ + if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) { + /* MAC error or DMA error */ buff->is_error = 1U; } else { if (self->aq_nic_cfg->is_rss) { From bbb67a44baf973da734b9fd61cba4211da240751 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 9 Nov 2018 11:54:03 +0000 Subject: [PATCH 34/41] net: aquantia: allow rx checksum offload configuration RX Checksum offloads could not be configured and ignored netdev features flag for checksumming. Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 3 +++ drivers/net/ethernet/aquantia/atlantic/aq_main.c | 10 ++++++++-- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 2 +- .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 7 +++++-- 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 7ec8d24b2b0b..a1e70da358ca 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -204,6 +204,9 @@ struct aq_hw_ops { int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version); + int (*hw_set_offload)(struct aq_hw_s *self, + struct aq_nic_cfg_s *aq_nic_cfg); + int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc); }; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index e3ae29e523f0..7c07eef275eb 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -99,8 +99,11 @@ static int aq_ndev_set_features(struct net_device *ndev, struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic); bool is_lro = false; + int err = 0; - if (aq_cfg->hw_features & NETIF_F_LRO) { + aq_cfg->features = features; + + if (aq_cfg->aq_hw_caps->hw_features & NETIF_F_LRO) { is_lro = features & NETIF_F_LRO; if (aq_cfg->is_lro != is_lro) { @@ -112,8 +115,11 @@ static int aq_ndev_set_features(struct net_device *ndev, } } } + if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) + err = aq_nic->aq_hw_ops->hw_set_offload(aq_nic->aq_hw, + aq_cfg); - return 0; + return err; } static int aq_ndev_set_mac_address(struct net_device *ndev, void *addr) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index b5e7c98f424c..7abdc0952425 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -118,7 +118,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) } cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; - cfg->hw_features = cfg->aq_hw_caps->hw_features; + cfg->features = cfg->aq_hw_caps->hw_features; } static int aq_nic_update_link_status(struct aq_nic_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index c1582f4e8e1b..44ec47a3d60a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -23,7 +23,7 @@ struct aq_vec_s; struct aq_nic_cfg_s { const struct aq_hw_caps_s *aq_hw_caps; - u64 hw_features; + u64 features; u32 rxds; /* rx ring size, descriptors # */ u32 txds; /* tx ring size, descriptors # */ u32 vecs; /* vecs==allocated irqs */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 179ce12fe4d8..f02592f43fe3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -234,8 +234,10 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, hw_atl_tpo_tcp_udp_crc_offload_en_set(self, 1); /* RX checksums offloads*/ - hw_atl_rpo_ipv4header_crc_offload_en_set(self, 1); - hw_atl_rpo_tcp_udp_crc_offload_en_set(self, 1); + hw_atl_rpo_ipv4header_crc_offload_en_set(self, !!(aq_nic_cfg->features & + NETIF_F_RXCSUM)); + hw_atl_rpo_tcp_udp_crc_offload_en_set(self, !!(aq_nic_cfg->features & + NETIF_F_RXCSUM)); /* LSO offloads*/ hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU); @@ -974,5 +976,6 @@ const struct aq_hw_ops hw_atl_ops_b0 = { .hw_get_regs = hw_atl_utils_hw_get_regs, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, + .hw_set_offload = hw_atl_b0_hw_offload_set, .hw_set_fc = hw_atl_b0_set_fc, }; From d02854dc1999ed3e7fd79ec700c64ac23ac0c458 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 9 Nov 2018 18:56:27 -0700 Subject: [PATCH 35/41] net: qualcomm: rmnet: Fix incorrect assignment of real_dev A null dereference was observed when a sysctl was being set from userspace and rmnet was stuck trying to complete some actions in the NETDEV_REGISTER callback. This is because the real_dev is set only after the device registration handler completes. sysctl call stack - <6> Unable to handle kernel NULL pointer dereference at virtual address 00000108 <2> pc : rmnet_vnd_get_iflink+0x1c/0x28 <2> lr : dev_get_iflink+0x2c/0x40 <2> rmnet_vnd_get_iflink+0x1c/0x28 <2> inet6_fill_ifinfo+0x15c/0x234 <2> inet6_ifinfo_notify+0x68/0xd4 <2> ndisc_ifinfo_sysctl_change+0x1b8/0x234 <2> proc_sys_call_handler+0xac/0x100 <2> proc_sys_write+0x3c/0x4c <2> __vfs_write+0x54/0x14c <2> vfs_write+0xcc/0x188 <2> SyS_write+0x60/0xc0 <2> el0_svc_naked+0x34/0x38 device register call stack - <2> notifier_call_chain+0x84/0xbc <2> raw_notifier_call_chain+0x38/0x48 <2> call_netdevice_notifiers_info+0x40/0x70 <2> call_netdevice_notifiers+0x38/0x60 <2> register_netdevice+0x29c/0x3d8 <2> rmnet_vnd_newlink+0x68/0xe8 <2> rmnet_newlink+0xa0/0x160 <2> rtnl_newlink+0x57c/0x6c8 <2> rtnetlink_rcv_msg+0x1dc/0x328 <2> netlink_rcv_skb+0xac/0x118 <2> rtnetlink_rcv+0x24/0x30 <2> netlink_unicast+0x158/0x1f0 <2> netlink_sendmsg+0x32c/0x338 <2> sock_sendmsg+0x44/0x60 <2> SyS_sendto+0x150/0x1ac <2> el0_svc_naked+0x34/0x38 Fixes: b752eff5be24 ("net: qualcomm: rmnet: Implement ndo_get_iflink") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 0afc3d335d56..d11c16aeb19a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -234,7 +234,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct net_device *real_dev, struct rmnet_endpoint *ep) { - struct rmnet_priv *priv; + struct rmnet_priv *priv = netdev_priv(rmnet_dev); int rc; if (ep->egress_dev) @@ -247,6 +247,8 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, rmnet_dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; rmnet_dev->hw_features |= NETIF_F_SG; + priv->real_dev = real_dev; + rc = register_netdevice(rmnet_dev); if (!rc) { ep->egress_dev = rmnet_dev; @@ -255,9 +257,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, rmnet_dev->rtnl_link_ops = &rmnet_link_ops; - priv = netdev_priv(rmnet_dev); priv->mux_id = id; - priv->real_dev = real_dev; netdev_dbg(rmnet_dev, "rmnet dev created\n"); } From 62230715fd2453b3ba948c9d83cfb3ada9169169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=B0=B0=EC=84=9D=EC=A7=84?= Date: Fri, 9 Nov 2018 16:53:06 -0800 Subject: [PATCH 36/41] flow_dissector: do not dissect l4 ports for fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only first fragment has the sport/dport information, not the following ones. If we want consistent hash for all fragments, we need to ignore ports even for first fragment. This bug is visible for IPv6 traffic, if incoming fragments do not have a flow label, since skb_get_hash() will give different results for first fragment and following ones. It is also visible if any routing rule wants dissection and sport or dport. See commit 5e5d6fed3741 ("ipv6: route: dissect flow in input path if fib rules need it") for details. [edumazet] rewrote the changelog completely. Fixes: 06635a35d13d ("flow_dissect: use programable dissector in skb_flow_dissect and friends") Signed-off-by: 배석진 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 676f3ad629f9..588f475019d4 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1166,8 +1166,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb, break; } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && + !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); From fbd1d5245372e48b494120a30fe0b34b304576c4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 9 Nov 2018 17:37:20 +0100 Subject: [PATCH 37/41] net: mvneta: correct typo The reserved variable should be named reserved1. Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5bfd349bf41a..3ba672e9e353 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -494,7 +494,7 @@ struct mvneta_port { #if defined(__LITTLE_ENDIAN) struct mvneta_tx_desc { u32 command; /* Options used by HW for packet transmitting.*/ - u16 reserverd1; /* csum_l4 (for future use) */ + u16 reserved1; /* csum_l4 (for future use) */ u16 data_size; /* Data size of transmitted packet in bytes */ u32 buf_phys_addr; /* Physical addr of transmitted buffer */ u32 reserved2; /* hw_cmd - (for future use, PMT) */ @@ -519,7 +519,7 @@ struct mvneta_rx_desc { #else struct mvneta_tx_desc { u16 data_size; /* Data size of transmitted packet in bytes */ - u16 reserverd1; /* csum_l4 (for future use) */ + u16 reserved1; /* csum_l4 (for future use) */ u32 command; /* Options used by HW for packet transmitting.*/ u32 reserved2; /* hw_cmd - (for future use, PMT) */ u32 buf_phys_addr; /* Physical addr of transmitted buffer */ From 63c82997f5c0f3e1b914af43d82f712a86bc5f3a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Nov 2018 21:06:26 -0800 Subject: [PATCH 38/41] net: sched: cls_flower: validate nested enc_opts_policy to avoid warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TCA_FLOWER_KEY_ENC_OPTS and TCA_FLOWER_KEY_ENC_OPTS_MASK can only currently contain further nested attributes, which are parsed by hand, so the policy is never actually used resulting in a W=1 build warning: net/sched/cls_flower.c:492:1: warning: ‘enc_opts_policy’ defined but not used [-Wunused-const-variable=] enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { Add the validation anyway to avoid potential bugs when other attributes are added and to make the attribute structure slightly more clear. Validation will also set extact to point to bad attribute on error. Fixes: 0a6e77784f49 ("net/sched: allow flower to match tunnel options") Signed-off-by: Jakub Kicinski Acked-by: Simon Horman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9aada2d0ef06..c6c327874abc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -709,11 +709,23 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, struct netlink_ext_ack *extack) { const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; - int option_len, key_depth, msk_depth = 0; + int err, option_len, key_depth, msk_depth = 0; + + err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); + if (err) + return err; nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { + err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); + if (err) + return err; + nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); } From 7ab412d33b4c7ff3e0148d3db25dd861edd1283d Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Sat, 10 Nov 2018 17:30:24 -0500 Subject: [PATCH 39/41] tipc: fix link re-establish failure When a link failure is detected locally, the link is reset, the flag link->in_session is set to false, and a RESET_MSG with the 'stopping' bit set is sent to the peer. The purpose of this bit is to inform the peer that this endpoint just is going down, and that the peer should handle the reception of this particular RESET message as a local failure. This forces the peer to accept another RESET or ACTIVATE message from this endpoint before it can re-establish the link. This again is necessary to ensure that link session numbers are properly exchanged before the link comes up again. If a failure is detected locally at the same time at the peer endpoint this will do the same, which is also a correct behavior. However, when receiving such messages, the endpoints will not distinguish between 'stopping' RESETs and ordinary ones when it comes to updating session numbers. Both endpoints will copy the received session number and set their 'in_session' flags to true at the reception, while they are still expecting another RESET from the peer before they can go ahead and re-establish. This is contradictory, since, after applying the validation check referred to below, the 'in_session' flag will cause rejection of all such messages, and the link will never come up again. We now fix this by not only handling received RESET/STOPPING messages as a local failure, but also by omitting to set a new session number and the 'in_session' flag in such cases. Fixes: 7ea817f4e832 ("tipc: check session number before accepting link protocol messages") Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 201c3b5bc96b..836727e363c4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1594,14 +1594,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; - /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if (msg_peer_stopping(hdr)) + /* If peer is going down we want full re-establish cycle */ + if (msg_peer_stopping(hdr)) { rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - else if ((mtyp == RESET_MSG) || !link_is_up(l)) + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ + if (mtyp == RESET_MSG || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ - if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING)) + if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; l->peer_session = msg_session(hdr); From a9049ff9214da68df1179a7d5e36b43479abc9b8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 11 Nov 2018 00:41:10 +0100 Subject: [PATCH 40/41] net: dsa: mv88e6xxx: Fix clearing of stats counters The mv88e6161 would sometime fail to probe with a timeout waiting for the switch to complete an operation. This operation is supposed to clear the statistics counters. However, due to a read/modify/write, without the needed mask, the operation actually carried out was more random, with invalid parameters, resulting in the switch not responding. We need to preserve the histogram mode bits, so apply a mask to keep them. Reported-by: Chris Healy Fixes: 40cff8fca9e3 ("net: dsa: mv88e6xxx: Fix stats histogram mode") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/global1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index d721ccf7d8be..38e399e0f30e 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -567,6 +567,8 @@ int mv88e6xxx_g1_stats_clear(struct mv88e6xxx_chip *chip) if (err) return err; + /* Keep the histogram mode bits */ + val &= MV88E6XXX_G1_STATS_OP_HIST_RX_TX; val |= MV88E6XXX_G1_STATS_OP_BUSY | MV88E6XXX_G1_STATS_OP_FLUSH_ALL; err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_STATS_OP, val); From 7236ead1b14923f3ba35cd29cce13246be83f451 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 10 Nov 2018 16:22:29 -0800 Subject: [PATCH 41/41] act_mirred: clear skb->tstamp on redirect If sch_fq is used at ingress, skbs that might have been timestamped by net_timestamp_set() if a packet capture is requesting timestamps could be delayed by arbitrary amount of time, since sch_fq time base is MONOTONIC. Fix this problem by moving code from sch_netem.c to act_mirred.c. Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 3 ++- net/sched/sch_netem.c | 9 --------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1dae5f2b358f..c8cf4d10c435 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -258,7 +258,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, if (is_redirect) { skb2->tc_redirected = 1; skb2->tc_from_ingress = skb2->tc_at_ingress; - + if (skb2->tc_from_ingress) + skb2->tstamp = 0; /* let's the caller reinsert the packet, if possible */ if (use_reinsert) { res->ingress = want_ingress; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 57b3ad9394ad..2c38e3d07924 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -648,15 +648,6 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) */ skb->dev = qdisc_dev(sch); -#ifdef CONFIG_NET_CLS_ACT - /* - * If it's at ingress let's pretend the delay is - * from the network (tstamp will be updated). - */ - if (skb->tc_redirected && skb->tc_from_ingress) - skb->tstamp = 0; -#endif - if (q->slot.slot_next) { q->slot.packets_left--; q->slot.bytes_left -= qdisc_pkt_len(skb);