From 837f08fdecbe4b2ffc7725624342e73b886665a8 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:05 -0700 Subject: [PATCH 01/15] ice: Add basic driver framework for Intel(R) E800 Series This patch adds a basic driver framework for the Intel(R) E800 Ethernet Series of network devices. There is no functionality right now other than the ability to load. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- Documentation/networking/ice.txt | 39 +++++ MAINTAINERS | 1 + drivers/net/ethernet/intel/Kconfig | 14 ++ drivers/net/ethernet/intel/Makefile | 1 + drivers/net/ethernet/intel/ice/Makefile | 10 ++ drivers/net/ethernet/intel/ice/ice.h | 34 +++++ drivers/net/ethernet/intel/ice/ice_devids.h | 19 +++ drivers/net/ethernet/intel/ice/ice_main.c | 158 ++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_type.h | 28 ++++ 9 files changed, 304 insertions(+) create mode 100644 Documentation/networking/ice.txt create mode 100644 drivers/net/ethernet/intel/ice/Makefile create mode 100644 drivers/net/ethernet/intel/ice/ice.h create mode 100644 drivers/net/ethernet/intel/ice/ice_devids.h create mode 100644 drivers/net/ethernet/intel/ice/ice_main.c create mode 100644 drivers/net/ethernet/intel/ice/ice_type.h diff --git a/Documentation/networking/ice.txt b/Documentation/networking/ice.txt new file mode 100644 index 000000000000..6261c46378e1 --- /dev/null +++ b/Documentation/networking/ice.txt @@ -0,0 +1,39 @@ +Intel(R) Ethernet Connection E800 Series Linux Driver +=================================================================== + +Intel ice Linux driver. +Copyright(c) 2018 Intel Corporation. + +Contents +======== +- Enabling the driver +- Support + +The driver in this release supports Intel's E800 Series of products. For +more information, visit Intel's support page at http://support.intel.com. + +Enabling the driver +=================== + +The driver is enabled via the standard kernel configuration system, +using the make command: + + Make oldconfig/silentoldconfig/menuconfig/etc. + +The driver is located in the menu structure at: + + -> Device Drivers + -> Network device support (NETDEVICES [=y]) + -> Ethernet driver support + -> Intel devices + -> Intel(R) Ethernet Connection E800 Series Support + +Support +======= + +For general information, go to the Intel support website at: + + http://support.intel.com + +If an issue is identified with the released source code, please email +the maintainer listed in the MAINTAINERS file. diff --git a/MAINTAINERS b/MAINTAINERS index b3ea844cf228..9107d9241564 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7063,6 +7063,7 @@ F: Documentation/networking/ixgbe.txt F: Documentation/networking/ixgbevf.txt F: Documentation/networking/i40e.txt F: Documentation/networking/i40evf.txt +F: Documentation/networking/ice.txt F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 1feb54b6d92e..14d287bed33c 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -251,6 +251,20 @@ config I40EVF will be called i40evf. MSI-X interrupt support is required for this driver to work correctly. +config ICE + tristate "Intel(R) Ethernet Connection E800 Series Support" + default n + depends on PCI_MSI + ---help--- + This driver supports Intel(R) Ethernet Connection E800 Series of + devices. For more information on how to identify your adapter, go + to the Adapter & Driver ID Guide that can be located at: + + + + To compile this driver as a module, choose M here. The module + will be called ice. + config FM10K tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support" default n diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index 90af7757a885..807a4f8c7e4e 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_I40E) += i40e/ obj-$(CONFIG_IXGB) += ixgb/ obj-$(CONFIG_I40EVF) += i40evf/ obj-$(CONFIG_FM10K) += fm10k/ +obj-$(CONFIG_ICE) += ice/ diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile new file mode 100644 index 000000000000..7ea6295d58fb --- /dev/null +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018, Intel Corporation. + +# +# Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver +# + +obj-$(CONFIG_ICE) += ice.o + +ice-y := ice_main.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h new file mode 100644 index 000000000000..e3c1672d4976 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_H_ +#define _ICE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ice_devids.h" +#include "ice_type.h" + +#define ICE_BAR0 0 + +#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) + +enum ice_state { + __ICE_DOWN, + __ICE_STATE_NBITS /* must be last */ +}; + +struct ice_pf { + struct pci_dev *pdev; + DECLARE_BITMAP(state, __ICE_STATE_NBITS); + u32 msg_enable; + struct ice_hw hw; +}; +#endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h new file mode 100644 index 000000000000..0e14d7215a6e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_DEVIDS_H_ +#define _ICE_DEVIDS_H_ + +/* Device IDs */ +/* Intel(R) Ethernet Controller C810 for backplane */ +#define ICE_DEV_ID_C810_BACKPLANE 0x1591 +/* Intel(R) Ethernet Controller C810 for QSFP */ +#define ICE_DEV_ID_C810_QSFP 0x1592 +/* Intel(R) Ethernet Controller C810 for SFP */ +#define ICE_DEV_ID_C810_SFP 0x1593 +/* Intel(R) Ethernet Controller C810/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_C810_10G_BASE_T 0x1594 +/* Intel(R) Ethernet Controller C810 1GbE */ +#define ICE_DEV_ID_C810_SGMII 0x1595 + +#endif /* _ICE_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c new file mode 100644 index 000000000000..3ada230a3fc7 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +/* Intel(R) Ethernet Connection E800 Series Linux Driver */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ice.h" + +#define DRV_VERSION "ice-0.0.1-k" +#define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" +static const char ice_drv_ver[] = DRV_VERSION; +static const char ice_driver_string[] = DRV_SUMMARY; +static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; + +MODULE_AUTHOR("Intel Corporation, "); +MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +static int debug = -1; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "netif message level (0=none,...,0x7FFF=all)"); + +/** + * ice_probe - Device initialization routine + * @pdev: PCI device information struct + * @ent: entry in ice_pci_tbl + * + * Returns 0 on success, negative on failure + */ +static int ice_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct ice_pf *pf; + struct ice_hw *hw; + int err; + + /* this driver uses devres, see Documentation/driver-model/devres.txt */ + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); + if (err) { + dev_err(&pdev->dev, "I/O map error %d\n", err); + return err; + } + + pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL); + if (!pf) + return -ENOMEM; + + /* set up for high or low dma */ + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); + return err; + } + + pci_enable_pcie_error_reporting(pdev); + pci_set_master(pdev); + + pf->pdev = pdev; + pci_set_drvdata(pdev, pf); + set_bit(__ICE_DOWN, pf->state); + + hw = &pf->hw; + hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; + hw->back = pf; + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + hw->bus.device = PCI_SLOT(pdev->devfn); + hw->bus.func = PCI_FUNC(pdev->devfn); + pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M); + + return 0; +} + +/** + * ice_remove - Device removal routine + * @pdev: PCI device information struct + */ +static void ice_remove(struct pci_dev *pdev) +{ + struct ice_pf *pf = pci_get_drvdata(pdev); + + if (!pf) + return; + + set_bit(__ICE_DOWN, pf->state); + pci_disable_pcie_error_reporting(pdev); +} + +/* ice_pci_tbl - PCI Device ID Table + * + * Wildcard entries (PCI_ANY_ID) should come last + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, + * Class, Class Mask, private data (not used) } + */ +static const struct pci_device_id ice_pci_tbl[] = { + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SGMII), 0 }, + /* required last entry */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ice_pci_tbl); + +static struct pci_driver ice_driver = { + .name = KBUILD_MODNAME, + .id_table = ice_pci_tbl, + .probe = ice_probe, + .remove = ice_remove, +}; + +/** + * ice_module_init - Driver registration routine + * + * ice_module_init is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. + */ +static int __init ice_module_init(void) +{ + int status; + + pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver); + pr_info("%s\n", ice_copyright); + + status = pci_register_driver(&ice_driver); + if (status) + pr_err("failed to register pci driver, err %d\n", status); + + return status; +} +module_init(ice_module_init); + +/** + * ice_module_exit - Driver exit cleanup routine + * + * ice_module_exit is called just before the driver is removed + * from memory. + */ +static void __exit ice_module_exit(void) +{ + pci_unregister_driver(&ice_driver); + pr_info("module unloaded\n"); +} +module_exit(ice_module_exit); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h new file mode 100644 index 000000000000..cd018f81b5c7 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_TYPE_H_ +#define _ICE_TYPE_H_ + +/* Bus parameters */ +struct ice_bus_info { + u16 device; + u8 func; +}; + +/* Port hardware description */ +struct ice_hw { + u8 __iomem *hw_addr; + void *back; + + /* pci info */ + u16 device_id; + u16 vendor_id; + u16 subsystem_device_id; + u16 subsystem_vendor_id; + u8 revision_id; + + struct ice_bus_info bus; +}; + +#endif /* _ICE_TYPE_H_ */ From 7ec59eeac804a59ea6fff81e89b6e584e9955e5b Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:06 -0700 Subject: [PATCH 02/15] ice: Add support for control queues A control queue is a hardware interface which is used by the driver to interact with other subsystems (like firmware, PHY, etc.). It is implemented as a producer-consumer ring. More specifically, an "admin queue" is a type of control queue used to interact with the firmware. This patch introduces data structures and functions to initialize and teardown control/admin queues. Once the admin queue is initialized, the driver uses it to get the firmware version. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/Makefile | 4 +- drivers/net/ethernet/intel/ice/ice.h | 1 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 94 ++ drivers/net/ethernet/intel/ice/ice_common.c | 130 +++ drivers/net/ethernet/intel/ice/ice_common.h | 25 + drivers/net/ethernet/intel/ice/ice_controlq.c | 965 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_controlq.h | 83 ++ .../net/ethernet/intel/ice/ice_hw_autogen.h | 32 + drivers/net/ethernet/intel/ice/ice_main.c | 11 +- drivers/net/ethernet/intel/ice/ice_osdep.h | 72 ++ drivers/net/ethernet/intel/ice/ice_status.h | 21 + drivers/net/ethernet/intel/ice/ice_type.h | 22 + 12 files changed, 1458 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h create mode 100644 drivers/net/ethernet/intel/ice/ice_common.c create mode 100644 drivers/net/ethernet/intel/ice/ice_common.h create mode 100644 drivers/net/ethernet/intel/ice/ice_controlq.c create mode 100644 drivers/net/ethernet/intel/ice/ice_controlq.h create mode 100644 drivers/net/ethernet/intel/ice/ice_hw_autogen.h create mode 100644 drivers/net/ethernet/intel/ice/ice_osdep.h create mode 100644 drivers/net/ethernet/intel/ice/ice_status.h diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 7ea6295d58fb..562836a8f18a 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -7,4 +7,6 @@ obj-$(CONFIG_ICE) += ice.o -ice-y := ice_main.o +ice-y := ice_main.o \ + ice_controlq.o \ + ice_common.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index e3c1672d4976..59b1400f9e81 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "ice_devids.h" #include "ice_type.h" diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h new file mode 100644 index 000000000000..e2fa8e2ea6ae --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_ADMINQ_CMD_H_ +#define _ICE_ADMINQ_CMD_H_ + +/* This header file defines the Admin Queue commands, error codes and + * descriptor format. It is shared between Firmware and Software. + */ + +struct ice_aqc_generic { + __le32 param0; + __le32 param1; + __le32 addr_high; + __le32 addr_low; +}; + +/* Get version (direct 0x0001) */ +struct ice_aqc_get_ver { + __le32 rom_ver; + __le32 fw_build; + u8 fw_branch; + u8 fw_major; + u8 fw_minor; + u8 fw_patch; + u8 api_branch; + u8 api_major; + u8 api_minor; + u8 api_patch; +}; + +/* Queue Shutdown (direct 0x0003) */ +struct ice_aqc_q_shutdown { +#define ICE_AQC_DRIVER_UNLOADING BIT(0) + __le32 driver_unloading; + u8 reserved[12]; +}; + +/** + * struct ice_aq_desc - Admin Queue (AQ) descriptor + * @flags: ICE_AQ_FLAG_* flags + * @opcode: AQ command opcode + * @datalen: length in bytes of indirect/external data buffer + * @retval: return value from firmware + * @cookie_h: opaque data high-half + * @cookie_l: opaque data low-half + * @params: command-specific parameters + * + * Descriptor format for commands the driver posts on the Admin Transmit Queue + * (ATQ). The firmware writes back onto the command descriptor and returns + * the result of the command. Asynchronous events that are not an immediate + * result of the command are written to the Admin Receive Queue (ARQ) using + * the same descriptor format. Descriptors are in little-endian notation with + * 32-bit words. + */ +struct ice_aq_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 retval; + __le32 cookie_high; + __le32 cookie_low; + union { + u8 raw[16]; + struct ice_aqc_generic generic; + struct ice_aqc_get_ver get_ver; + struct ice_aqc_q_shutdown q_shutdown; + } params; +}; + +/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ +#define ICE_AQ_LG_BUF 512 + +#define ICE_AQ_FLAG_LB_S 9 +#define ICE_AQ_FLAG_BUF_S 12 +#define ICE_AQ_FLAG_SI_S 13 + +#define ICE_AQ_FLAG_LB BIT(ICE_AQ_FLAG_LB_S) /* 0x200 */ +#define ICE_AQ_FLAG_BUF BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */ +#define ICE_AQ_FLAG_SI BIT(ICE_AQ_FLAG_SI_S) /* 0x2000 */ + +/* error codes */ +enum ice_aq_err { + ICE_AQ_RC_OK = 0, /* success */ +}; + +/* Admin Queue command opcodes */ +enum ice_adminq_opc { + /* AQ commands */ + ice_aqc_opc_get_ver = 0x0001, + ice_aqc_opc_q_shutdown = 0x0003, +}; + +#endif /* _ICE_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c new file mode 100644 index 000000000000..78452b92cae6 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_common.h" +#include "ice_adminq_cmd.h" + +/** + * ice_debug_cq + * @hw: pointer to the hardware structure + * @mask: debug mask + * @desc: pointer to control queue descriptor + * @buf: pointer to command buffer + * @buf_len: max length of buf + * + * Dumps debug log about control command with descriptor contents. + */ +void ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, + void *buf, u16 buf_len) +{ + struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; + u16 len; + +#ifndef CONFIG_DYNAMIC_DEBUG + if (!(mask & hw->debug_mask)) + return; +#endif + + if (!desc) + return; + + len = le16_to_cpu(cq_desc->datalen); + + ice_debug(hw, mask, + "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", + le16_to_cpu(cq_desc->opcode), + le16_to_cpu(cq_desc->flags), + le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval)); + ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->cookie_high), + le32_to_cpu(cq_desc->cookie_low)); + ice_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.param0), + le32_to_cpu(cq_desc->params.generic.param1)); + ice_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n", + le32_to_cpu(cq_desc->params.generic.addr_high), + le32_to_cpu(cq_desc->params.generic.addr_low)); + if (buf && cq_desc->datalen != 0) { + ice_debug(hw, mask, "Buffer:\n"); + if (buf_len < len) + len = buf_len; + + ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len); + } +} + +/* FW Admin Queue command wrappers */ + +/** + * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue + * @hw: pointer to the hw struct + * @desc: descriptor describing the command + * @buf: buffer to use for indirect commands (NULL for direct commands) + * @buf_size: size of buffer for indirect commands (0 for direct commands) + * @cd: pointer to command details structure + * + * Helper function to send FW Admin Queue commands to the FW Admin Queue. + */ +enum ice_status +ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, + u16 buf_size, struct ice_sq_cd *cd) +{ + return ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd); +} + +/** + * ice_aq_get_fw_ver + * @hw: pointer to the hw struct + * @cd: pointer to command details structure or NULL + * + * Get the firmware version (0x0001) from the admin queue commands + */ +enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd) +{ + struct ice_aqc_get_ver *resp; + struct ice_aq_desc desc; + enum ice_status status; + + resp = &desc.params.get_ver; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + + if (!status) { + hw->fw_branch = resp->fw_branch; + hw->fw_maj_ver = resp->fw_major; + hw->fw_min_ver = resp->fw_minor; + hw->fw_patch = resp->fw_patch; + hw->fw_build = le32_to_cpu(resp->fw_build); + hw->api_branch = resp->api_branch; + hw->api_maj_ver = resp->api_major; + hw->api_min_ver = resp->api_minor; + hw->api_patch = resp->api_patch; + } + + return status; +} + +/** + * ice_aq_q_shutdown + * @hw: pointer to the hw struct + * @unloading: is the driver unloading itself + * + * Tell the Firmware that we're shutting down the AdminQ and whether + * or not the driver is unloading as well (0x0003). + */ +enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading) +{ + struct ice_aqc_q_shutdown *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.q_shutdown; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown); + + if (unloading) + cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h new file mode 100644 index 000000000000..ba9347459db1 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_COMMON_H_ +#define _ICE_COMMON_H_ + +#include "ice.h" +#include "ice_type.h" + +void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, + u16 buf_len); +enum ice_status ice_init_all_ctrlq(struct ice_hw *hw); +void ice_shutdown_all_ctrlq(struct ice_hw *hw); +enum ice_status +ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_aq_desc *desc, void *buf, u16 buf_size, + struct ice_sq_cd *cd); +bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); +enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); +void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); +enum ice_status +ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, + void *buf, u16 buf_size, struct ice_sq_cd *cd); +enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd); +#endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c new file mode 100644 index 000000000000..b32c0fc04bc9 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_common.h" + +/** + * ice_adminq_init_regs - Initialize AdminQ registers + * @hw: pointer to the hardware structure + * + * This assumes the alloc_sq and alloc_rq functions have already been called + */ +static void ice_adminq_init_regs(struct ice_hw *hw) +{ + struct ice_ctl_q_info *cq = &hw->adminq; + + cq->sq.head = PF_FW_ATQH; + cq->sq.tail = PF_FW_ATQT; + cq->sq.len = PF_FW_ATQLEN; + cq->sq.bah = PF_FW_ATQBAH; + cq->sq.bal = PF_FW_ATQBAL; + cq->sq.len_mask = PF_FW_ATQLEN_ATQLEN_M; + cq->sq.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M; + cq->sq.head_mask = PF_FW_ATQH_ATQH_M; + + cq->rq.head = PF_FW_ARQH; + cq->rq.tail = PF_FW_ARQT; + cq->rq.len = PF_FW_ARQLEN; + cq->rq.bah = PF_FW_ARQBAH; + cq->rq.bal = PF_FW_ARQBAL; + cq->rq.len_mask = PF_FW_ARQLEN_ARQLEN_M; + cq->rq.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M; + cq->rq.head_mask = PF_FW_ARQH_ARQH_M; +} + +/** + * ice_check_sq_alive + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * + * Returns true if Queue is enabled else false. + */ +bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + /* check both queue-length and queue-enable fields */ + if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask) + return (rd32(hw, cq->sq.len) & (cq->sq.len_mask | + cq->sq.len_ena_mask)) == + (cq->num_sq_entries | cq->sq.len_ena_mask); + + return false; +} + +/** + * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc); + + cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size, + &cq->sq.desc_buf.pa, + GFP_KERNEL | __GFP_ZERO); + if (!cq->sq.desc_buf.va) + return ICE_ERR_NO_MEMORY; + cq->sq.desc_buf.size = size; + + cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries, + sizeof(struct ice_sq_cd), GFP_KERNEL); + if (!cq->sq.cmd_buf) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size, + cq->sq.desc_buf.va, cq->sq.desc_buf.pa); + cq->sq.desc_buf.va = NULL; + cq->sq.desc_buf.pa = 0; + cq->sq.desc_buf.size = 0; + return ICE_ERR_NO_MEMORY; + } + + return 0; +} + +/** + * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc); + + cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size, + &cq->rq.desc_buf.pa, + GFP_KERNEL | __GFP_ZERO); + if (!cq->rq.desc_buf.va) + return ICE_ERR_NO_MEMORY; + cq->rq.desc_buf.size = size; + return 0; +} + +/** + * ice_free_ctrlq_sq_ring - Free Control Transmit Queue (ATQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * This assumes the posted send buffers have already been cleaned + * and de-allocated + */ +static void ice_free_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size, + cq->sq.desc_buf.va, cq->sq.desc_buf.pa); + cq->sq.desc_buf.va = NULL; + cq->sq.desc_buf.pa = 0; + cq->sq.desc_buf.size = 0; +} + +/** + * ice_free_ctrlq_rq_ring - Free Control Receive Queue (ARQ) rings + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * This assumes the posted receive buffers have already been cleaned + * and de-allocated + */ +static void ice_free_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.desc_buf.size, + cq->rq.desc_buf.va, cq->rq.desc_buf.pa); + cq->rq.desc_buf.va = NULL; + cq->rq.desc_buf.pa = 0; + cq->rq.desc_buf.size = 0; +} + +/** + * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* We'll be allocating the buffer info memory first, then we can + * allocate the mapped buffers for the event processing + */ + cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries, + sizeof(cq->rq.desc_buf), GFP_KERNEL); + if (!cq->rq.dma_head) + return ICE_ERR_NO_MEMORY; + cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head; + + /* allocate the mapped buffers */ + for (i = 0; i < cq->num_rq_entries; i++) { + struct ice_aq_desc *desc; + struct ice_dma_mem *bi; + + bi = &cq->rq.r.rq_bi[i]; + bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw), + cq->rq_buf_size, &bi->pa, + GFP_KERNEL | __GFP_ZERO); + if (!bi->va) + goto unwind_alloc_rq_bufs; + bi->size = cq->rq_buf_size; + + /* now configure the descriptors for use */ + desc = ICE_CTL_Q_DESC(cq->rq, i); + + desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF); + if (cq->rq_buf_size > ICE_AQ_LG_BUF) + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB); + desc->opcode = 0; + /* This is in accordance with Admin queue design, there is no + * register for buffer size configuration + */ + desc->datalen = cpu_to_le16(bi->size); + desc->retval = 0; + desc->cookie_high = 0; + desc->cookie_low = 0; + desc->params.generic.addr_high = + cpu_to_le32(upper_32_bits(bi->pa)); + desc->params.generic.addr_low = + cpu_to_le32(lower_32_bits(bi->pa)); + desc->params.generic.param0 = 0; + desc->params.generic.param1 = 0; + } + return 0; + +unwind_alloc_rq_bufs: + /* don't try to free the one that failed... */ + i--; + for (; i >= 0; i--) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size, + cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa); + cq->rq.r.rq_bi[i].va = NULL; + cq->rq.r.rq_bi[i].pa = 0; + cq->rq.r.rq_bi[i].size = 0; + } + devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head); + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static enum ice_status +ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* No mapped memory needed yet, just the buffer info structures */ + cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries, + sizeof(cq->sq.desc_buf), GFP_KERNEL); + if (!cq->sq.dma_head) + return ICE_ERR_NO_MEMORY; + cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head; + + /* allocate the mapped buffers */ + for (i = 0; i < cq->num_sq_entries; i++) { + struct ice_dma_mem *bi; + + bi = &cq->sq.r.sq_bi[i]; + bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw), + cq->sq_buf_size, &bi->pa, + GFP_KERNEL | __GFP_ZERO); + if (!bi->va) + goto unwind_alloc_sq_bufs; + bi->size = cq->sq_buf_size; + } + return 0; + +unwind_alloc_sq_bufs: + /* don't try to free the one that failed... */ + i--; + for (; i >= 0; i--) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size, + cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa); + cq->sq.r.sq_bi[i].va = NULL; + cq->sq.r.sq_bi[i].pa = 0; + cq->sq.r.sq_bi[i].size = 0; + } + devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head); + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_free_rq_bufs - Free ARQ buffer info elements + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static void ice_free_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* free descriptors */ + for (i = 0; i < cq->num_rq_entries; i++) { + dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size, + cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa); + cq->rq.r.rq_bi[i].va = NULL; + cq->rq.r.rq_bi[i].pa = 0; + cq->rq.r.rq_bi[i].size = 0; + } + + /* free the dma header */ + devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head); +} + +/** + * ice_free_sq_bufs - Free ATQ buffer info elements + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + */ +static void ice_free_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + int i; + + /* only unmap if the address is non-NULL */ + for (i = 0; i < cq->num_sq_entries; i++) + if (cq->sq.r.sq_bi[i].pa) { + dmam_free_coherent(ice_hw_to_dev(hw), + cq->sq.r.sq_bi[i].size, + cq->sq.r.sq_bi[i].va, + cq->sq.r.sq_bi[i].pa); + cq->sq.r.sq_bi[i].va = NULL; + cq->sq.r.sq_bi[i].pa = 0; + cq->sq.r.sq_bi[i].size = 0; + } + + /* free the buffer info list */ + devm_kfree(ice_hw_to_dev(hw), cq->sq.cmd_buf); + + /* free the dma header */ + devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head); +} + +/** + * ice_cfg_sq_regs - configure Control ATQ registers + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * Configure base address and length registers for the transmit queue + */ +static enum ice_status +ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u32 reg = 0; + + /* Clear Head and Tail */ + wr32(hw, cq->sq.head, 0); + wr32(hw, cq->sq.tail, 0); + + /* set starting point */ + wr32(hw, cq->sq.len, (cq->num_sq_entries | cq->sq.len_ena_mask)); + wr32(hw, cq->sq.bal, lower_32_bits(cq->sq.desc_buf.pa)); + wr32(hw, cq->sq.bah, upper_32_bits(cq->sq.desc_buf.pa)); + + /* Check one register to verify that config was applied */ + reg = rd32(hw, cq->sq.bal); + if (reg != lower_32_bits(cq->sq.desc_buf.pa)) + return ICE_ERR_AQ_ERROR; + + return 0; +} + +/** + * ice_cfg_rq_regs - configure Control ARQ register + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * Configure base address and length registers for the receive (event q) + */ +static enum ice_status +ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u32 reg = 0; + + /* Clear Head and Tail */ + wr32(hw, cq->rq.head, 0); + wr32(hw, cq->rq.tail, 0); + + /* set starting point */ + wr32(hw, cq->rq.len, (cq->num_rq_entries | cq->rq.len_ena_mask)); + wr32(hw, cq->rq.bal, lower_32_bits(cq->rq.desc_buf.pa)); + wr32(hw, cq->rq.bah, upper_32_bits(cq->rq.desc_buf.pa)); + + /* Update tail in the HW to post pre-allocated buffers */ + wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1)); + + /* Check one register to verify that config was applied */ + reg = rd32(hw, cq->rq.bal); + if (reg != lower_32_bits(cq->rq.desc_buf.pa)) + return ICE_ERR_AQ_ERROR; + + return 0; +} + +/** + * ice_init_sq - main initialization routine for Control ATQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * This is the main initialization routine for the Control Send Queue + * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure: + * - cq->num_sq_entries + * - cq->sq_buf_size + * + * Do *NOT* hold the lock when calling this as the memory allocation routines + * called are not going to be atomic context safe + */ +static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code; + + if (cq->sq.count > 0) { + /* queue already initialized */ + ret_code = ICE_ERR_NOT_READY; + goto init_ctrlq_exit; + } + + /* verify input for valid configuration */ + if (!cq->num_sq_entries || !cq->sq_buf_size) { + ret_code = ICE_ERR_CFG; + goto init_ctrlq_exit; + } + + cq->sq.next_to_use = 0; + cq->sq.next_to_clean = 0; + + /* allocate the ring memory */ + ret_code = ice_alloc_ctrlq_sq_ring(hw, cq); + if (ret_code) + goto init_ctrlq_exit; + + /* allocate buffers in the rings */ + ret_code = ice_alloc_sq_bufs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* initialize base registers */ + ret_code = ice_cfg_sq_regs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* success! */ + cq->sq.count = cq->num_sq_entries; + goto init_ctrlq_exit; + +init_ctrlq_free_rings: + ice_free_ctrlq_sq_ring(hw, cq); + +init_ctrlq_exit: + return ret_code; +} + +/** + * ice_init_rq - initialize ARQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * The main initialization routine for the Admin Receive (Event) Queue. + * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure: + * - cq->num_rq_entries + * - cq->rq_buf_size + * + * Do *NOT* hold the lock when calling this as the memory allocation routines + * called are not going to be atomic context safe + */ +static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code; + + if (cq->rq.count > 0) { + /* queue already initialized */ + ret_code = ICE_ERR_NOT_READY; + goto init_ctrlq_exit; + } + + /* verify input for valid configuration */ + if (!cq->num_rq_entries || !cq->rq_buf_size) { + ret_code = ICE_ERR_CFG; + goto init_ctrlq_exit; + } + + cq->rq.next_to_use = 0; + cq->rq.next_to_clean = 0; + + /* allocate the ring memory */ + ret_code = ice_alloc_ctrlq_rq_ring(hw, cq); + if (ret_code) + goto init_ctrlq_exit; + + /* allocate buffers in the rings */ + ret_code = ice_alloc_rq_bufs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* initialize base registers */ + ret_code = ice_cfg_rq_regs(hw, cq); + if (ret_code) + goto init_ctrlq_free_rings; + + /* success! */ + cq->rq.count = cq->num_rq_entries; + goto init_ctrlq_exit; + +init_ctrlq_free_rings: + ice_free_ctrlq_rq_ring(hw, cq); + +init_ctrlq_exit: + return ret_code; +} + +/** + * ice_shutdown_sq - shutdown the Control ATQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * The main shutdown routine for the Control Transmit Queue + */ +static enum ice_status +ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code = 0; + + mutex_lock(&cq->sq_lock); + + if (!cq->sq.count) { + ret_code = ICE_ERR_NOT_READY; + goto shutdown_sq_out; + } + + /* Stop firmware AdminQ processing */ + wr32(hw, cq->sq.head, 0); + wr32(hw, cq->sq.tail, 0); + wr32(hw, cq->sq.len, 0); + wr32(hw, cq->sq.bal, 0); + wr32(hw, cq->sq.bah, 0); + + cq->sq.count = 0; /* to indicate uninitialized queue */ + + /* free ring buffers and the ring itself */ + ice_free_sq_bufs(hw, cq); + ice_free_ctrlq_sq_ring(hw, cq); + +shutdown_sq_out: + mutex_unlock(&cq->sq_lock); + return ret_code; +} + +/** + * ice_aq_ver_check - Check the reported AQ API version. + * @fw_branch: The "branch" of FW, typically describes the device type + * @fw_major: The major version of the FW API + * @fw_minor: The minor version increment of the FW API + * + * Checks if the driver should load on a given AQ API version. + * + * Return: 'true' iff the driver should attempt to load. 'false' otherwise. + */ +static bool ice_aq_ver_check(u8 fw_branch, u8 fw_major, u8 fw_minor) +{ + if (fw_branch != EXP_FW_API_VER_BRANCH) + return false; + if (fw_major != EXP_FW_API_VER_MAJOR) + return false; + if (fw_minor != EXP_FW_API_VER_MINOR) + return false; + return true; +} + +/** + * ice_shutdown_rq - shutdown Control ARQ + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * The main shutdown routine for the Control Receive Queue + */ +static enum ice_status +ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + enum ice_status ret_code = 0; + + mutex_lock(&cq->rq_lock); + + if (!cq->rq.count) { + ret_code = ICE_ERR_NOT_READY; + goto shutdown_rq_out; + } + + /* Stop Control Queue processing */ + wr32(hw, cq->rq.head, 0); + wr32(hw, cq->rq.tail, 0); + wr32(hw, cq->rq.len, 0); + wr32(hw, cq->rq.bal, 0); + wr32(hw, cq->rq.bah, 0); + + /* set rq.count to 0 to indicate uninitialized queue */ + cq->rq.count = 0; + + /* free ring buffers and the ring itself */ + ice_free_rq_bufs(hw, cq); + ice_free_ctrlq_rq_ring(hw, cq); + +shutdown_rq_out: + mutex_unlock(&cq->rq_lock); + return ret_code; +} + +/** + * ice_init_check_adminq - Check version for Admin Queue to know if its alive + * @hw: pointer to the hardware structure + */ +static enum ice_status ice_init_check_adminq(struct ice_hw *hw) +{ + struct ice_ctl_q_info *cq = &hw->adminq; + enum ice_status status; + + status = ice_aq_get_fw_ver(hw, NULL); + if (status) + goto init_ctrlq_free_rq; + + if (!ice_aq_ver_check(hw->api_branch, hw->api_maj_ver, + hw->api_min_ver)) { + status = ICE_ERR_FW_API_VER; + goto init_ctrlq_free_rq; + } + + return 0; + +init_ctrlq_free_rq: + ice_shutdown_rq(hw, cq); + ice_shutdown_sq(hw, cq); + mutex_destroy(&cq->sq_lock); + mutex_destroy(&cq->rq_lock); + return status; +} + +/** + * ice_init_ctrlq - main initialization routine for any control Queue + * @hw: pointer to the hardware structure + * @q_type: specific Control queue type + * + * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure: + * - cq->num_sq_entries + * - cq->num_rq_entries + * - cq->rq_buf_size + * - cq->sq_buf_size + * + */ +static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) +{ + struct ice_ctl_q_info *cq; + enum ice_status ret_code; + + switch (q_type) { + case ICE_CTL_Q_ADMIN: + ice_adminq_init_regs(hw); + cq = &hw->adminq; + break; + default: + return ICE_ERR_PARAM; + } + cq->qtype = q_type; + + /* verify input for valid configuration */ + if (!cq->num_rq_entries || !cq->num_sq_entries || + !cq->rq_buf_size || !cq->sq_buf_size) { + return ICE_ERR_CFG; + } + mutex_init(&cq->sq_lock); + mutex_init(&cq->rq_lock); + + /* setup SQ command write back timeout */ + cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT; + + /* allocate the ATQ */ + ret_code = ice_init_sq(hw, cq); + if (ret_code) + goto init_ctrlq_destroy_locks; + + /* allocate the ARQ */ + ret_code = ice_init_rq(hw, cq); + if (ret_code) + goto init_ctrlq_free_sq; + + /* success! */ + return 0; + +init_ctrlq_free_sq: + ice_shutdown_sq(hw, cq); +init_ctrlq_destroy_locks: + mutex_destroy(&cq->sq_lock); + mutex_destroy(&cq->rq_lock); + return ret_code; +} + +/** + * ice_init_all_ctrlq - main initialization routine for all control queues + * @hw: pointer to the hardware structure + * + * Prior to calling this function, drivers *MUST* set the following fields + * in the cq->structure for all control queues: + * - cq->num_sq_entries + * - cq->num_rq_entries + * - cq->rq_buf_size + * - cq->sq_buf_size + */ +enum ice_status ice_init_all_ctrlq(struct ice_hw *hw) +{ + enum ice_status ret_code; + + /* Init FW admin queue */ + ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN); + if (ret_code) + return ret_code; + + return ice_init_check_adminq(hw); +} + +/** + * ice_shutdown_ctrlq - shutdown routine for any control queue + * @hw: pointer to the hardware structure + * @q_type: specific Control queue type + */ +static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) +{ + struct ice_ctl_q_info *cq; + + switch (q_type) { + case ICE_CTL_Q_ADMIN: + cq = &hw->adminq; + if (ice_check_sq_alive(hw, cq)) + ice_aq_q_shutdown(hw, true); + break; + default: + return; + } + + ice_shutdown_sq(hw, cq); + ice_shutdown_rq(hw, cq); + mutex_destroy(&cq->sq_lock); + mutex_destroy(&cq->rq_lock); +} + +/** + * ice_shutdown_all_ctrlq - shutdown routine for all control queues + * @hw: pointer to the hardware structure + */ +void ice_shutdown_all_ctrlq(struct ice_hw *hw) +{ + /* Shutdown FW admin queue */ + ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); +} + +/** + * ice_clean_sq - cleans Admin send queue (ATQ) + * @hw: pointer to the hardware structure + * @cq: pointer to the specific Control queue + * + * returns the number of free desc + */ +static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + struct ice_ctl_q_ring *sq = &cq->sq; + u16 ntc = sq->next_to_clean; + struct ice_sq_cd *details; + struct ice_aq_desc *desc; + + desc = ICE_CTL_Q_DESC(*sq, ntc); + details = ICE_CTL_Q_DETAILS(*sq, ntc); + + while (rd32(hw, cq->sq.head) != ntc) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head)); + memset(desc, 0, sizeof(*desc)); + memset(details, 0, sizeof(*details)); + ntc++; + if (ntc == sq->count) + ntc = 0; + desc = ICE_CTL_Q_DESC(*sq, ntc); + details = ICE_CTL_Q_DETAILS(*sq, ntc); + } + + sq->next_to_clean = ntc; + + return ICE_CTL_Q_DESC_UNUSED(sq); +} + +/** + * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ) + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * + * Returns true if the firmware has processed all descriptors on the + * admin send queue. Returns false if there are still requests pending. + */ +static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + /* AQ designers suggest use of head for better + * timing reliability than DD bit + */ + return rd32(hw, cq->sq.head) == cq->sq.next_to_use; +} + +/** + * ice_sq_send_cmd - send command to Control Queue (ATQ) + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buf: buffer to use for indirect commands (or NULL for direct commands) + * @buf_size: size of buffer for indirect commands (or 0 for direct commands) + * @cd: pointer to command details structure + * + * This is the main send command routine for the ATQ. It runs the q, + * cleans the queue, etc. + */ +enum ice_status +ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_aq_desc *desc, void *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_dma_mem *dma_buf = NULL; + struct ice_aq_desc *desc_on_ring; + bool cmd_completed = false; + enum ice_status status = 0; + struct ice_sq_cd *details; + u32 total_delay = 0; + u16 retval = 0; + u32 val = 0; + + mutex_lock(&cq->sq_lock); + + cq->sq_last_status = ICE_AQ_RC_OK; + + if (!cq->sq.count) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Send queue not initialized.\n"); + status = ICE_ERR_AQ_EMPTY; + goto sq_send_command_error; + } + + if ((buf && !buf_size) || (!buf && buf_size)) { + status = ICE_ERR_PARAM; + goto sq_send_command_error; + } + + if (buf) { + if (buf_size > cq->sq_buf_size) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Invalid buffer size for Control Send queue: %d.\n", + buf_size); + status = ICE_ERR_INVAL_SIZE; + goto sq_send_command_error; + } + + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF); + if (buf_size > ICE_AQ_LG_BUF) + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB); + } + + val = rd32(hw, cq->sq.head); + if (val >= cq->num_sq_entries) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "head overrun at %d in the Control Send Queue ring\n", + val); + status = ICE_ERR_AQ_EMPTY; + goto sq_send_command_error; + } + + details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use); + if (cd) + memcpy(details, cd, sizeof(*details)); + else + memset(details, 0, sizeof(*details)); + + /* Call clean and check queue available function to reclaim the + * descriptors that were processed by FW/MBX; the function returns the + * number of desc available. The clean function called here could be + * called in a separate thread in case of asynchronous completions. + */ + if (ice_clean_sq(hw, cq) == 0) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Error: Control Send Queue is full.\n"); + status = ICE_ERR_AQ_FULL; + goto sq_send_command_error; + } + + /* initialize the temp desc pointer with the right desc */ + desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use); + + /* if the desc is available copy the temp desc to the right place */ + memcpy(desc_on_ring, desc, sizeof(*desc_on_ring)); + + /* if buf is not NULL assume indirect command */ + if (buf) { + dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use]; + /* copy the user buf into the respective DMA buf */ + memcpy(dma_buf->va, buf, buf_size); + desc_on_ring->datalen = cpu_to_le16(buf_size); + + /* Update the address values in the desc with the pa value + * for respective buffer + */ + desc_on_ring->params.generic.addr_high = + cpu_to_le32(upper_32_bits(dma_buf->pa)); + desc_on_ring->params.generic.addr_low = + cpu_to_le32(lower_32_bits(dma_buf->pa)); + } + + /* Debug desc and buffer */ + ice_debug(hw, ICE_DBG_AQ_MSG, + "ATQ: Control Send queue desc and buffer:\n"); + + ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size); + + (cq->sq.next_to_use)++; + if (cq->sq.next_to_use == cq->sq.count) + cq->sq.next_to_use = 0; + wr32(hw, cq->sq.tail, cq->sq.next_to_use); + + do { + if (ice_sq_done(hw, cq)) + break; + + mdelay(1); + total_delay++; + } while (total_delay < cq->sq_cmd_timeout); + + /* if ready, copy the desc back to temp */ + if (ice_sq_done(hw, cq)) { + memcpy(desc, desc_on_ring, sizeof(*desc)); + if (buf) { + /* get returned length to copy */ + u16 copy_size = le16_to_cpu(desc->datalen); + + if (copy_size > buf_size) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Return len %d > than buf len %d\n", + copy_size, buf_size); + status = ICE_ERR_AQ_ERROR; + } else { + memcpy(buf, dma_buf->va, copy_size); + } + } + retval = le16_to_cpu(desc->retval); + if (retval) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Send Queue command completed with error 0x%x\n", + retval); + + /* strip off FW internal code */ + retval &= 0xff; + } + cmd_completed = true; + if (!status && retval != ICE_AQ_RC_OK) + status = ICE_ERR_AQ_ERROR; + cq->sq_last_status = (enum ice_aq_err)retval; + } + + ice_debug(hw, ICE_DBG_AQ_MSG, + "ATQ: desc and buffer writeback:\n"); + + ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size); + + /* save writeback AQ if requested */ + if (details->wb_desc) + memcpy(details->wb_desc, desc_on_ring, + sizeof(*details->wb_desc)); + + /* update the error if time out occurred */ + if (!cmd_completed) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Send Queue Writeback timeout.\n"); + status = ICE_ERR_AQ_TIMEOUT; + } + +sq_send_command_error: + mutex_unlock(&cq->sq_lock); + return status; +} + +/** + * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function + * @desc: pointer to the temp descriptor (non DMA mem) + * @opcode: the opcode can be used to decide which flags to turn off or on + * + * Fill the desc with default values + */ +void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode) +{ + /* zero out the desc */ + memset(desc, 0, sizeof(*desc)); + desc->opcode = cpu_to_le16(opcode); + desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI); +} diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h new file mode 100644 index 000000000000..6133d38525e0 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_CONTROLQ_H_ +#define _ICE_CONTROLQ_H_ + +#include "ice_adminq_cmd.h" + +#define ICE_CTL_Q_DESC(R, i) \ + (&(((struct ice_aq_desc *)((R).desc_buf.va))[i])) + +#define ICE_CTL_Q_DESC_UNUSED(R) \ + (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1) + +/* Defines that help manage the driver vs FW API checks. + * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. + * + */ +#define EXP_FW_API_VER_BRANCH 0x00 +#define EXP_FW_API_VER_MAJOR 0x00 +#define EXP_FW_API_VER_MINOR 0x01 + +/* Different control queue types: These are mainly for SW consumption. */ +enum ice_ctl_q { + ICE_CTL_Q_UNKNOWN = 0, + ICE_CTL_Q_ADMIN, +}; + +/* Control Queue default settings */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */ + +struct ice_ctl_q_ring { + void *dma_head; /* Virtual address to dma head */ + struct ice_dma_mem desc_buf; /* descriptor ring memory */ + void *cmd_buf; /* command buffer memory */ + + union { + struct ice_dma_mem *sq_bi; + struct ice_dma_mem *rq_bi; + } r; + + u16 count; /* Number of descriptors */ + + /* used for interrupt processing */ + u16 next_to_use; + u16 next_to_clean; + + /* used for queue tracking */ + u32 head; + u32 tail; + u32 len; + u32 bah; + u32 bal; + u32 len_mask; + u32 len_ena_mask; + u32 head_mask; +}; + +/* sq transaction details */ +struct ice_sq_cd { + struct ice_aq_desc *wb_desc; +}; + +#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i])) + +/* Control Queue information */ +struct ice_ctl_q_info { + enum ice_ctl_q qtype; + struct ice_ctl_q_ring rq; /* receive queue */ + struct ice_ctl_q_ring sq; /* send queue */ + u32 sq_cmd_timeout; /* send queue cmd write back timeout */ + u16 num_rq_entries; /* receive queue depth */ + u16 num_sq_entries; /* send queue depth */ + u16 rq_buf_size; /* receive queue buffer size */ + u16 sq_buf_size; /* send queue buffer size */ + struct mutex sq_lock; /* Send queue lock */ + struct mutex rq_lock; /* Receive queue lock */ + enum ice_aq_err sq_last_status; /* last status on send queue */ + enum ice_aq_err rq_last_status; /* last status on receive queue */ +}; + +#endif /* _ICE_CONTROLQ_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h new file mode 100644 index 000000000000..66a982b55eaf --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +/* Machine-generated file */ + +#ifndef _ICE_HW_AUTOGEN_H_ +#define _ICE_HW_AUTOGEN_H_ + +#define PF_FW_ARQBAH 0x00080180 +#define PF_FW_ARQBAL 0x00080080 +#define PF_FW_ARQH 0x00080380 +#define PF_FW_ARQH_ARQH_S 0 +#define PF_FW_ARQH_ARQH_M ICE_M(0x3FF, PF_FW_ARQH_ARQH_S) +#define PF_FW_ARQLEN 0x00080280 +#define PF_FW_ARQLEN_ARQLEN_S 0 +#define PF_FW_ARQLEN_ARQLEN_M ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S) +#define PF_FW_ARQLEN_ARQENABLE_S 31 +#define PF_FW_ARQLEN_ARQENABLE_M BIT(PF_FW_ARQLEN_ARQENABLE_S) +#define PF_FW_ARQT 0x00080480 +#define PF_FW_ATQBAH 0x00080100 +#define PF_FW_ATQBAL 0x00080000 +#define PF_FW_ATQH 0x00080300 +#define PF_FW_ATQH_ATQH_S 0 +#define PF_FW_ATQH_ATQH_M ICE_M(0x3FF, PF_FW_ATQH_ATQH_S) +#define PF_FW_ATQLEN 0x00080200 +#define PF_FW_ATQLEN_ATQLEN_S 0 +#define PF_FW_ATQLEN_ATQLEN_M ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S) +#define PF_FW_ATQLEN_ATQENABLE_S 31 +#define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S) +#define PF_FW_ATQT 0x00080400 + +#endif /* _ICE_HW_AUTOGEN_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3ada230a3fc7..8b63c25fda8b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -20,7 +20,11 @@ MODULE_VERSION(DRV_VERSION); static int debug = -1; module_param(debug, int, 0644); -MODULE_PARM_DESC(debug, "netif message level (0=none,...,0x7FFF=all)"); +#ifndef CONFIG_DYNAMIC_DEBUG +MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)"); +#else +MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); +#endif /* !CONFIG_DYNAMIC_DEBUG */ /** * ice_probe - Device initialization routine @@ -79,6 +83,11 @@ static int ice_probe(struct pci_dev *pdev, hw->bus.func = PCI_FUNC(pdev->devfn); pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M); +#ifndef CONFIG_DYNAMIC_DEBUG + if (debug < -1) + hw->debug_mask = debug; +#endif + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h new file mode 100644 index 000000000000..5c992aa1f150 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_OSDEP_H_ +#define _ICE_OSDEP_H_ + +#include +#include +#ifndef CONFIG_64BIT +#include +#endif + +#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +#define rd32(a, reg) readl((a)->hw_addr + (reg)) +#define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) +#define rd64(a, reg) readq((a)->hw_addr + (reg)) + +#define ICE_M(m, s) ((m) << (s)) + +struct ice_dma_mem { + void *va; + dma_addr_t pa; + size_t size; +}; + +#define ice_hw_to_dev(ptr) \ + (&(container_of((ptr), struct ice_pf, hw))->pdev->dev) + +#ifdef CONFIG_DYNAMIC_DEBUG +#define ice_debug(hw, type, fmt, args...) \ + dev_dbg(ice_hw_to_dev(hw), fmt, ##args) + +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ + print_hex_dump_debug(KBUILD_MODNAME " ", \ + DUMP_PREFIX_OFFSET, rowsize, \ + groupsize, buf, len, false) +#else +#define ice_debug(hw, type, fmt, args...) \ +do { \ + if ((type) & (hw)->debug_mask) \ + dev_info(ice_hw_to_dev(hw), fmt, ##args); \ +} while (0) + +#ifdef DEBUG +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ +do { \ + if ((type) & (hw)->debug_mask) \ + print_hex_dump_debug(KBUILD_MODNAME, \ + DUMP_PREFIX_OFFSET, \ + rowsize, groupsize, buf, \ + len, false); \ +} while (0) +#else +#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \ +do { \ + struct ice_hw *hw_l = hw; \ + if ((type) & (hw_l)->debug_mask) { \ + u16 len_l = len; \ + u8 *buf_l = buf; \ + int i; \ + for (i = 0; i < (len_l - 16); i += 16) \ + ice_debug(hw_l, type, "0x%04X %16ph\n",\ + i, ((buf_l) + i)); \ + if (i < len_l) \ + ice_debug(hw_l, type, "0x%04X %*ph\n", \ + i, ((len_l) - i), ((buf_l) + i));\ + } \ +} while (0) +#endif /* DEBUG */ +#endif /* CONFIG_DYNAMIC_DEBUG */ + +#endif /* _ICE_OSDEP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h new file mode 100644 index 000000000000..978c6dff3c71 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_STATUS_H_ +#define _ICE_STATUS_H_ + +/* Error Codes */ +enum ice_status { + ICE_ERR_PARAM = -1, + ICE_ERR_NOT_READY = -3, + ICE_ERR_INVAL_SIZE = -6, + ICE_ERR_FW_API_VER = -10, + ICE_ERR_NO_MEMORY = -11, + ICE_ERR_CFG = -12, + ICE_ERR_AQ_ERROR = -100, + ICE_ERR_AQ_TIMEOUT = -101, + ICE_ERR_AQ_FULL = -102, + ICE_ERR_AQ_EMPTY = -104, +}; + +#endif /* _ICE_STATUS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index cd018f81b5c7..212ab6cc0a42 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -4,6 +4,15 @@ #ifndef _ICE_TYPE_H_ #define _ICE_TYPE_H_ +#include "ice_status.h" +#include "ice_hw_autogen.h" +#include "ice_osdep.h" +#include "ice_controlq.h" + +/* debug masks - set these bits in hw->debug_mask to control output */ +#define ICE_DBG_AQ_MSG BIT_ULL(24) +#define ICE_DBG_AQ_CMD BIT_ULL(27) + /* Bus parameters */ struct ice_bus_info { u16 device; @@ -14,6 +23,7 @@ struct ice_bus_info { struct ice_hw { u8 __iomem *hw_addr; void *back; + u64 debug_mask; /* bitmap for debug mask */ /* pci info */ u16 device_id; @@ -23,6 +33,18 @@ struct ice_hw { u8 revision_id; struct ice_bus_info bus; + /* Control Queue info */ + struct ice_ctl_q_info adminq; + + u8 api_branch; /* API branch version */ + u8 api_maj_ver; /* API major version */ + u8 api_min_ver; /* API minor version */ + u8 api_patch; /* API patch version */ + u8 fw_branch; /* firmware branch version */ + u8 fw_maj_ver; /* firmware major version */ + u8 fw_min_ver; /* firmware minor version */ + u8 fw_patch; /* firmware patch version */ + u32 fw_build; /* firmware build number */ }; #endif /* _ICE_TYPE_H_ */ From f31e4b6fe227dfd7ed51c3fc0550878c7d7a8cf2 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:07 -0700 Subject: [PATCH 03/15] ice: Start hardware initialization This patch implements multiple pieces of the initialization flow as follows: 1) A reset is issued to ensure a clean device state, followed by initialization of admin queue interface. 2) Once the admin queue interface is up, clear the PF config and transition the device to non-PXE mode. 3) Get the NVM configuration stored in the device's non-volatile memory (NVM) using ice_init_nvm. CC: Shannon Nelson Signed-off-by: Anirudh Venkataramanan Acked-by: Shannon Nelson Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/Makefile | 3 +- drivers/net/ethernet/intel/ice/ice.h | 2 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 79 ++++ drivers/net/ethernet/intel/ice/ice_common.c | 405 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_common.h | 11 + drivers/net/ethernet/intel/ice/ice_controlq.h | 3 + .../net/ethernet/intel/ice/ice_hw_autogen.h | 30 ++ drivers/net/ethernet/intel/ice/ice_main.c | 31 ++ drivers/net/ethernet/intel/ice/ice_nvm.c | 236 ++++++++++ drivers/net/ethernet/intel/ice/ice_osdep.h | 1 + drivers/net/ethernet/intel/ice/ice_status.h | 5 + drivers/net/ethernet/intel/ice/ice_type.h | 49 +++ 12 files changed, 854 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_nvm.c diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 562836a8f18a..b9a32ddecb17 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -9,4 +9,5 @@ obj-$(CONFIG_ICE) += ice.o ice-y := ice_main.o \ ice_controlq.o \ - ice_common.o + ice_common.o \ + ice_nvm.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 59b1400f9e81..4c4f161768ed 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -16,8 +16,10 @@ #include #include "ice_devids.h" #include "ice_type.h" +#include "ice_common.h" #define ICE_BAR0 0 +#define ICE_AQ_LEN 64 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index e2fa8e2ea6ae..70fe00c90329 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -36,6 +36,67 @@ struct ice_aqc_q_shutdown { u8 reserved[12]; }; +/* Request resource ownership (direct 0x0008) + * Release resource ownership (direct 0x0009) + */ +struct ice_aqc_req_res { + __le16 res_id; +#define ICE_AQC_RES_ID_NVM 1 +#define ICE_AQC_RES_ID_SDP 2 +#define ICE_AQC_RES_ID_CHNG_LOCK 3 +#define ICE_AQC_RES_ID_GLBL_LOCK 4 + __le16 access_type; +#define ICE_AQC_RES_ACCESS_READ 1 +#define ICE_AQC_RES_ACCESS_WRITE 2 + + /* Upon successful completion, FW writes this value and driver is + * expected to release resource before timeout. This value is provided + * in milliseconds. + */ + __le32 timeout; +#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS 3000 +#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000 +#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000 +#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000 + /* For SDP: pin id of the SDP */ + __le32 res_number; + /* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */ + __le16 status; +#define ICE_AQ_RES_GLBL_SUCCESS 0 +#define ICE_AQ_RES_GLBL_IN_PROG 1 +#define ICE_AQ_RES_GLBL_DONE 2 + u8 reserved[2]; +}; + +/* Clear PXE Command and response (direct 0x0110) */ +struct ice_aqc_clear_pxe { + u8 rx_cnt; +#define ICE_AQC_CLEAR_PXE_RX_CNT 0x2 + u8 reserved[15]; +}; + +/* NVM Read command (indirect 0x0701) + * NVM Erase commands (direct 0x0702) + * NVM Update commands (indirect 0x0703) + */ +struct ice_aqc_nvm { + u8 cmd_flags; +#define ICE_AQC_NVM_LAST_CMD BIT(0) +#define ICE_AQC_NVM_PCIR_REQ BIT(0) /* Used by NVM Update reply */ +#define ICE_AQC_NVM_PRESERVATION_S 1 +#define ICE_AQC_NVM_PRESERVATION_M (3 << CSR_AQ_NVM_PRESERVATION_S) +#define ICE_AQC_NVM_NO_PRESERVATION (0 << CSR_AQ_NVM_PRESERVATION_S) +#define ICE_AQC_NVM_PRESERVE_ALL BIT(1) +#define ICE_AQC_NVM_PRESERVE_SELECTED (3 << CSR_AQ_NVM_PRESERVATION_S) +#define ICE_AQC_NVM_FLASH_ONLY BIT(7) + u8 module_typeid; + __le16 length; +#define ICE_AQC_NVM_ERASE_LEN 0xFFFF + __le32 offset; + __le32 addr_high; + __le32 addr_low; +}; + /** * struct ice_aq_desc - Admin Queue (AQ) descriptor * @flags: ICE_AQ_FLAG_* flags @@ -65,6 +126,9 @@ struct ice_aq_desc { struct ice_aqc_generic generic; struct ice_aqc_get_ver get_ver; struct ice_aqc_q_shutdown q_shutdown; + struct ice_aqc_req_res res_owner; + struct ice_aqc_clear_pxe clear_pxe; + struct ice_aqc_nvm nvm; } params; }; @@ -82,6 +146,8 @@ struct ice_aq_desc { /* error codes */ enum ice_aq_err { ICE_AQ_RC_OK = 0, /* success */ + ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */ + ICE_AQ_RC_EEXIST = 13, /* object already exists */ }; /* Admin Queue command opcodes */ @@ -89,6 +155,19 @@ enum ice_adminq_opc { /* AQ commands */ ice_aqc_opc_get_ver = 0x0001, ice_aqc_opc_q_shutdown = 0x0003, + + /* resource ownership */ + ice_aqc_opc_req_res = 0x0008, + ice_aqc_opc_release_res = 0x0009, + + /* PXE */ + ice_aqc_opc_clear_pxe_mode = 0x0110, + + ice_aqc_opc_clear_pf_cfg = 0x02A4, + + /* NVM commands */ + ice_aqc_opc_nvm_read = 0x0701, + }; #endif /* _ICE_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 78452b92cae6..d3d420c3ba7b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4,6 +4,224 @@ #include "ice_common.h" #include "ice_adminq_cmd.h" +#define ICE_PF_RESET_WAIT_COUNT 200 + +/** + * ice_set_mac_type - Sets MAC type + * @hw: pointer to the HW structure + * + * This function sets the MAC type of the adapter based on the + * vendor ID and device ID stored in the hw structure. + */ +static enum ice_status ice_set_mac_type(struct ice_hw *hw) +{ + if (hw->vendor_id != PCI_VENDOR_ID_INTEL) + return ICE_ERR_DEVICE_NOT_SUPPORTED; + + hw->mac_type = ICE_MAC_GENERIC; + return 0; +} + +/** + * ice_clear_pf_cfg - Clear PF configuration + * @hw: pointer to the hardware structure + */ +enum ice_status ice_clear_pf_cfg(struct ice_hw *hw) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_init_hw - main hardware initialization routine + * @hw: pointer to the hardware structure + */ +enum ice_status ice_init_hw(struct ice_hw *hw) +{ + enum ice_status status; + + /* Set MAC type based on DeviceID */ + status = ice_set_mac_type(hw); + if (status) + return status; + + hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) & + PF_FUNC_RID_FUNC_NUM_M) >> + PF_FUNC_RID_FUNC_NUM_S; + + status = ice_reset(hw, ICE_RESET_PFR); + if (status) + return status; + + status = ice_init_all_ctrlq(hw); + if (status) + goto err_unroll_cqinit; + + status = ice_clear_pf_cfg(hw); + if (status) + goto err_unroll_cqinit; + + ice_clear_pxe_mode(hw); + + status = ice_init_nvm(hw); + if (status) + goto err_unroll_cqinit; + + return 0; + +err_unroll_cqinit: + ice_shutdown_all_ctrlq(hw); + return status; +} + +/** + * ice_deinit_hw - unroll initialization operations done by ice_init_hw + * @hw: pointer to the hardware structure + */ +void ice_deinit_hw(struct ice_hw *hw) +{ + ice_shutdown_all_ctrlq(hw); +} + +/** + * ice_check_reset - Check to see if a global reset is complete + * @hw: pointer to the hardware structure + */ +enum ice_status ice_check_reset(struct ice_hw *hw) +{ + u32 cnt, reg = 0, grst_delay; + + /* Poll for Device Active state in case a recent CORER, GLOBR, + * or EMPR has occurred. The grst delay value is in 100ms units. + * Add 1sec for outstanding AQ commands that can take a long time. + */ + grst_delay = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >> + GLGEN_RSTCTL_GRSTDEL_S) + 10; + + for (cnt = 0; cnt < grst_delay; cnt++) { + mdelay(100); + reg = rd32(hw, GLGEN_RSTAT); + if (!(reg & GLGEN_RSTAT_DEVSTATE_M)) + break; + } + + if (cnt == grst_delay) { + ice_debug(hw, ICE_DBG_INIT, + "Global reset polling failed to complete.\n"); + return ICE_ERR_RESET_FAILED; + } + +#define ICE_RESET_DONE_MASK (GLNVM_ULD_CORER_DONE_M | \ + GLNVM_ULD_GLOBR_DONE_M) + + /* Device is Active; check Global Reset processes are done */ + for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { + reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK; + if (reg == ICE_RESET_DONE_MASK) { + ice_debug(hw, ICE_DBG_INIT, + "Global reset processes done. %d\n", cnt); + break; + } + mdelay(10); + } + + if (cnt == ICE_PF_RESET_WAIT_COUNT) { + ice_debug(hw, ICE_DBG_INIT, + "Wait for Reset Done timed out. GLNVM_ULD = 0x%x\n", + reg); + return ICE_ERR_RESET_FAILED; + } + + return 0; +} + +/** + * ice_pf_reset - Reset the PF + * @hw: pointer to the hardware structure + * + * If a global reset has been triggered, this function checks + * for its completion and then issues the PF reset + */ +static enum ice_status ice_pf_reset(struct ice_hw *hw) +{ + u32 cnt, reg; + + /* If at function entry a global reset was already in progress, i.e. + * state is not 'device active' or any of the reset done bits are not + * set in GLNVM_ULD, there is no need for a PF Reset; poll until the + * global reset is done. + */ + if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) || + (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) { + /* poll on global reset currently in progress until done */ + if (ice_check_reset(hw)) + return ICE_ERR_RESET_FAILED; + + return 0; + } + + /* Reset the PF */ + reg = rd32(hw, PFGEN_CTRL); + + wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M)); + + for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { + reg = rd32(hw, PFGEN_CTRL); + if (!(reg & PFGEN_CTRL_PFSWR_M)) + break; + + mdelay(1); + } + + if (cnt == ICE_PF_RESET_WAIT_COUNT) { + ice_debug(hw, ICE_DBG_INIT, + "PF reset polling failed to complete.\n"); + return ICE_ERR_RESET_FAILED; + } + + return 0; +} + +/** + * ice_reset - Perform different types of reset + * @hw: pointer to the hardware structure + * @req: reset request + * + * This function triggers a reset as specified by the req parameter. + * + * Note: + * If anything other than a PF reset is triggered, PXE mode is restored. + * This has to be cleared using ice_clear_pxe_mode again, once the AQ + * interface has been restored in the rebuild flow. + */ +enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req) +{ + u32 val = 0; + + switch (req) { + case ICE_RESET_PFR: + return ice_pf_reset(hw); + case ICE_RESET_CORER: + ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n"); + val = GLGEN_RTRIG_CORER_M; + break; + case ICE_RESET_GLOBR: + ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n"); + val = GLGEN_RTRIG_GLOBR_M; + break; + } + + val |= rd32(hw, GLGEN_RTRIG); + wr32(hw, GLGEN_RTRIG, val); + ice_flush(hw); + + /* wait for the FW to be ready */ + return ice_check_reset(hw); +} + /** * ice_debug_cq * @hw: pointer to the hardware structure @@ -128,3 +346,190 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading) return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } + +/** + * ice_aq_req_res + * @hw: pointer to the hw struct + * @res: resource id + * @access: access type + * @sdp_number: resource number + * @timeout: the maximum time in ms that the driver may hold the resource + * @cd: pointer to command details structure or NULL + * + * requests common resource using the admin queue commands (0x0008) + */ +static enum ice_status +ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res, + enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout, + struct ice_sq_cd *cd) +{ + struct ice_aqc_req_res *cmd_resp; + struct ice_aq_desc desc; + enum ice_status status; + + cmd_resp = &desc.params.res_owner; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res); + + cmd_resp->res_id = cpu_to_le16(res); + cmd_resp->access_type = cpu_to_le16(access); + cmd_resp->res_number = cpu_to_le32(sdp_number); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + /* The completion specifies the maximum time in ms that the driver + * may hold the resource in the Timeout field. + * If the resource is held by someone else, the command completes with + * busy return value and the timeout field indicates the maximum time + * the current owner of the resource has to free it. + */ + if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) + *timeout = le32_to_cpu(cmd_resp->timeout); + + return status; +} + +/** + * ice_aq_release_res + * @hw: pointer to the hw struct + * @res: resource id + * @sdp_number: resource number + * @cd: pointer to command details structure or NULL + * + * release common resource using the admin queue commands (0x0009) + */ +static enum ice_status +ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number, + struct ice_sq_cd *cd) +{ + struct ice_aqc_req_res *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.res_owner; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res); + + cmd->res_id = cpu_to_le16(res); + cmd->res_number = cpu_to_le32(sdp_number); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** + * ice_acquire_res + * @hw: pointer to the HW structure + * @res: resource id + * @access: access type (read or write) + * + * This function will attempt to acquire the ownership of a resource. + */ +enum ice_status +ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, + enum ice_aq_res_access_type access) +{ +#define ICE_RES_POLLING_DELAY_MS 10 + u32 delay = ICE_RES_POLLING_DELAY_MS; + enum ice_status status; + u32 time_left = 0; + u32 timeout; + + status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL); + + /* An admin queue return code of ICE_AQ_RC_EEXIST means that another + * driver has previously acquired the resource and performed any + * necessary updates; in this case the caller does not obtain the + * resource and has no further work to do. + */ + if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) { + status = ICE_ERR_AQ_NO_WORK; + goto ice_acquire_res_exit; + } + + if (status) + ice_debug(hw, ICE_DBG_RES, + "resource %d acquire type %d failed.\n", res, access); + + /* If necessary, poll until the current lock owner timeouts */ + timeout = time_left; + while (status && timeout && time_left) { + mdelay(delay); + timeout = (timeout > delay) ? timeout - delay : 0; + status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL); + + if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) { + /* lock free, but no work to do */ + status = ICE_ERR_AQ_NO_WORK; + break; + } + + if (!status) + /* lock acquired */ + break; + } + if (status && status != ICE_ERR_AQ_NO_WORK) + ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n"); + +ice_acquire_res_exit: + if (status == ICE_ERR_AQ_NO_WORK) { + if (access == ICE_RES_WRITE) + ice_debug(hw, ICE_DBG_RES, + "resource indicates no work to do.\n"); + else + ice_debug(hw, ICE_DBG_RES, + "Warning: ICE_ERR_AQ_NO_WORK not expected\n"); + } + return status; +} + +/** + * ice_release_res + * @hw: pointer to the HW structure + * @res: resource id + * + * This function will release a resource using the proper Admin Command. + */ +void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res) +{ + enum ice_status status; + u32 total_delay = 0; + + status = ice_aq_release_res(hw, res, 0, NULL); + + /* there are some rare cases when trying to release the resource + * results in an admin Q timeout, so handle them correctly + */ + while ((status == ICE_ERR_AQ_TIMEOUT) && + (total_delay < hw->adminq.sq_cmd_timeout)) { + mdelay(1); + status = ice_aq_release_res(hw, res, 0, NULL); + total_delay++; + } +} + +/** + * ice_aq_clear_pxe_mode + * @hw: pointer to the hw struct + * + * Tell the firmware that the driver is taking over from PXE (0x0110). + */ +static enum ice_status ice_aq_clear_pxe_mode(struct ice_hw *hw) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode); + desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_clear_pxe_mode - clear pxe operations mode + * @hw: pointer to the hw struct + * + * Make sure all PXE mode settings are cleared, including things + * like descriptor fetch/write-back mode. + */ +void ice_clear_pxe_mode(struct ice_hw *hw) +{ + if (ice_check_sq_alive(hw, &hw->adminq)) + ice_aq_clear_pxe_mode(hw); +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index ba9347459db1..b1a7c5afe86b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -9,12 +9,22 @@ void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len); +enum ice_status ice_init_hw(struct ice_hw *hw); +void ice_deinit_hw(struct ice_hw *hw); +enum ice_status ice_check_reset(struct ice_hw *hw); +enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req); enum ice_status ice_init_all_ctrlq(struct ice_hw *hw); void ice_shutdown_all_ctrlq(struct ice_hw *hw); enum ice_status +ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, + enum ice_aq_res_access_type access); +void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); +enum ice_status ice_init_nvm(struct ice_hw *hw); +enum ice_status ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); +void ice_clear_pxe_mode(struct ice_hw *hw); bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); @@ -22,4 +32,5 @@ enum ice_status ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd); +enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 6133d38525e0..b5306e8a5a0d 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -6,6 +6,9 @@ #include "ice_adminq_cmd.h" +/* Maximum buffer lengths for all control queue types */ +#define ICE_AQ_MAX_BUF_LEN 4096 + #define ICE_CTL_Q_DESC(R, i) \ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i])) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 66a982b55eaf..893d5e967e66 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -28,5 +28,35 @@ #define PF_FW_ATQLEN_ATQENABLE_S 31 #define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S) #define PF_FW_ATQT 0x00080400 +#define GLGEN_RSTAT 0x000B8188 +#define GLGEN_RSTAT_DEVSTATE_S 0 +#define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S) +#define GLGEN_RSTCTL 0x000B8180 +#define GLGEN_RSTCTL_GRSTDEL_S 0 +#define GLGEN_RSTCTL_GRSTDEL_M ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S) +#define GLGEN_RTRIG 0x000B8190 +#define GLGEN_RTRIG_CORER_S 0 +#define GLGEN_RTRIG_CORER_M BIT(GLGEN_RTRIG_CORER_S) +#define GLGEN_RTRIG_GLOBR_S 1 +#define GLGEN_RTRIG_GLOBR_M BIT(GLGEN_RTRIG_GLOBR_S) +#define GLGEN_STAT 0x000B612C +#define PFGEN_CTRL 0x00091000 +#define PFGEN_CTRL_PFSWR_S 0 +#define PFGEN_CTRL_PFSWR_M BIT(PFGEN_CTRL_PFSWR_S) +#define GLLAN_RCTL_0 0x002941F8 +#define GLNVM_FLA 0x000B6108 +#define GLNVM_FLA_LOCKED_S 6 +#define GLNVM_FLA_LOCKED_M BIT(GLNVM_FLA_LOCKED_S) +#define GLNVM_GENS 0x000B6100 +#define GLNVM_GENS_SR_SIZE_S 5 +#define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, GLNVM_GENS_SR_SIZE_S) +#define GLNVM_ULD 0x000B6008 +#define GLNVM_ULD_CORER_DONE_S 3 +#define GLNVM_ULD_CORER_DONE_M BIT(GLNVM_ULD_CORER_DONE_S) +#define GLNVM_ULD_GLOBR_DONE_S 4 +#define GLNVM_ULD_GLOBR_DONE_M BIT(GLNVM_ULD_GLOBR_DONE_S) +#define PF_FUNC_RID 0x0009E880 +#define PF_FUNC_RID_FUNC_NUM_S 0 +#define PF_FUNC_RID_FUNC_NUM_M ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S) #endif /* _ICE_HW_AUTOGEN_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8b63c25fda8b..953be26054ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -26,6 +26,18 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */ +/** + * ice_set_ctrlq_len - helper function to set controlq length + * @hw: pointer to the hw instance + */ +static void ice_set_ctrlq_len(struct ice_hw *hw) +{ + hw->adminq.num_rq_entries = ICE_AQ_LEN; + hw->adminq.num_sq_entries = ICE_AQ_LEN; + hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN; + hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; +} + /** * ice_probe - Device initialization routine * @pdev: PCI device information struct @@ -81,6 +93,8 @@ static int ice_probe(struct pci_dev *pdev, hw->subsystem_device_id = pdev->subsystem_device; hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); + ice_set_ctrlq_len(hw); + pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M); #ifndef CONFIG_DYNAMIC_DEBUG @@ -88,7 +102,22 @@ static int ice_probe(struct pci_dev *pdev, hw->debug_mask = debug; #endif + err = ice_init_hw(hw); + if (err) { + dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err); + err = -EIO; + goto err_exit_unroll; + } + + dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n", + hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build, + hw->api_maj_ver, hw->api_min_ver); + return 0; + +err_exit_unroll: + pci_disable_pcie_error_reporting(pdev); + return err; } /** @@ -103,6 +132,8 @@ static void ice_remove(struct pci_dev *pdev) return; set_bit(__ICE_DOWN, pf->state); + + ice_deinit_hw(&pf->hw); pci_disable_pcie_error_reporting(pdev); } diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c new file mode 100644 index 000000000000..fa7a69ac92b0 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_common.h" + +/** + * ice_aq_read_nvm + * @hw: pointer to the hw struct + * @module_typeid: module pointer location in words from the NVM beginning + * @offset: byte offset from the module beginning + * @length: length of the section to be read (in bytes from the offset) + * @data: command buffer (size [bytes] = length) + * @last_command: tells if this is the last command in a series + * @cd: pointer to command details structure or NULL + * + * Read the NVM using the admin queue commands (0x0701) + */ +static enum ice_status +ice_aq_read_nvm(struct ice_hw *hw, u8 module_typeid, u32 offset, u16 length, + void *data, bool last_command, struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + struct ice_aqc_nvm *cmd; + + cmd = &desc.params.nvm; + + /* In offset the highest byte must be zeroed. */ + if (offset & 0xFF000000) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read); + + /* If this is the last command in a series, set the proper flag. */ + if (last_command) + cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD; + cmd->module_typeid = module_typeid; + cmd->offset = cpu_to_le32(offset); + cmd->length = cpu_to_le16(length); + + return ice_aq_send_cmd(hw, &desc, data, length, cd); +} + +/** + * ice_check_sr_access_params - verify params for Shadow RAM R/W operations. + * @hw: pointer to the HW structure + * @offset: offset in words from module start + * @words: number of words to access + */ +static enum ice_status +ice_check_sr_access_params(struct ice_hw *hw, u32 offset, u16 words) +{ + if ((offset + words) > hw->nvm.sr_words) { + ice_debug(hw, ICE_DBG_NVM, + "NVM error: offset beyond SR lmt.\n"); + return ICE_ERR_PARAM; + } + + if (words > ICE_SR_SECTOR_SIZE_IN_WORDS) { + /* We can access only up to 4KB (one sector), in one AQ write */ + ice_debug(hw, ICE_DBG_NVM, + "NVM error: tried to access %d words, limit is %d.\n", + words, ICE_SR_SECTOR_SIZE_IN_WORDS); + return ICE_ERR_PARAM; + } + + if (((offset + (words - 1)) / ICE_SR_SECTOR_SIZE_IN_WORDS) != + (offset / ICE_SR_SECTOR_SIZE_IN_WORDS)) { + /* A single access cannot spread over two sectors */ + ice_debug(hw, ICE_DBG_NVM, + "NVM error: cannot spread over two sectors.\n"); + return ICE_ERR_PARAM; + } + + return 0; +} + +/** + * ice_read_sr_aq - Read Shadow RAM. + * @hw: pointer to the HW structure + * @offset: offset in words from module start + * @words: number of words to read + * @data: buffer for words reads from Shadow RAM + * @last_command: tells the AdminQ that this is the last command + * + * Reads 16-bit word buffers from the Shadow RAM using the admin command. + */ +static enum ice_status +ice_read_sr_aq(struct ice_hw *hw, u32 offset, u16 words, u16 *data, + bool last_command) +{ + enum ice_status status; + + status = ice_check_sr_access_params(hw, offset, words); + + /* values in "offset" and "words" parameters are sized as words + * (16 bits) but ice_aq_read_nvm expects these values in bytes. + * So do this conversion while calling ice_aq_read_nvm. + */ + if (!status) + status = ice_aq_read_nvm(hw, 0, 2 * offset, 2 * words, data, + last_command, NULL); + + return status; +} + +/** + * ice_read_sr_word_aq - Reads Shadow RAM via AQ + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @data: word read from the Shadow RAM + * + * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_aq method. + */ +static enum ice_status +ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) +{ + enum ice_status status; + + status = ice_read_sr_aq(hw, offset, 1, data, true); + if (!status) + *data = le16_to_cpu(*(__le16 *)data); + + return status; +} + +/** + * ice_acquire_nvm - Generic request for acquiring the NVM ownership + * @hw: pointer to the HW structure + * @access: NVM access type (read or write) + * + * This function will request NVM ownership. + */ +static enum +ice_status ice_acquire_nvm(struct ice_hw *hw, + enum ice_aq_res_access_type access) +{ + if (hw->nvm.blank_nvm_mode) + return 0; + + return ice_acquire_res(hw, ICE_NVM_RES_ID, access); +} + +/** + * ice_release_nvm - Generic request for releasing the NVM ownership + * @hw: pointer to the HW structure + * + * This function will release NVM ownership. + */ +static void ice_release_nvm(struct ice_hw *hw) +{ + if (hw->nvm.blank_nvm_mode) + return; + + ice_release_res(hw, ICE_NVM_RES_ID); +} + +/** + * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @data: word read from the Shadow RAM + * + * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq. + */ +static enum ice_status +ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data) +{ + enum ice_status status; + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (!status) { + status = ice_read_sr_word_aq(hw, offset, data); + ice_release_nvm(hw); + } + + return status; +} + +/** + * ice_init_nvm - initializes NVM setting + * @hw: pointer to the hw struct + * + * This function reads and populates NVM settings such as Shadow RAM size, + * max_timeout, and blank_nvm_mode + */ +enum ice_status ice_init_nvm(struct ice_hw *hw) +{ + struct ice_nvm_info *nvm = &hw->nvm; + u16 eetrack_lo, eetrack_hi; + enum ice_status status = 0; + u32 fla, gens_stat; + u8 sr_size; + + /* The SR size is stored regardless of the nvm programming mode + * as the blank mode may be used in the factory line. + */ + gens_stat = rd32(hw, GLNVM_GENS); + sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S; + + /* Switching to words (sr_size contains power of 2) */ + nvm->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB; + + /* Check if we are in the normal or blank NVM programming mode */ + fla = rd32(hw, GLNVM_FLA); + if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */ + nvm->blank_nvm_mode = false; + } else { /* Blank programming mode */ + nvm->blank_nvm_mode = true; + status = ICE_ERR_NVM_BLANK_MODE; + ice_debug(hw, ICE_DBG_NVM, + "NVM init error: unsupported blank mode.\n"); + return status; + } + + status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver); + if (status) { + ice_debug(hw, ICE_DBG_INIT, + "Failed to read DEV starter version.\n"); + return status; + } + + status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK lo.\n"); + return status; + } + status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi); + if (status) { + ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK hi.\n"); + return status; + } + + hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo; + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h index 5c992aa1f150..f57c414bc0a9 100644 --- a/drivers/net/ethernet/intel/ice/ice_osdep.h +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h @@ -15,6 +15,7 @@ #define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) #define rd64(a, reg) readq((a)->hw_addr + (reg)) +#define ice_flush(a) rd32((a), GLGEN_STAT) #define ICE_M(m, s) ((m) << (s)) struct ice_dma_mem { diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index 978c6dff3c71..7e8049efac1c 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -9,12 +9,17 @@ enum ice_status { ICE_ERR_PARAM = -1, ICE_ERR_NOT_READY = -3, ICE_ERR_INVAL_SIZE = -6, + ICE_ERR_DEVICE_NOT_SUPPORTED = -8, + ICE_ERR_RESET_FAILED = -9, ICE_ERR_FW_API_VER = -10, ICE_ERR_NO_MEMORY = -11, ICE_ERR_CFG = -12, + ICE_ERR_OUT_OF_RANGE = -13, + ICE_ERR_NVM_BLANK_MODE = -53, ICE_ERR_AQ_ERROR = -100, ICE_ERR_AQ_TIMEOUT = -101, ICE_ERR_AQ_FULL = -102, + ICE_ERR_AQ_NO_WORK = -103, ICE_ERR_AQ_EMPTY = -104, }; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 212ab6cc0a42..31be38369a48 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -10,20 +10,58 @@ #include "ice_controlq.h" /* debug masks - set these bits in hw->debug_mask to control output */ +#define ICE_DBG_INIT BIT_ULL(1) +#define ICE_DBG_NVM BIT_ULL(7) +#define ICE_DBG_RES BIT_ULL(17) #define ICE_DBG_AQ_MSG BIT_ULL(24) #define ICE_DBG_AQ_CMD BIT_ULL(27) +enum ice_aq_res_ids { + ICE_NVM_RES_ID = 1, + ICE_SPD_RES_ID, + ICE_GLOBAL_CFG_LOCK_RES_ID, + ICE_CHANGE_LOCK_RES_ID +}; + +enum ice_aq_res_access_type { + ICE_RES_READ = 1, + ICE_RES_WRITE +}; + +/* Various MAC types */ +enum ice_mac_type { + ICE_MAC_UNKNOWN = 0, + ICE_MAC_GENERIC, +}; + +/* Various RESET request, These are not tied with HW reset types */ +enum ice_reset_req { + ICE_RESET_PFR = 0, + ICE_RESET_CORER = 1, + ICE_RESET_GLOBR = 2, +}; + /* Bus parameters */ struct ice_bus_info { u16 device; u8 func; }; +/* NVM Information */ +struct ice_nvm_info { + u32 eetrack; /* NVM data version */ + u32 oem_ver; /* OEM version info */ + u16 sr_words; /* Shadow RAM size in words */ + u16 ver; /* NVM package version */ + bool blank_nvm_mode; /* is NVM empty (no FW present) */ +}; + /* Port hardware description */ struct ice_hw { u8 __iomem *hw_addr; void *back; u64 debug_mask; /* bitmap for debug mask */ + enum ice_mac_type mac_type; /* pci info */ u16 device_id; @@ -32,7 +70,11 @@ struct ice_hw { u16 subsystem_vendor_id; u8 revision_id; + u8 pf_id; /* device profile info */ + struct ice_bus_info bus; + struct ice_nvm_info nvm; + /* Control Queue info */ struct ice_ctl_q_info adminq; @@ -47,4 +89,11 @@ struct ice_hw { u32 fw_build; /* firmware build number */ }; +/* Checksum and Shadow RAM pointers */ +#define ICE_SR_NVM_DEV_STARTER_VER 0x18 +#define ICE_SR_NVM_EETRACK_LO 0x2D +#define ICE_SR_NVM_EETRACK_HI 0x2E +#define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 +#define ICE_SR_WORDS_IN_1KB 512 + #endif /* _ICE_TYPE_H_ */ From 9c20346b6309e20f64ee8e7054914ddc92c60baf Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:08 -0700 Subject: [PATCH 04/15] ice: Get switch config, scheduler config and device capabilities This patch adds to the initialization flow by getting switch configuration, scheduler configuration and device capabilities. Switch configuration: On boot, an L2 switch element is created in the firmware per physical function. Each physical function is also mapped to a port, to which its switch element is connected. In other words, this switch can be visualized as an embedded vSwitch that can connect a physical function's virtual station interfaces (VSIs) to the egress/ingress port. Egress/ingress filters will be eventually created and applied on this switch element. As part of the initialization flow, the driver gets configuration data from this switch element and stores it. Scheduler configuration: The Tx scheduler is a subsystem responsible for setting and enforcing QoS. As part of the initialization flow, the driver queries and stores the default scheduler configuration for the given physical function. Device capabilities: As part of initialization, the driver has to determine what the device is capable of (ex. max queues, VSIs, etc). This information is obtained from the firmware and stored by the driver. CC: Shannon Nelson Signed-off-by: Anirudh Venkataramanan Acked-by: Shannon Nelson Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/Makefile | 4 +- drivers/net/ethernet/intel/ice/ice.h | 2 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 209 +++++++++++ drivers/net/ethernet/intel/ice/ice_common.c | 231 ++++++++++++ drivers/net/ethernet/intel/ice/ice_common.h | 2 + drivers/net/ethernet/intel/ice/ice_sched.c | 340 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_sched.h | 28 ++ drivers/net/ethernet/intel/ice/ice_switch.c | 144 ++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 14 + drivers/net/ethernet/intel/ice/ice_type.h | 109 ++++++ 10 files changed, 1082 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_sched.c create mode 100644 drivers/net/ethernet/intel/ice/ice_sched.h create mode 100644 drivers/net/ethernet/intel/ice/ice_switch.c create mode 100644 drivers/net/ethernet/intel/ice/ice_switch.h diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index b9a32ddecb17..bd2cbe14e76e 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -10,4 +10,6 @@ obj-$(CONFIG_ICE) += ice.o ice-y := ice_main.o \ ice_controlq.o \ ice_common.o \ - ice_nvm.o + ice_nvm.o \ + ice_switch.o \ + ice_sched.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4c4f161768ed..716c555532f4 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -16,7 +16,9 @@ #include #include "ice_devids.h" #include "ice_type.h" +#include "ice_switch.h" #include "ice_common.h" +#include "ice_sched.h" #define ICE_BAR0 0 #define ICE_AQ_LEN 64 diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 70fe00c90329..d1b70f0ed0e2 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -8,6 +8,8 @@ * descriptor format. It is shared between Firmware and Software. */ +#define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9 + struct ice_aqc_generic { __le32 param0; __le32 param1; @@ -68,6 +70,40 @@ struct ice_aqc_req_res { u8 reserved[2]; }; +/* Get function capabilities (indirect 0x000A) + * Get device capabilities (indirect 0x000B) + */ +struct ice_aqc_list_caps { + u8 cmd_flags; + u8 pf_index; + u8 reserved[2]; + __le32 count; + __le32 addr_high; + __le32 addr_low; +}; + +/* Device/Function buffer entry, repeated per reported capability */ +struct ice_aqc_list_caps_elem { + __le16 cap; +#define ICE_AQC_CAPS_VSI 0x0017 +#define ICE_AQC_CAPS_RSS 0x0040 +#define ICE_AQC_CAPS_RXQS 0x0041 +#define ICE_AQC_CAPS_TXQS 0x0042 +#define ICE_AQC_CAPS_MSIX 0x0043 +#define ICE_AQC_CAPS_MAX_MTU 0x0047 + + u8 major_ver; + u8 minor_ver; + /* Number of resources described by this capability */ + __le32 number; + /* Only meaningful for some types of resources */ + __le32 logical_id; + /* Only meaningful for some types of resources */ + __le32 phys_id; + __le64 rsvd1; + __le64 rsvd2; +}; + /* Clear PXE Command and response (direct 0x0110) */ struct ice_aqc_clear_pxe { u8 rx_cnt; @@ -75,6 +111,161 @@ struct ice_aqc_clear_pxe { u8 reserved[15]; }; +/* Get switch configuration (0x0200) */ +struct ice_aqc_get_sw_cfg { + /* Reserved for command and copy of request flags for response */ + __le16 flags; + /* First desc in case of command and next_elem in case of response + * In case of response, if it is not zero, means all the configuration + * was not returned and new command shall be sent with this value in + * the 'first desc' field + */ + __le16 element; + /* Reserved for command, only used for response */ + __le16 num_elems; + __le16 rsvd; + __le32 addr_high; + __le32 addr_low; +}; + +/* Each entry in the response buffer is of the following type: */ +struct ice_aqc_get_sw_cfg_resp_elem { + /* VSI/Port Number */ + __le16 vsi_port_num; +#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S 0 +#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M \ + (0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S) +#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S 14 +#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M (0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S) +#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT 0 +#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT 1 +#define ICE_AQC_GET_SW_CONF_RESP_VSI 2 + + /* SWID VSI/Port belongs to */ + __le16 swid; + + /* Bit 14..0 : PF/VF number VSI belongs to + * Bit 15 : VF indication bit + */ + __le16 pf_vf_num; +#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S 0 +#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M \ + (0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S) +#define ICE_AQC_GET_SW_CONF_RESP_IS_VF BIT(15) +}; + +/* The response buffer is as follows. Note that the length of the + * elements array varies with the length of the command response. + */ +struct ice_aqc_get_sw_cfg_resp { + struct ice_aqc_get_sw_cfg_resp_elem elements[1]; +}; + +/* Add TSE (indirect 0x0401) + * Delete TSE (indirect 0x040F) + * Move TSE (indirect 0x0408) + */ +struct ice_aqc_add_move_delete_elem { + __le16 num_grps_req; + __le16 num_grps_updated; + __le32 reserved; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_elem_info_bw { + __le16 bw_profile_idx; + __le16 bw_alloc; +}; + +struct ice_aqc_txsched_elem { + u8 elem_type; /* Special field, reserved for some aq calls */ +#define ICE_AQC_ELEM_TYPE_UNDEFINED 0x0 +#define ICE_AQC_ELEM_TYPE_ROOT_PORT 0x1 +#define ICE_AQC_ELEM_TYPE_TC 0x2 +#define ICE_AQC_ELEM_TYPE_SE_GENERIC 0x3 +#define ICE_AQC_ELEM_TYPE_ENTRY_POINT 0x4 +#define ICE_AQC_ELEM_TYPE_LEAF 0x5 +#define ICE_AQC_ELEM_TYPE_SE_PADDED 0x6 + u8 valid_sections; +#define ICE_AQC_ELEM_VALID_GENERIC BIT(0) +#define ICE_AQC_ELEM_VALID_CIR BIT(1) +#define ICE_AQC_ELEM_VALID_EIR BIT(2) +#define ICE_AQC_ELEM_VALID_SHARED BIT(3) + u8 generic; +#define ICE_AQC_ELEM_GENERIC_MODE_M 0x1 +#define ICE_AQC_ELEM_GENERIC_PRIO_S 0x1 +#define ICE_AQC_ELEM_GENERIC_PRIO_M (0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S) +#define ICE_AQC_ELEM_GENERIC_SP_S 0x4 +#define ICE_AQC_ELEM_GENERIC_SP_M (0x1 << ICE_AQC_ELEM_GENERIC_SP_S) +#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S 0x5 +#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M \ + (0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S) + u8 flags; /* Special field, reserved for some aq calls */ +#define ICE_AQC_ELEM_FLAG_SUSPEND_M 0x1 + struct ice_aqc_elem_info_bw cir_bw; + struct ice_aqc_elem_info_bw eir_bw; + __le16 srl_id; + __le16 reserved2; +}; + +struct ice_aqc_txsched_elem_data { + __le32 parent_teid; + __le32 node_teid; + struct ice_aqc_txsched_elem data; +}; + +struct ice_aqc_txsched_topo_grp_info_hdr { + __le32 parent_teid; + __le16 num_elems; + __le16 reserved2; +}; + +struct ice_aqc_delete_elem { + struct ice_aqc_txsched_topo_grp_info_hdr hdr; + __le32 teid[1]; +}; + +/* Query Scheduler Resource Allocation (indirect 0x0412) + * This indirect command retrieves the scheduler resources allocated by + * EMP Firmware to the given PF. + */ +struct ice_aqc_query_txsched_res { + u8 reserved[8]; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_generic_sched_props { + __le16 phys_levels; + __le16 logical_levels; + u8 flattening_bitmap; + u8 max_device_cgds; + u8 max_pf_cgds; + u8 rsvd0; + __le16 rdma_qsets; + u8 rsvd1[22]; +}; + +struct ice_aqc_layer_props { + u8 logical_layer; + u8 chunk_size; + __le16 max_device_nodes; + __le16 max_pf_nodes; + u8 rsvd0[2]; + __le16 max_shared_rate_lmtr; + __le16 max_children; + __le16 max_cir_rl_profiles; + __le16 max_eir_rl_profiles; + __le16 max_srl_profiles; + u8 rsvd1[14]; +}; + +struct ice_aqc_query_txsched_res_resp { + struct ice_aqc_generic_sched_props sched_props; + struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM]; +}; + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -128,6 +319,10 @@ struct ice_aq_desc { struct ice_aqc_q_shutdown q_shutdown; struct ice_aqc_req_res res_owner; struct ice_aqc_clear_pxe clear_pxe; + struct ice_aqc_list_caps get_cap; + struct ice_aqc_get_sw_cfg get_sw_conf; + struct ice_aqc_query_txsched_res query_sched_res; + struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; } params; }; @@ -136,16 +331,19 @@ struct ice_aq_desc { #define ICE_AQ_LG_BUF 512 #define ICE_AQ_FLAG_LB_S 9 +#define ICE_AQ_FLAG_RD_S 10 #define ICE_AQ_FLAG_BUF_S 12 #define ICE_AQ_FLAG_SI_S 13 #define ICE_AQ_FLAG_LB BIT(ICE_AQ_FLAG_LB_S) /* 0x200 */ +#define ICE_AQ_FLAG_RD BIT(ICE_AQ_FLAG_RD_S) /* 0x400 */ #define ICE_AQ_FLAG_BUF BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */ #define ICE_AQ_FLAG_SI BIT(ICE_AQ_FLAG_SI_S) /* 0x2000 */ /* error codes */ enum ice_aq_err { ICE_AQ_RC_OK = 0, /* success */ + ICE_AQ_RC_ENOMEM = 9, /* Out of memory */ ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */ ICE_AQ_RC_EEXIST = 13, /* object already exists */ }; @@ -160,11 +358,22 @@ enum ice_adminq_opc { ice_aqc_opc_req_res = 0x0008, ice_aqc_opc_release_res = 0x0009, + /* device/function capabilities */ + ice_aqc_opc_list_func_caps = 0x000A, + ice_aqc_opc_list_dev_caps = 0x000B, + /* PXE */ ice_aqc_opc_clear_pxe_mode = 0x0110, + /* internal switch commands */ + ice_aqc_opc_get_sw_cfg = 0x0200, + ice_aqc_opc_clear_pf_cfg = 0x02A4, + /* transmit scheduler commands */ + ice_aqc_opc_delete_sched_elems = 0x040F, + ice_aqc_opc_query_sched_res = 0x0412, + /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index d3d420c3ba7b..f9567dc1aefd 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2,6 +2,7 @@ /* Copyright (c) 2018, Intel Corporation. */ #include "ice_common.h" +#include "ice_sched.h" #include "ice_adminq_cmd.h" #define ICE_PF_RESET_WAIT_COUNT 200 @@ -70,8 +71,37 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_cqinit; + status = ice_get_caps(hw); + if (status) + goto err_unroll_cqinit; + + hw->port_info = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*hw->port_info), GFP_KERNEL); + if (!hw->port_info) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll_cqinit; + } + + /* set the back pointer to hw */ + hw->port_info->hw = hw; + + /* Initialize port_info struct with switch configuration data */ + status = ice_get_initial_sw_cfg(hw); + if (status) + goto err_unroll_alloc; + + /* Query the allocated resources for tx scheduler */ + status = ice_sched_query_res_alloc(hw); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, + "Failed to get scheduler allocated resources\n"); + goto err_unroll_alloc; + } + return 0; +err_unroll_alloc: + devm_kfree(ice_hw_to_dev(hw), hw->port_info); err_unroll_cqinit: ice_shutdown_all_ctrlq(hw); return status; @@ -83,7 +113,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw) */ void ice_deinit_hw(struct ice_hw *hw) { + ice_sched_cleanup_all(hw); ice_shutdown_all_ctrlq(hw); + if (hw->port_info) { + devm_kfree(ice_hw_to_dev(hw), hw->port_info); + hw->port_info = NULL; + } } /** @@ -505,6 +540,202 @@ void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res) } } +/** + * ice_parse_caps - parse function/device capabilities + * @hw: pointer to the hw struct + * @buf: pointer to a buffer containing function/device capability records + * @cap_count: number of capability records in the list + * @opc: type of capabilities list to parse + * + * Helper function to parse function(0x000a)/device(0x000b) capabilities list. + */ +static void +ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, + enum ice_adminq_opc opc) +{ + struct ice_aqc_list_caps_elem *cap_resp; + struct ice_hw_func_caps *func_p = NULL; + struct ice_hw_dev_caps *dev_p = NULL; + struct ice_hw_common_caps *caps; + u32 i; + + if (!buf) + return; + + cap_resp = (struct ice_aqc_list_caps_elem *)buf; + + if (opc == ice_aqc_opc_list_dev_caps) { + dev_p = &hw->dev_caps; + caps = &dev_p->common_cap; + } else if (opc == ice_aqc_opc_list_func_caps) { + func_p = &hw->func_caps; + caps = &func_p->common_cap; + } else { + ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n"); + return; + } + + for (i = 0; caps && i < cap_count; i++, cap_resp++) { + u32 logical_id = le32_to_cpu(cap_resp->logical_id); + u32 phys_id = le32_to_cpu(cap_resp->phys_id); + u32 number = le32_to_cpu(cap_resp->number); + u16 cap = le16_to_cpu(cap_resp->cap); + + switch (cap) { + case ICE_AQC_CAPS_VSI: + if (dev_p) { + dev_p->num_vsi_allocd_to_host = number; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Dev.VSI cnt = %d\n", + dev_p->num_vsi_allocd_to_host); + } else if (func_p) { + func_p->guaranteed_num_vsi = number; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Func.VSI cnt = %d\n", + func_p->guaranteed_num_vsi); + } + break; + case ICE_AQC_CAPS_RSS: + caps->rss_table_size = number; + caps->rss_table_entry_width = logical_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: RSS table size = %d\n", + caps->rss_table_size); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: RSS table width = %d\n", + caps->rss_table_entry_width); + break; + case ICE_AQC_CAPS_RXQS: + caps->num_rxq = number; + caps->rxq_first_id = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Num Rx Qs = %d\n", caps->num_rxq); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Rx first queue ID = %d\n", + caps->rxq_first_id); + break; + case ICE_AQC_CAPS_TXQS: + caps->num_txq = number; + caps->txq_first_id = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Num Tx Qs = %d\n", caps->num_txq); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Tx first queue ID = %d\n", + caps->txq_first_id); + break; + case ICE_AQC_CAPS_MSIX: + caps->num_msix_vectors = number; + caps->msix_vector_first_id = phys_id; + ice_debug(hw, ICE_DBG_INIT, + "HW caps: MSIX vector count = %d\n", + caps->num_msix_vectors); + ice_debug(hw, ICE_DBG_INIT, + "HW caps: MSIX first vector index = %d\n", + caps->msix_vector_first_id); + break; + case ICE_AQC_CAPS_MAX_MTU: + caps->max_mtu = number; + if (dev_p) + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Dev.MaxMTU = %d\n", + caps->max_mtu); + else if (func_p) + ice_debug(hw, ICE_DBG_INIT, + "HW caps: func.MaxMTU = %d\n", + caps->max_mtu); + break; + default: + ice_debug(hw, ICE_DBG_INIT, + "HW caps: Unknown capability[%d]: 0x%x\n", i, + cap); + break; + } + } +} + +/** + * ice_aq_discover_caps - query function/device capabilities + * @hw: pointer to the hw struct + * @buf: a virtual buffer to hold the capabilities + * @buf_size: Size of the virtual buffer + * @data_size: Size of the returned data, or buf size needed if AQ err==ENOMEM + * @opc: capabilities type to discover - pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Get the function(0x000a)/device(0x000b) capabilities description from + * the firmware. + */ +static enum ice_status +ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u16 *data_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aqc_list_caps *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_cap; + + if (opc != ice_aqc_opc_list_func_caps && + opc != ice_aqc_opc_list_dev_caps) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status) + ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc); + *data_size = le16_to_cpu(desc.datalen); + + return status; +} + +/** + * ice_get_caps - get info about the HW + * @hw: pointer to the hardware structure + */ +enum ice_status ice_get_caps(struct ice_hw *hw) +{ + enum ice_status status; + u16 data_size = 0; + u16 cbuf_len; + u8 retries; + + /* The driver doesn't know how many capabilities the device will return + * so the buffer size required isn't known ahead of time. The driver + * starts with cbuf_len and if this turns out to be insufficient, the + * device returns ICE_AQ_RC_ENOMEM and also the buffer size it needs. + * The driver then allocates the buffer of this size and retries the + * operation. So it follows that the retry count is 2. + */ +#define ICE_GET_CAP_BUF_COUNT 40 +#define ICE_GET_CAP_RETRY_COUNT 2 + + cbuf_len = ICE_GET_CAP_BUF_COUNT * + sizeof(struct ice_aqc_list_caps_elem); + + retries = ICE_GET_CAP_RETRY_COUNT; + + do { + void *cbuf; + + cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL); + if (!cbuf) + return ICE_ERR_NO_MEMORY; + + status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &data_size, + ice_aqc_opc_list_func_caps, NULL); + devm_kfree(ice_hw_to_dev(hw), cbuf); + + if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM) + break; + + /* If ENOMEM is returned, try again with bigger buffer */ + cbuf_len = data_size; + } while (--retries); + + return status; +} + /** * ice_aq_clear_pxe_mode * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index b1a7c5afe86b..87d873493bdd 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -6,6 +6,7 @@ #include "ice.h" #include "ice_type.h" +#include "ice_switch.h" void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len); @@ -25,6 +26,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); void ice_clear_pxe_mode(struct ice_hw *hw); +enum ice_status ice_get_caps(struct ice_hw *hw); bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c new file mode 100644 index 000000000000..ce4edf61ec8e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_sched.h" + +/** + * ice_aq_delete_sched_elems - delete scheduler elements + * @hw: pointer to the hw struct + * @grps_req: number of groups to delete + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @grps_del: returns total number of elements deleted + * @cd: pointer to command details structure or NULL + * + * Delete scheduling elements (0x040F) + */ +static enum ice_status +ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req, + struct ice_aqc_delete_elem *buf, u16 buf_size, + u16 *grps_del, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_move_delete_elem *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.add_move_delete_elem; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + cmd->num_grps_req = cpu_to_le16(grps_req); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && grps_del) + *grps_del = le16_to_cpu(cmd->num_grps_updated); + + return status; +} + +/** + * ice_sched_remove_elems - remove nodes from hw + * @hw: pointer to the hw struct + * @parent: pointer to the parent node + * @num_nodes: number of nodes + * @node_teids: array of node teids to be deleted + * + * This function remove nodes from hw + */ +static enum ice_status +ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent, + u16 num_nodes, u32 *node_teids) +{ + struct ice_aqc_delete_elem *buf; + u16 i, num_groups_removed = 0; + enum ice_status status; + u16 buf_size; + + buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1); + buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + buf->hdr.parent_teid = parent->info.node_teid; + buf->hdr.num_elems = cpu_to_le16(num_nodes); + for (i = 0; i < num_nodes; i++) + buf->teid[i] = cpu_to_le32(node_teids[i]); + status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size, + &num_groups_removed, NULL); + if (status || num_groups_removed != 1) + ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n"); + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_sched_get_first_node - get the first node of the given layer + * @hw: pointer to the hw struct + * @parent: pointer the base node of the subtree + * @layer: layer number + * + * This function retrieves the first node of the given layer from the subtree + */ +static struct ice_sched_node * +ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent, + u8 layer) +{ + u8 i; + + if (layer < hw->sw_entry_point_layer) + return NULL; + for (i = 0; i < parent->num_children; i++) { + struct ice_sched_node *node = parent->children[i]; + + if (node) { + if (node->tx_sched_layer == layer) + return node; + /* this recursion is intentional, and wouldn't + * go more than 9 calls + */ + return ice_sched_get_first_node(hw, node, layer); + } + } + return NULL; +} + +/** + * ice_sched_get_tc_node - get pointer to TC node + * @pi: port information structure + * @tc: TC number + * + * This function returns the TC node pointer + */ +struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc) +{ + u8 i; + + if (!pi) + return NULL; + for (i = 0; i < pi->root->num_children; i++) + if (pi->root->children[i]->tc_num == tc) + return pi->root->children[i]; + return NULL; +} + +/** + * ice_free_sched_node - Free a Tx scheduler node from SW DB + * @pi: port information structure + * @node: pointer to the ice_sched_node struct + * + * This function frees up a node from SW DB as well as from HW + * + * This function needs to be called with the port_info->sched_lock held + */ +void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) +{ + struct ice_sched_node *parent; + struct ice_hw *hw = pi->hw; + u8 i, j; + + /* Free the children before freeing up the parent node + * The parent array is updated below and that shifts the nodes + * in the array. So always pick the first child if num children > 0 + */ + while (node->num_children) + ice_free_sched_node(pi, node->children[0]); + + /* Leaf, TC and root nodes can't be deleted by SW */ + if (node->tx_sched_layer >= hw->sw_entry_point_layer && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) { + u32 teid = le32_to_cpu(node->info.node_teid); + enum ice_status status; + + status = ice_sched_remove_elems(hw, node->parent, 1, &teid); + if (status) + ice_debug(hw, ICE_DBG_SCHED, + "remove element failed %d\n", status); + } + parent = node->parent; + /* root has no parent */ + if (parent) { + struct ice_sched_node *p, *tc_node; + + /* update the parent */ + for (i = 0; i < parent->num_children; i++) + if (parent->children[i] == node) { + for (j = i + 1; j < parent->num_children; j++) + parent->children[j - 1] = + parent->children[j]; + parent->num_children--; + break; + } + + /* search for previous sibling that points to this node and + * remove the reference + */ + tc_node = ice_sched_get_tc_node(pi, node->tc_num); + if (!tc_node) { + ice_debug(hw, ICE_DBG_SCHED, + "Invalid TC number %d\n", node->tc_num); + goto err_exit; + } + p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer); + while (p) { + if (p->sibling == node) { + p->sibling = node->sibling; + break; + } + p = p->sibling; + } + } +err_exit: + /* leaf nodes have no children */ + if (node->children) + devm_kfree(ice_hw_to_dev(hw), node->children); + devm_kfree(ice_hw_to_dev(hw), node); +} + +/** + * ice_aq_query_sched_res - query scheduler resource + * @hw: pointer to the hw struct + * @buf_size: buffer size in bytes + * @buf: pointer to buffer + * @cd: pointer to command details structure or NULL + * + * Query scheduler resource allocation (0x0412) + */ +static enum ice_status +ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size, + struct ice_aqc_query_txsched_res_resp *buf, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res); + return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); +} + +/** + * ice_sched_clear_tx_topo - clears the schduler tree nodes + * @pi: port information structure + * + * This function removes all the nodes from HW as well as from SW DB. + */ +static void ice_sched_clear_tx_topo(struct ice_port_info *pi) +{ + struct ice_sched_agg_info *agg_info; + struct ice_sched_vsi_info *vsi_elem; + struct ice_sched_agg_info *atmp; + struct ice_sched_vsi_info *tmp; + struct ice_hw *hw; + + if (!pi) + return; + + hw = pi->hw; + + list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) { + struct ice_sched_agg_vsi_info *agg_vsi_info; + struct ice_sched_agg_vsi_info *vtmp; + + list_for_each_entry_safe(agg_vsi_info, vtmp, + &agg_info->agg_vsi_list, list_entry) { + list_del(&agg_vsi_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), agg_vsi_info); + } + } + + /* remove the vsi list */ + list_for_each_entry_safe(vsi_elem, tmp, &pi->vsi_info_list, + list_entry) { + list_del(&vsi_elem->list_entry); + devm_kfree(ice_hw_to_dev(hw), vsi_elem); + } + + if (pi->root) { + ice_free_sched_node(pi, pi->root); + pi->root = NULL; + } +} + +/** + * ice_sched_clear_port - clear the scheduler elements from SW DB for a port + * @pi: port information structure + * + * Cleanup scheduling elements from SW DB + */ +static void ice_sched_clear_port(struct ice_port_info *pi) +{ + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return; + + pi->port_state = ICE_SCHED_PORT_STATE_INIT; + mutex_lock(&pi->sched_lock); + ice_sched_clear_tx_topo(pi); + mutex_unlock(&pi->sched_lock); + mutex_destroy(&pi->sched_lock); +} + +/** + * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports + * @hw: pointer to the hw struct + * + * Cleanup scheduling elements from SW DB for all the ports + */ +void ice_sched_cleanup_all(struct ice_hw *hw) +{ + if (!hw || !hw->port_info) + return; + + if (hw->layer_info) + devm_kfree(ice_hw_to_dev(hw), hw->layer_info); + + ice_sched_clear_port(hw->port_info); + + hw->num_tx_sched_layers = 0; + hw->num_tx_sched_phys_layers = 0; + hw->flattened_layers = 0; + hw->max_cgds = 0; +} + +/** + * ice_sched_query_res_alloc - query the FW for num of logical sched layers + * @hw: pointer to the HW struct + * + * query FW for allocated scheduler resources and store in HW struct + */ +enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) +{ + struct ice_aqc_query_txsched_res_resp *buf; + enum ice_status status = 0; + + if (hw->layer_info) + return status; + + buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL); + if (status) + goto sched_query_out; + + hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels); + hw->num_tx_sched_phys_layers = + le16_to_cpu(buf->sched_props.phys_levels); + hw->flattened_layers = buf->sched_props.flattening_bitmap; + hw->max_cgds = buf->sched_props.max_pf_cgds; + + hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props, + (hw->num_tx_sched_layers * + sizeof(*hw->layer_info)), + GFP_KERNEL); + if (!hw->layer_info) { + status = ICE_ERR_NO_MEMORY; + goto sched_query_out; + } + +sched_query_out: + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h new file mode 100644 index 000000000000..e329f6ec6a0e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_SCHED_H_ +#define _ICE_SCHED_H_ + +#include "ice_common.h" + +struct ice_sched_agg_vsi_info { + struct list_head list_entry; + DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); + u16 vsi_id; +}; + +struct ice_sched_agg_info { + struct list_head agg_vsi_list; + struct list_head list_entry; + DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); + u32 agg_id; + enum ice_agg_type agg_type; +}; + +/* FW AQ command calls */ +enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw); +void ice_sched_cleanup_all(struct ice_hw *hw); +void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node); +struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc); +#endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c new file mode 100644 index 000000000000..8fc0579b0bbb --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_switch.h" + +/** + * ice_aq_get_sw_cfg - get switch configuration + * @hw: pointer to the hardware structure + * @buf: pointer to the result buffer + * @buf_size: length of the buffer available for response + * @req_desc: pointer to requested descriptor + * @num_elems: pointer to number of elements + * @cd: pointer to command details structure or NULL + * + * Get switch configuration (0x0200) to be placed in 'buff'. + * This admin command returns information such as initial VSI/port number + * and switch ID it belongs to. + * + * NOTE: *req_desc is both an input/output parameter. + * The caller of this function first calls this function with *request_desc set + * to 0. If the response from f/w has *req_desc set to 0, all the switch + * configuration information has been returned; if non-zero (meaning not all + * the information was returned), the caller should call this function again + * with *req_desc set to the previous value returned by f/w to get the + * next block of switch configuration information. + * + * *num_elems is output only parameter. This reflects the number of elements + * in response buffer. The caller of this function to use *num_elems while + * parsing the response buffer. + */ +static enum ice_status +ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp *buf, + u16 buf_size, u16 *req_desc, u16 *num_elems, + struct ice_sq_cd *cd) +{ + struct ice_aqc_get_sw_cfg *cmd; + enum ice_status status; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg); + cmd = &desc.params.get_sw_conf; + cmd->element = cpu_to_le16(*req_desc); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status) { + *req_desc = le16_to_cpu(cmd->element); + *num_elems = le16_to_cpu(cmd->num_elems); + } + + return status; +} + +/* ice_init_port_info - Initialize port_info with switch configuration data + * @pi: pointer to port_info + * @vsi_port_num: VSI number or port number + * @type: Type of switch element (port or VSI) + * @swid: switch ID of the switch the element is attached to + * @pf_vf_num: PF or VF number + * @is_vf: true if the element is a VF, false otherwise + */ +static void +ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type, + u16 swid, u16 pf_vf_num, bool is_vf) +{ + switch (type) { + case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT: + pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK); + pi->sw_id = swid; + pi->pf_vf_num = pf_vf_num; + pi->is_vf = is_vf; + pi->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL; + pi->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL; + break; + default: + ice_debug(pi->hw, ICE_DBG_SW, + "incorrect VSI/port type received\n"); + break; + } +} + +/* ice_get_initial_sw_cfg - Get initial port and default VSI data + * @hw: pointer to the hardware structure + */ +enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw) +{ + struct ice_aqc_get_sw_cfg_resp *rbuf; + enum ice_status status; + u16 req_desc = 0; + u16 num_elems; + u16 i; + + rbuf = devm_kzalloc(ice_hw_to_dev(hw), ICE_SW_CFG_MAX_BUF_LEN, + GFP_KERNEL); + + if (!rbuf) + return ICE_ERR_NO_MEMORY; + + /* Multiple calls to ice_aq_get_sw_cfg may be required + * to get all the switch configuration information. The need + * for additional calls is indicated by ice_aq_get_sw_cfg + * writing a non-zero value in req_desc + */ + do { + status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN, + &req_desc, &num_elems, NULL); + + if (status) + break; + + for (i = 0; i < num_elems; i++) { + struct ice_aqc_get_sw_cfg_resp_elem *ele; + u16 pf_vf_num, swid, vsi_port_num; + bool is_vf = false; + u8 type; + + ele = rbuf[i].elements; + vsi_port_num = le16_to_cpu(ele->vsi_port_num) & + ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M; + + pf_vf_num = le16_to_cpu(ele->pf_vf_num) & + ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M; + + swid = le16_to_cpu(ele->swid); + + if (le16_to_cpu(ele->pf_vf_num) & + ICE_AQC_GET_SW_CONF_RESP_IS_VF) + is_vf = true; + + type = le16_to_cpu(ele->vsi_port_num) >> + ICE_AQC_GET_SW_CONF_RESP_TYPE_S; + + if (type == ICE_AQC_GET_SW_CONF_RESP_VSI) { + /* FW VSI is not needed. Just continue. */ + continue; + } + + ice_init_port_info(hw->port_info, vsi_port_num, + type, swid, pf_vf_num, is_vf); + } + } while (req_desc && !status); + + devm_kfree(ice_hw_to_dev(hw), (void *)rbuf); + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h new file mode 100644 index 000000000000..b98cb978a129 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_SWITCH_H_ +#define _ICE_SWITCH_H_ + +#include "ice_common.h" + +#define ICE_SW_CFG_MAX_BUF_LEN 2048 +#define ICE_DFLT_VSI_INVAL 0xff + +enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw); + +#endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 31be38369a48..0b71634668bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -12,6 +12,8 @@ /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) #define ICE_DBG_NVM BIT_ULL(7) +#define ICE_DBG_SW BIT_ULL(13) +#define ICE_DBG_SCHED BIT_ULL(14) #define ICE_DBG_RES BIT_ULL(17) #define ICE_DBG_AQ_MSG BIT_ULL(24) #define ICE_DBG_AQ_CMD BIT_ULL(27) @@ -34,6 +36,38 @@ enum ice_mac_type { ICE_MAC_GENERIC, }; +/* Common HW capabilities for SW use */ +struct ice_hw_common_caps { + /* TX/RX queues */ + u16 num_rxq; /* Number/Total RX queues */ + u16 rxq_first_id; /* First queue ID for RX queues */ + u16 num_txq; /* Number/Total TX queues */ + u16 txq_first_id; /* First queue ID for TX queues */ + + /* MSI-X vectors */ + u16 num_msix_vectors; + u16 msix_vector_first_id; + + /* Max MTU for function or device */ + u16 max_mtu; + + /* RSS related capabilities */ + u16 rss_table_size; /* 512 for PFs and 64 for VFs */ + u8 rss_table_entry_width; /* RSS Entry width in bits */ +}; + +/* Function specific capabilities */ +struct ice_hw_func_caps { + struct ice_hw_common_caps common_cap; + u32 guaranteed_num_vsi; +}; + +/* Device wide capabilities */ +struct ice_hw_dev_caps { + struct ice_hw_common_caps common_cap; + u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ +}; + /* Various RESET request, These are not tied with HW reset types */ enum ice_reset_req { ICE_RESET_PFR = 0, @@ -56,10 +90,76 @@ struct ice_nvm_info { bool blank_nvm_mode; /* is NVM empty (no FW present) */ }; +/* Max number of port to queue branches w.r.t topology */ +#define ICE_MAX_TRAFFIC_CLASS 8 + +struct ice_sched_node { + struct ice_sched_node *parent; + struct ice_sched_node *sibling; /* next sibling in the same layer */ + struct ice_sched_node **children; + struct ice_aqc_txsched_elem_data info; + u32 agg_id; /* aggregator group id */ + u16 vsi_id; + bool in_use; /* suspended or in use */ + u8 tx_sched_layer; /* Logical Layer (1-9) */ + u8 num_children; + u8 tc_num; + u8 owner; +#define ICE_SCHED_NODE_OWNER_LAN 0 +}; + +/* The aggregator type determines if identifier is for a VSI group, + * aggregator group, aggregator of queues, or queue group. + */ +enum ice_agg_type { + ICE_AGG_TYPE_UNKNOWN = 0, + ICE_AGG_TYPE_VSI, + ICE_AGG_TYPE_AGG, /* aggregator */ + ICE_AGG_TYPE_Q, + ICE_AGG_TYPE_QG +}; + +/* vsi type list entry to locate corresponding vsi/ag nodes */ +struct ice_sched_vsi_info { + struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS]; + struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS]; + struct list_head list_entry; + u16 max_lanq[ICE_MAX_TRAFFIC_CLASS]; + u16 vsi_id; +}; + +/* driver defines the policy */ +struct ice_sched_tx_policy { + u16 max_num_vsis; + u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS]; + bool rdma_ena; +}; + +struct ice_port_info { + struct ice_sched_node *root; /* Root Node per Port */ + struct ice_hw *hw; /* back pointer to hw instance */ + u16 sw_id; /* Initial switch ID belongs to port */ + u16 pf_vf_num; + u8 port_state; +#define ICE_SCHED_PORT_STATE_INIT 0x0 +#define ICE_SCHED_PORT_STATE_READY 0x1 + u16 dflt_tx_vsi_num; + u16 dflt_rx_vsi_num; + struct mutex sched_lock; /* protect access to TXSched tree */ + struct ice_sched_tx_policy sched_policy; + struct list_head vsi_info_list; + struct list_head agg_list; /* lists all aggregator */ + u8 lport; +#define ICE_LPORT_MASK 0xff + bool is_vf; +}; + /* Port hardware description */ struct ice_hw { u8 __iomem *hw_addr; void *back; + struct ice_aqc_layer_props *layer_info; + struct ice_port_info *port_info; u64 debug_mask; /* bitmap for debug mask */ enum ice_mac_type mac_type; @@ -72,8 +172,17 @@ struct ice_hw { u8 pf_id; /* device profile info */ + /* TX Scheduler values */ + u16 num_tx_sched_layers; + u16 num_tx_sched_phys_layers; + u8 flattened_layers; + u8 max_cgds; + u8 sw_entry_point_layer; + struct ice_bus_info bus; struct ice_nvm_info nvm; + struct ice_hw_dev_caps dev_caps; /* device capabilities */ + struct ice_hw_func_caps func_caps; /* function capabilities */ /* Control Queue info */ struct ice_ctl_q_info adminq; From dc49c77236769c571e77d49450b2dfc001d60e33 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:09 -0700 Subject: [PATCH 05/15] ice: Get MAC/PHY/link info and scheduler topology This patch adds code to continue the initialization flow as follows: 1) Get PHY/link information and store it 2) Get default scheduler tree topology and store it 3) Get the MAC address associated with the port and store it Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 1 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 261 ++++++++++++++ drivers/net/ethernet/intel/ice/ice_common.c | 264 ++++++++++++++ drivers/net/ethernet/intel/ice/ice_common.h | 3 + drivers/net/ethernet/intel/ice/ice_sched.c | 328 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_sched.h | 6 + drivers/net/ethernet/intel/ice/ice_status.h | 1 + drivers/net/ethernet/intel/ice/ice_type.h | 65 ++++ 8 files changed, 929 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 716c555532f4..1a1776a2a5a1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index d1b70f0ed0e2..5061240996b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -104,6 +104,35 @@ struct ice_aqc_list_caps_elem { __le64 rsvd2; }; +/* Manage MAC address, read command - indirect (0x0107) + * This struct is also used for the response + */ +struct ice_aqc_manage_mac_read { + __le16 flags; /* Zeroed by device driver */ +#define ICE_AQC_MAN_MAC_LAN_ADDR_VALID BIT(4) +#define ICE_AQC_MAN_MAC_SAN_ADDR_VALID BIT(5) +#define ICE_AQC_MAN_MAC_PORT_ADDR_VALID BIT(6) +#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID BIT(7) +#define ICE_AQC_MAN_MAC_READ_S 4 +#define ICE_AQC_MAN_MAC_READ_M (0xF << ICE_AQC_MAN_MAC_READ_S) + u8 lport_num; + u8 lport_num_valid; +#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID BIT(0) + u8 num_addr; /* Used in response */ + u8 reserved[3]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Response buffer format for manage MAC read command */ +struct ice_aqc_manage_mac_read_resp { + u8 lport_num; + u8 addr_type; +#define ICE_AQC_MAN_MAC_ADDR_TYPE_LAN 0 +#define ICE_AQC_MAN_MAC_ADDR_TYPE_WOL 1 + u8 mac_addr[ETH_ALEN]; +}; + /* Clear PXE Command and response (direct 0x0110) */ struct ice_aqc_clear_pxe { u8 rx_cnt; @@ -161,6 +190,16 @@ struct ice_aqc_get_sw_cfg_resp { struct ice_aqc_get_sw_cfg_resp_elem elements[1]; }; +/* Get Default Topology (indirect 0x0400) */ +struct ice_aqc_get_topo { + u8 port_num; + u8 num_branches; + __le16 reserved1; + __le32 reserved2; + __le32 addr_high; + __le32 addr_low; +}; + /* Add TSE (indirect 0x0401) * Delete TSE (indirect 0x040F) * Move TSE (indirect 0x0408) @@ -221,6 +260,12 @@ struct ice_aqc_txsched_topo_grp_info_hdr { __le16 reserved2; }; +struct ice_aqc_get_topo_elem { + struct ice_aqc_txsched_topo_grp_info_hdr hdr; + struct ice_aqc_txsched_elem_data + generic[ICE_AQC_TOPO_MAX_LEVEL_NUM]; +}; + struct ice_aqc_delete_elem { struct ice_aqc_txsched_topo_grp_info_hdr hdr; __le32 teid[1]; @@ -266,6 +311,210 @@ struct ice_aqc_query_txsched_res_resp { struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM]; }; +/* Get PHY capabilities (indirect 0x0600) */ +struct ice_aqc_get_phy_caps { + u8 lport_num; + u8 reserved; + __le16 param0; + /* 18.0 - Report qualified modules */ +#define ICE_AQC_GET_PHY_RQM BIT(0) + /* 18.1 - 18.2 : Report mode + * 00b - Report NVM capabilities + * 01b - Report topology capabilities + * 10b - Report SW configured + */ +#define ICE_AQC_REPORT_MODE_S 1 +#define ICE_AQC_REPORT_MODE_M (3 << ICE_AQC_REPORT_MODE_S) +#define ICE_AQC_REPORT_NVM_CAP 0 +#define ICE_AQC_REPORT_TOPO_CAP BIT(1) +#define ICE_AQC_REPORT_SW_CFG BIT(2) + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is #define of PHY type (Extended): + * The first set of defines is for phy_type_low. + */ +#define ICE_PHY_TYPE_LOW_100BASE_TX BIT_ULL(0) +#define ICE_PHY_TYPE_LOW_100M_SGMII BIT_ULL(1) +#define ICE_PHY_TYPE_LOW_1000BASE_T BIT_ULL(2) +#define ICE_PHY_TYPE_LOW_1000BASE_SX BIT_ULL(3) +#define ICE_PHY_TYPE_LOW_1000BASE_LX BIT_ULL(4) +#define ICE_PHY_TYPE_LOW_1000BASE_KX BIT_ULL(5) +#define ICE_PHY_TYPE_LOW_1G_SGMII BIT_ULL(6) +#define ICE_PHY_TYPE_LOW_2500BASE_T BIT_ULL(7) +#define ICE_PHY_TYPE_LOW_2500BASE_X BIT_ULL(8) +#define ICE_PHY_TYPE_LOW_2500BASE_KX BIT_ULL(9) +#define ICE_PHY_TYPE_LOW_5GBASE_T BIT_ULL(10) +#define ICE_PHY_TYPE_LOW_5GBASE_KR BIT_ULL(11) +#define ICE_PHY_TYPE_LOW_10GBASE_T BIT_ULL(12) +#define ICE_PHY_TYPE_LOW_10G_SFI_DA BIT_ULL(13) +#define ICE_PHY_TYPE_LOW_10GBASE_SR BIT_ULL(14) +#define ICE_PHY_TYPE_LOW_10GBASE_LR BIT_ULL(15) +#define ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 BIT_ULL(16) +#define ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC BIT_ULL(17) +#define ICE_PHY_TYPE_LOW_10G_SFI_C2C BIT_ULL(18) +#define ICE_PHY_TYPE_LOW_25GBASE_T BIT_ULL(19) +#define ICE_PHY_TYPE_LOW_25GBASE_CR BIT_ULL(20) +#define ICE_PHY_TYPE_LOW_25GBASE_CR_S BIT_ULL(21) +#define ICE_PHY_TYPE_LOW_25GBASE_CR1 BIT_ULL(22) +#define ICE_PHY_TYPE_LOW_25GBASE_SR BIT_ULL(23) +#define ICE_PHY_TYPE_LOW_25GBASE_LR BIT_ULL(24) +#define ICE_PHY_TYPE_LOW_25GBASE_KR BIT_ULL(25) +#define ICE_PHY_TYPE_LOW_25GBASE_KR_S BIT_ULL(26) +#define ICE_PHY_TYPE_LOW_25GBASE_KR1 BIT_ULL(27) +#define ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC BIT_ULL(28) +#define ICE_PHY_TYPE_LOW_25G_AUI_C2C BIT_ULL(29) +#define ICE_PHY_TYPE_LOW_40GBASE_CR4 BIT_ULL(30) +#define ICE_PHY_TYPE_LOW_40GBASE_SR4 BIT_ULL(31) +#define ICE_PHY_TYPE_LOW_40GBASE_LR4 BIT_ULL(32) +#define ICE_PHY_TYPE_LOW_40GBASE_KR4 BIT_ULL(33) +#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC BIT_ULL(34) +#define ICE_PHY_TYPE_LOW_40G_XLAUI BIT_ULL(35) +#define ICE_PHY_TYPE_LOW_MAX_INDEX 63 + +struct ice_aqc_get_phy_caps_data { + __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ + __le64 reserved; + u8 caps; +#define ICE_AQC_PHY_EN_TX_LINK_PAUSE BIT(0) +#define ICE_AQC_PHY_EN_RX_LINK_PAUSE BIT(1) +#define ICE_AQC_PHY_LOW_POWER_MODE BIT(2) +#define ICE_AQC_PHY_EN_LINK BIT(3) +#define ICE_AQC_PHY_AN_MODE BIT(4) +#define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5) + u8 low_power_ctrl; +#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0) + __le16 eee_cap; +#define ICE_AQC_PHY_EEE_EN_100BASE_TX BIT(0) +#define ICE_AQC_PHY_EEE_EN_1000BASE_T BIT(1) +#define ICE_AQC_PHY_EEE_EN_10GBASE_T BIT(2) +#define ICE_AQC_PHY_EEE_EN_1000BASE_KX BIT(3) +#define ICE_AQC_PHY_EEE_EN_10GBASE_KR BIT(4) +#define ICE_AQC_PHY_EEE_EN_25GBASE_KR BIT(5) +#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4 BIT(6) + __le16 eeer_value; + u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */ + u8 link_fec_options; +#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN BIT(0) +#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1) +#define ICE_AQC_PHY_FEC_25G_RS_528_REQ BIT(2) +#define ICE_AQC_PHY_FEC_25G_KR_REQ BIT(3) +#define ICE_AQC_PHY_FEC_25G_RS_544_REQ BIT(4) +#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6) +#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7) + u8 extended_compliance_code; +#define ICE_MODULE_TYPE_TOTAL_BYTE 3 + u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE]; +#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS 0xA0 +#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS 0x80 +#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE BIT(0) +#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE BIT(1) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR BIT(4) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LR BIT(5) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LRM BIT(6) +#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_ER BIT(7) +#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS 0xA0 +#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS 0x86 + u8 qualified_module_count; +#define ICE_AQC_QUAL_MOD_COUNT_MAX 16 + struct { + u8 v_oui[3]; + u8 rsvd1; + u8 v_part[16]; + __le32 v_rev; + __le64 rsvd8; + } qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX]; +}; + +/* Get link status (indirect 0x0607), also used for Link Status Event */ +struct ice_aqc_get_link_status { + u8 lport_num; + u8 reserved; + __le16 cmd_flags; +#define ICE_AQ_LSE_M 0x3 +#define ICE_AQ_LSE_NOP 0x0 +#define ICE_AQ_LSE_DIS 0x2 +#define ICE_AQ_LSE_ENA 0x3 + /* only response uses this flag */ +#define ICE_AQ_LSE_IS_ENABLED 0x1 + __le32 reserved2; + __le32 addr_high; + __le32 addr_low; +}; + +/* Get link status response data structure, also used for Link Status Event */ +struct ice_aqc_get_link_status_data { + u8 topo_media_conflict; +#define ICE_AQ_LINK_TOPO_CONFLICT BIT(0) +#define ICE_AQ_LINK_MEDIA_CONFLICT BIT(1) +#define ICE_AQ_LINK_TOPO_CORRUPT BIT(2) + u8 reserved1; + u8 link_info; +#define ICE_AQ_LINK_UP BIT(0) /* Link Status */ +#define ICE_AQ_LINK_FAULT BIT(1) +#define ICE_AQ_LINK_FAULT_TX BIT(2) +#define ICE_AQ_LINK_FAULT_RX BIT(3) +#define ICE_AQ_LINK_FAULT_REMOTE BIT(4) +#define ICE_AQ_LINK_UP_PORT BIT(5) /* External Port Link Status */ +#define ICE_AQ_MEDIA_AVAILABLE BIT(6) +#define ICE_AQ_SIGNAL_DETECT BIT(7) + u8 an_info; +#define ICE_AQ_AN_COMPLETED BIT(0) +#define ICE_AQ_LP_AN_ABILITY BIT(1) +#define ICE_AQ_PD_FAULT BIT(2) /* Parallel Detection Fault */ +#define ICE_AQ_FEC_EN BIT(3) +#define ICE_AQ_PHY_LOW_POWER BIT(4) /* Low Power State */ +#define ICE_AQ_LINK_PAUSE_TX BIT(5) +#define ICE_AQ_LINK_PAUSE_RX BIT(6) +#define ICE_AQ_QUALIFIED_MODULE BIT(7) + u8 ext_info; +#define ICE_AQ_LINK_PHY_TEMP_ALARM BIT(0) +#define ICE_AQ_LINK_EXCESSIVE_ERRORS BIT(1) /* Excessive Link Errors */ + /* Port TX Suspended */ +#define ICE_AQ_LINK_TX_S 2 +#define ICE_AQ_LINK_TX_M (0x03 << ICE_AQ_LINK_TX_S) +#define ICE_AQ_LINK_TX_ACTIVE 0 +#define ICE_AQ_LINK_TX_DRAINED 1 +#define ICE_AQ_LINK_TX_FLUSHED 3 + u8 reserved2; + __le16 max_frame_size; + u8 cfg; +#define ICE_AQ_LINK_25G_KR_FEC_EN BIT(0) +#define ICE_AQ_LINK_25G_RS_528_FEC_EN BIT(1) +#define ICE_AQ_LINK_25G_RS_544_FEC_EN BIT(2) + /* Pacing Config */ +#define ICE_AQ_CFG_PACING_S 3 +#define ICE_AQ_CFG_PACING_M (0xF << ICE_AQ_CFG_PACING_S) +#define ICE_AQ_CFG_PACING_TYPE_M BIT(7) +#define ICE_AQ_CFG_PACING_TYPE_AVG 0 +#define ICE_AQ_CFG_PACING_TYPE_FIXED ICE_AQ_CFG_PACING_TYPE_M + /* External Device Power Ability */ + u8 power_desc; +#define ICE_AQ_PWR_CLASS_M 0x3 +#define ICE_AQ_LINK_PWR_BASET_LOW_HIGH 0 +#define ICE_AQ_LINK_PWR_BASET_HIGH 1 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_1 0 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_2 1 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_3 2 +#define ICE_AQ_LINK_PWR_QSFP_CLASS_4 3 + __le16 link_speed; +#define ICE_AQ_LINK_SPEED_10MB BIT(0) +#define ICE_AQ_LINK_SPEED_100MB BIT(1) +#define ICE_AQ_LINK_SPEED_1000MB BIT(2) +#define ICE_AQ_LINK_SPEED_2500MB BIT(3) +#define ICE_AQ_LINK_SPEED_5GB BIT(4) +#define ICE_AQ_LINK_SPEED_10GB BIT(5) +#define ICE_AQ_LINK_SPEED_20GB BIT(6) +#define ICE_AQ_LINK_SPEED_25GB BIT(7) +#define ICE_AQ_LINK_SPEED_40GB BIT(8) +#define ICE_AQ_LINK_SPEED_UNKNOWN BIT(15) + __le32 reserved3; /* Aligns next field to 8-byte boundary */ + __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ + __le64 reserved4; +}; + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -318,12 +567,16 @@ struct ice_aq_desc { struct ice_aqc_get_ver get_ver; struct ice_aqc_q_shutdown q_shutdown; struct ice_aqc_req_res res_owner; + struct ice_aqc_manage_mac_read mac_read; struct ice_aqc_clear_pxe clear_pxe; struct ice_aqc_list_caps get_cap; + struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_get_sw_cfg get_sw_conf; + struct ice_aqc_get_topo get_topo; struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; + struct ice_aqc_get_link_status get_link_status; } params; }; @@ -362,6 +615,9 @@ enum ice_adminq_opc { ice_aqc_opc_list_func_caps = 0x000A, ice_aqc_opc_list_dev_caps = 0x000B, + /* manage MAC address */ + ice_aqc_opc_manage_mac_read = 0x0107, + /* PXE */ ice_aqc_opc_clear_pxe_mode = 0x0110, @@ -371,9 +627,14 @@ enum ice_adminq_opc { ice_aqc_opc_clear_pf_cfg = 0x02A4, /* transmit scheduler commands */ + ice_aqc_opc_get_dflt_topo = 0x0400, ice_aqc_opc_delete_sched_elems = 0x040F, ice_aqc_opc_query_sched_res = 0x0412, + /* PHY commands */ + ice_aqc_opc_get_phy_caps = 0x0600, + ice_aqc_opc_get_link_status = 0x0607, + /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index f9567dc1aefd..ef5bab25c2f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -36,13 +36,238 @@ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw) return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } +/** + * ice_aq_manage_mac_read - manage MAC address read command + * @hw: pointer to the hw struct + * @buf: a virtual buffer to hold the manage MAC read response + * @buf_size: Size of the virtual buffer + * @cd: pointer to command details structure or NULL + * + * This function is used to return per PF station MAC address (0x0107). + * NOTE: Upon successful completion of this command, MAC address information + * is returned in user specified buffer. Please interpret user specified + * buffer as "manage_mac_read" response. + * Response such as various MAC addresses are stored in HW struct (port.mac) + * ice_aq_discover_caps is expected to be called before this function is called. + */ +static enum ice_status +ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aqc_manage_mac_read_resp *resp; + struct ice_aqc_manage_mac_read *cmd; + struct ice_aq_desc desc; + enum ice_status status; + u16 flags; + + cmd = &desc.params.mac_read; + + if (buf_size < sizeof(*resp)) + return ICE_ERR_BUF_TOO_SHORT; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_read); + + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (status) + return status; + + resp = (struct ice_aqc_manage_mac_read_resp *)buf; + flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M; + + if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) { + ice_debug(hw, ICE_DBG_LAN, "got invalid MAC address\n"); + return ICE_ERR_CFG; + } + + ether_addr_copy(hw->port_info->mac.lan_addr, resp->mac_addr); + ether_addr_copy(hw->port_info->mac.perm_addr, resp->mac_addr); + return 0; +} + +/** + * ice_aq_get_phy_caps - returns PHY capabilities + * @pi: port information structure + * @qual_mods: report qualified modules + * @report_mode: report mode capabilities + * @pcaps: structure for PHY capabilities to be filled + * @cd: pointer to command details structure or NULL + * + * Returns the various PHY capabilities supported on the Port (0x0600) + */ +static enum ice_status +ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, + struct ice_aqc_get_phy_caps_data *pcaps, + struct ice_sq_cd *cd) +{ + struct ice_aqc_get_phy_caps *cmd; + u16 pcaps_size = sizeof(*pcaps); + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_phy; + + if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps); + + if (qual_mods) + cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM); + + cmd->param0 |= cpu_to_le16(report_mode); + status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd); + + if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) + pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); + + return status; +} + +/** + * ice_get_media_type - Gets media type + * @pi: port information structure + */ +static enum ice_media_type ice_get_media_type(struct ice_port_info *pi) +{ + struct ice_link_status *hw_link_info; + + if (!pi) + return ICE_MEDIA_UNKNOWN; + + hw_link_info = &pi->phy.link_info; + + if (hw_link_info->phy_type_low) { + switch (hw_link_info->phy_type_low) { + case ICE_PHY_TYPE_LOW_1000BASE_SX: + case ICE_PHY_TYPE_LOW_1000BASE_LX: + case ICE_PHY_TYPE_LOW_10GBASE_SR: + case ICE_PHY_TYPE_LOW_10GBASE_LR: + case ICE_PHY_TYPE_LOW_10G_SFI_C2C: + case ICE_PHY_TYPE_LOW_25GBASE_SR: + case ICE_PHY_TYPE_LOW_25GBASE_LR: + case ICE_PHY_TYPE_LOW_25G_AUI_C2C: + case ICE_PHY_TYPE_LOW_40GBASE_SR4: + case ICE_PHY_TYPE_LOW_40GBASE_LR4: + return ICE_MEDIA_FIBER; + case ICE_PHY_TYPE_LOW_100BASE_TX: + case ICE_PHY_TYPE_LOW_1000BASE_T: + case ICE_PHY_TYPE_LOW_2500BASE_T: + case ICE_PHY_TYPE_LOW_5GBASE_T: + case ICE_PHY_TYPE_LOW_10GBASE_T: + case ICE_PHY_TYPE_LOW_25GBASE_T: + return ICE_MEDIA_BASET; + case ICE_PHY_TYPE_LOW_10G_SFI_DA: + case ICE_PHY_TYPE_LOW_25GBASE_CR: + case ICE_PHY_TYPE_LOW_25GBASE_CR_S: + case ICE_PHY_TYPE_LOW_25GBASE_CR1: + case ICE_PHY_TYPE_LOW_40GBASE_CR4: + return ICE_MEDIA_DA; + case ICE_PHY_TYPE_LOW_1000BASE_KX: + case ICE_PHY_TYPE_LOW_2500BASE_KX: + case ICE_PHY_TYPE_LOW_2500BASE_X: + case ICE_PHY_TYPE_LOW_5GBASE_KR: + case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1: + case ICE_PHY_TYPE_LOW_25GBASE_KR: + case ICE_PHY_TYPE_LOW_25GBASE_KR1: + case ICE_PHY_TYPE_LOW_25GBASE_KR_S: + case ICE_PHY_TYPE_LOW_40GBASE_KR4: + return ICE_MEDIA_BACKPLANE; + } + } + + return ICE_MEDIA_UNKNOWN; +} + +/** + * ice_aq_get_link_info + * @pi: port information structure + * @ena_lse: enable/disable LinkStatusEvent reporting + * @link: pointer to link status structure - optional + * @cd: pointer to command details structure or NULL + * + * Get Link Status (0x607). Returns the link status of the adapter. + */ +enum ice_status +ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, + struct ice_link_status *link, struct ice_sq_cd *cd) +{ + struct ice_link_status *hw_link_info_old, *hw_link_info; + struct ice_aqc_get_link_status_data link_data = { 0 }; + struct ice_aqc_get_link_status *resp; + enum ice_media_type *hw_media_type; + struct ice_fc_info *hw_fc_info; + bool tx_pause, rx_pause; + struct ice_aq_desc desc; + enum ice_status status; + u16 cmd_flags; + + if (!pi) + return ICE_ERR_PARAM; + hw_link_info_old = &pi->phy.link_info_old; + hw_media_type = &pi->phy.media_type; + hw_link_info = &pi->phy.link_info; + hw_fc_info = &pi->fc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status); + cmd_flags = (ena_lse) ? ICE_AQ_LSE_ENA : ICE_AQ_LSE_DIS; + resp = &desc.params.get_link_status; + resp->cmd_flags = cpu_to_le16(cmd_flags); + resp->lport_num = pi->lport; + + status = ice_aq_send_cmd(pi->hw, &desc, &link_data, sizeof(link_data), + cd); + + if (status) + return status; + + /* save off old link status information */ + *hw_link_info_old = *hw_link_info; + + /* update current link status information */ + hw_link_info->link_speed = le16_to_cpu(link_data.link_speed); + hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low); + *hw_media_type = ice_get_media_type(pi); + hw_link_info->link_info = link_data.link_info; + hw_link_info->an_info = link_data.an_info; + hw_link_info->ext_info = link_data.ext_info; + hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size); + hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M; + + /* update fc info */ + tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX); + rx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_RX); + if (tx_pause && rx_pause) + hw_fc_info->current_mode = ICE_FC_FULL; + else if (tx_pause) + hw_fc_info->current_mode = ICE_FC_TX_PAUSE; + else if (rx_pause) + hw_fc_info->current_mode = ICE_FC_RX_PAUSE; + else + hw_fc_info->current_mode = ICE_FC_NONE; + + hw_link_info->lse_ena = + !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED)); + + /* save link status information */ + if (link) + *link = *hw_link_info; + + /* flag cleared so calling functions don't call AQ again */ + pi->phy.get_link_info = false; + + return status; +} + /** * ice_init_hw - main hardware initialization routine * @hw: pointer to the hardware structure */ enum ice_status ice_init_hw(struct ice_hw *hw) { + struct ice_aqc_get_phy_caps_data *pcaps; enum ice_status status; + u16 mac_buf_len; + void *mac_buf; /* Set MAC type based on DeviceID */ status = ice_set_mac_type(hw); @@ -98,8 +323,46 @@ enum ice_status ice_init_hw(struct ice_hw *hw) goto err_unroll_alloc; } + /* Initialize port_info struct with scheduler data */ + status = ice_sched_init_port(hw->port_info); + if (status) + goto err_unroll_sched; + + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll_sched; + } + + /* Initialize port_info struct with PHY capabilities */ + status = ice_aq_get_phy_caps(hw->port_info, false, + ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL); + devm_kfree(ice_hw_to_dev(hw), pcaps); + if (status) + goto err_unroll_sched; + + /* Initialize port_info struct with link information */ + status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL); + if (status) + goto err_unroll_sched; + + /* Get port MAC information */ + mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp); + mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL); + + if (!mac_buf) + goto err_unroll_sched; + + status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL); + devm_kfree(ice_hw_to_dev(hw), mac_buf); + + if (status) + goto err_unroll_sched; + return 0; +err_unroll_sched: + ice_sched_cleanup_all(hw); err_unroll_alloc: devm_kfree(ice_hw_to_dev(hw), hw->port_info); err_unroll_cqinit: @@ -115,6 +378,7 @@ void ice_deinit_hw(struct ice_hw *hw) { ice_sched_cleanup_all(hw); ice_shutdown_all_ctrlq(hw); + if (hw->port_info) { devm_kfree(ice_hw_to_dev(hw), hw->port_info); hw->port_info = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 87d873493bdd..e0a7aa1dd943 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -35,4 +35,7 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd); enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); +enum ice_status +ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, + struct ice_link_status *link, struct ice_sq_cd *cd); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index ce4edf61ec8e..22039f9eb591 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -3,6 +3,141 @@ #include "ice_sched.h" +/** + * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB + * @pi: port information structure + * @info: Scheduler element information from firmware + * + * This function inserts the root node of the scheduling tree topology + * to the SW DB. + */ +static enum ice_status +ice_sched_add_root_node(struct ice_port_info *pi, + struct ice_aqc_txsched_elem_data *info) +{ + struct ice_sched_node *root; + struct ice_hw *hw; + u16 max_children; + + if (!pi) + return ICE_ERR_PARAM; + + hw = pi->hw; + + root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL); + if (!root) + return ICE_ERR_NO_MEMORY; + + max_children = le16_to_cpu(hw->layer_info[0].max_children); + root->children = devm_kcalloc(ice_hw_to_dev(hw), max_children, + sizeof(*root), GFP_KERNEL); + if (!root->children) { + devm_kfree(ice_hw_to_dev(hw), root); + return ICE_ERR_NO_MEMORY; + } + + memcpy(&root->info, info, sizeof(*info)); + pi->root = root; + return 0; +} + +/** + * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB + * @start_node: pointer to the starting ice_sched_node struct in a sub-tree + * @teid: node teid to search + * + * This function searches for a node matching the teid in the scheduling tree + * from the SW DB. The search is recursive and is restricted by the number of + * layers it has searched through; stopping at the max supported layer. + * + * This function needs to be called when holding the port_info->sched_lock + */ +struct ice_sched_node * +ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid) +{ + u16 i; + + /* The TEID is same as that of the start_node */ + if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid) + return start_node; + + /* The node has no children or is at the max layer */ + if (!start_node->num_children || + start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM || + start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) + return NULL; + + /* Check if teid matches to any of the children nodes */ + for (i = 0; i < start_node->num_children; i++) + if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid) + return start_node->children[i]; + + /* Search within each child's sub-tree */ + for (i = 0; i < start_node->num_children; i++) { + struct ice_sched_node *tmp; + + tmp = ice_sched_find_node_by_teid(start_node->children[i], + teid); + if (tmp) + return tmp; + } + + return NULL; +} + +/** + * ice_sched_add_node - Insert the Tx scheduler node in SW DB + * @pi: port information structure + * @layer: Scheduler layer of the node + * @info: Scheduler element information from firmware + * + * This function inserts a scheduler node to the SW DB. + */ +enum ice_status +ice_sched_add_node(struct ice_port_info *pi, u8 layer, + struct ice_aqc_txsched_elem_data *info) +{ + struct ice_sched_node *parent; + struct ice_sched_node *node; + struct ice_hw *hw; + u16 max_children; + + if (!pi) + return ICE_ERR_PARAM; + + hw = pi->hw; + + /* A valid parent node should be there */ + parent = ice_sched_find_node_by_teid(pi->root, + le32_to_cpu(info->parent_teid)); + if (!parent) { + ice_debug(hw, ICE_DBG_SCHED, + "Parent Node not found for parent_teid=0x%x\n", + le32_to_cpu(info->parent_teid)); + return ICE_ERR_PARAM; + } + + node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL); + if (!node) + return ICE_ERR_NO_MEMORY; + max_children = le16_to_cpu(hw->layer_info[layer].max_children); + if (max_children) { + node->children = devm_kcalloc(ice_hw_to_dev(hw), max_children, + sizeof(*node), GFP_KERNEL); + if (!node->children) { + devm_kfree(ice_hw_to_dev(hw), node); + return ICE_ERR_NO_MEMORY; + } + } + + node->in_use = true; + node->parent = parent; + node->tx_sched_layer = layer; + parent->children[parent->num_children++] = node; + memcpy(&node->info, info, sizeof(*info)); + return 0; +} + /** * ice_aq_delete_sched_elems - delete scheduler elements * @hw: pointer to the hw struct @@ -194,6 +329,36 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) devm_kfree(ice_hw_to_dev(hw), node); } +/** + * ice_aq_get_dflt_topo - gets default scheduler topology + * @hw: pointer to the hw struct + * @lport: logical port number + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @num_branches: returns total number of queue to port branches + * @cd: pointer to command details structure or NULL + * + * Get default scheduler topology (0x400) + */ +static enum ice_status +ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport, + struct ice_aqc_get_topo_elem *buf, u16 buf_size, + u8 *num_branches, struct ice_sq_cd *cd) +{ + struct ice_aqc_get_topo *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_topo; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo); + cmd->port_num = lport; + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && num_branches) + *num_branches = cmd->num_branches; + + return status; +} + /** * ice_aq_query_sched_res - query scheduler resource * @hw: pointer to the hw struct @@ -297,6 +462,169 @@ void ice_sched_cleanup_all(struct ice_hw *hw) hw->max_cgds = 0; } +/** + * ice_rm_dflt_leaf_node - remove the default leaf node in the tree + * @pi: port information structure + * + * This function removes the leaf node that was created by the FW + * during initialization + */ +static void +ice_rm_dflt_leaf_node(struct ice_port_info *pi) +{ + struct ice_sched_node *node; + + node = pi->root; + while (node) { + if (!node->num_children) + break; + node = node->children[0]; + } + if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) { + u32 teid = le32_to_cpu(node->info.node_teid); + enum ice_status status; + + /* remove the default leaf node */ + status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid); + if (!status) + ice_free_sched_node(pi, node); + } +} + +/** + * ice_sched_rm_dflt_nodes - free the default nodes in the tree + * @pi: port information structure + * + * This function frees all the nodes except root and TC that were created by + * the FW during initialization + */ +static void +ice_sched_rm_dflt_nodes(struct ice_port_info *pi) +{ + struct ice_sched_node *node; + + ice_rm_dflt_leaf_node(pi); + /* remove the default nodes except TC and root nodes */ + node = pi->root; + while (node) { + if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC && + node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) { + ice_free_sched_node(pi, node); + break; + } + if (!node->num_children) + break; + node = node->children[0]; + } +} + +/** + * ice_sched_init_port - Initialize scheduler by querying information from FW + * @pi: port info structure for the tree to cleanup + * + * This function is the initial call to find the total number of Tx scheduler + * resources, default topology created by firmware and storing the information + * in SW DB. + */ +enum ice_status ice_sched_init_port(struct ice_port_info *pi) +{ + struct ice_aqc_get_topo_elem *buf; + enum ice_status status; + struct ice_hw *hw; + u8 num_branches; + u16 num_elems; + u8 i, j; + + if (!pi) + return ICE_ERR_PARAM; + hw = pi->hw; + + /* Query the Default Topology from FW */ + buf = devm_kcalloc(ice_hw_to_dev(hw), ICE_TXSCHED_MAX_BRANCHES, + sizeof(*buf), GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + /* Query default scheduling tree topology */ + status = ice_aq_get_dflt_topo(hw, pi->lport, buf, + sizeof(*buf) * ICE_TXSCHED_MAX_BRANCHES, + &num_branches, NULL); + if (status) + goto err_init_port; + + /* num_branches should be between 1-8 */ + if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) { + ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n", + num_branches); + status = ICE_ERR_PARAM; + goto err_init_port; + } + + /* get the number of elements on the default/first branch */ + num_elems = le16_to_cpu(buf[0].hdr.num_elems); + + /* num_elems should always be between 1-9 */ + if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) { + ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n", + num_elems); + status = ICE_ERR_PARAM; + goto err_init_port; + } + + /* If the last node is a leaf node then the index of the Q group + * layer is two less than the number of elements. + */ + if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type == + ICE_AQC_ELEM_TYPE_LEAF) + pi->last_node_teid = + le32_to_cpu(buf[0].generic[num_elems - 2].node_teid); + else + pi->last_node_teid = + le32_to_cpu(buf[0].generic[num_elems - 1].node_teid); + + /* Insert the Tx Sched root node */ + status = ice_sched_add_root_node(pi, &buf[0].generic[0]); + if (status) + goto err_init_port; + + /* Parse the default tree and cache the information */ + for (i = 0; i < num_branches; i++) { + num_elems = le16_to_cpu(buf[i].hdr.num_elems); + + /* Skip root element as already inserted */ + for (j = 1; j < num_elems; j++) { + /* update the sw entry point */ + if (buf[0].generic[j].data.elem_type == + ICE_AQC_ELEM_TYPE_ENTRY_POINT) + hw->sw_entry_point_layer = j; + + status = ice_sched_add_node(pi, j, &buf[i].generic[j]); + if (status) + goto err_init_port; + } + } + + /* Remove the default nodes. */ + if (pi->root) + ice_sched_rm_dflt_nodes(pi); + + /* initialize the port for handling the scheduler tree */ + pi->port_state = ICE_SCHED_PORT_STATE_READY; + mutex_init(&pi->sched_lock); + INIT_LIST_HEAD(&pi->agg_list); + INIT_LIST_HEAD(&pi->vsi_info_list); + +err_init_port: + if (status && pi->root) { + ice_free_sched_node(pi, pi->root); + pi->root = NULL; + } + + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + /** * ice_sched_query_res_alloc - query the FW for num of logical sched layers * @hw: pointer to the HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index e329f6ec6a0e..2926ee9c373e 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -21,8 +21,14 @@ struct ice_sched_agg_info { }; /* FW AQ command calls */ +enum ice_status ice_sched_init_port(struct ice_port_info *pi); enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw); void ice_sched_cleanup_all(struct ice_hw *hw); +struct ice_sched_node * +ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid); +enum ice_status +ice_sched_add_node(struct ice_port_info *pi, u8 layer, + struct ice_aqc_txsched_elem_data *info); void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node); struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc); #endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index 7e8049efac1c..144712db1ad2 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -15,6 +15,7 @@ enum ice_status { ICE_ERR_NO_MEMORY = -11, ICE_ERR_CFG = -12, ICE_ERR_OUT_OF_RANGE = -13, + ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, ICE_ERR_AQ_ERROR = -100, ICE_ERR_AQ_TIMEOUT = -101, diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 0b71634668bb..6024c80bfa6b 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -12,6 +12,7 @@ /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) #define ICE_DBG_NVM BIT_ULL(7) +#define ICE_DBG_LAN BIT_ULL(8) #define ICE_DBG_SW BIT_ULL(13) #define ICE_DBG_SCHED BIT_ULL(14) #define ICE_DBG_RES BIT_ULL(17) @@ -30,12 +31,56 @@ enum ice_aq_res_access_type { ICE_RES_WRITE }; +enum ice_fc_mode { + ICE_FC_NONE = 0, + ICE_FC_RX_PAUSE, + ICE_FC_TX_PAUSE, + ICE_FC_FULL, + ICE_FC_PFC, + ICE_FC_DFLT +}; + /* Various MAC types */ enum ice_mac_type { ICE_MAC_UNKNOWN = 0, ICE_MAC_GENERIC, }; +/* Media Types */ +enum ice_media_type { + ICE_MEDIA_UNKNOWN = 0, + ICE_MEDIA_FIBER, + ICE_MEDIA_BASET, + ICE_MEDIA_BACKPLANE, + ICE_MEDIA_DA, +}; + +struct ice_link_status { + /* Refer to ice_aq_phy_type for bits definition */ + u64 phy_type_low; + u16 max_frame_size; + u16 link_speed; + bool lse_ena; /* Link Status Event notification */ + u8 link_info; + u8 an_info; + u8 ext_info; + u8 pacing; + u8 req_speeds; + /* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of + * ice_aqc_get_phy_caps structure + */ + u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE]; +}; + +/* PHY info such as phy_type, etc... */ +struct ice_phy_info { + struct ice_link_status link_info; + struct ice_link_status link_info_old; + u64 phy_type_low; + enum ice_media_type media_type; + bool get_link_info; +}; + /* Common HW capabilities for SW use */ struct ice_hw_common_caps { /* TX/RX queues */ @@ -68,6 +113,12 @@ struct ice_hw_dev_caps { u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ }; +/* MAC info */ +struct ice_mac_info { + u8 lan_addr[ETH_ALEN]; + u8 perm_addr[ETH_ALEN]; +}; + /* Various RESET request, These are not tied with HW reset types */ enum ice_reset_req { ICE_RESET_PFR = 0, @@ -81,6 +132,12 @@ struct ice_bus_info { u8 func; }; +/* Flow control (FC) parameters */ +struct ice_fc_info { + enum ice_fc_mode current_mode; /* FC mode in effect */ + enum ice_fc_mode req_mode; /* FC mode requested by caller */ +}; + /* NVM Information */ struct ice_nvm_info { u32 eetrack; /* NVM data version */ @@ -92,6 +149,7 @@ struct ice_nvm_info { /* Max number of port to queue branches w.r.t topology */ #define ICE_MAX_TRAFFIC_CLASS 8 +#define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS struct ice_sched_node { struct ice_sched_node *parent; @@ -108,6 +166,9 @@ struct ice_sched_node { #define ICE_SCHED_NODE_OWNER_LAN 0 }; +/* Access Macros for Tx Sched Elements data */ +#define ICE_TXSCHED_GET_NODE_TEID(x) le32_to_cpu((x)->info.node_teid) + /* The aggregator type determines if identifier is for a VSI group, * aggregator group, aggregator of queues, or queue group. */ @@ -138,6 +199,7 @@ struct ice_sched_tx_policy { struct ice_port_info { struct ice_sched_node *root; /* Root Node per Port */ struct ice_hw *hw; /* back pointer to hw instance */ + u32 last_node_teid; /* scheduler last node info */ u16 sw_id; /* Initial switch ID belongs to port */ u16 pf_vf_num; u8 port_state; @@ -145,6 +207,9 @@ struct ice_port_info { #define ICE_SCHED_PORT_STATE_READY 0x1 u16 dflt_tx_vsi_num; u16 dflt_rx_vsi_num; + struct ice_fc_info fc; + struct ice_mac_info mac; + struct ice_phy_info phy; struct mutex sched_lock; /* protect access to TXSched tree */ struct ice_sched_tx_policy sched_policy; struct list_head vsi_info_list; From 940b61af02f497fcd911b9e2d75c6b8cf76b92fd Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:10 -0700 Subject: [PATCH 06/15] ice: Initialize PF and setup miscellaneous interrupt This patch continues the initialization flow as follows: 1) Allocate and initialize necessary fields (like vsi, num_alloc_vsi, irq_tracker, etc) in the ice_pf instance. 2) Setup the miscellaneous interrupt handler. This also known as the "other interrupt causes" (OIC) handler and is used to handle non hotpath interrupts (like control queue events, link events, exceptions, etc. 3) Implement a background task to process admin queue receive (ARQ) events received by the driver. CC: Shannon Nelson Signed-off-by: Anirudh Venkataramanan Acked-by: Shannon Nelson Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 84 ++ .../net/ethernet/intel/ice/ice_adminq_cmd.h | 2 + drivers/net/ethernet/intel/ice/ice_common.c | 6 + drivers/net/ethernet/intel/ice/ice_common.h | 3 + drivers/net/ethernet/intel/ice/ice_controlq.c | 101 +++ drivers/net/ethernet/intel/ice/ice_controlq.h | 8 + .../net/ethernet/intel/ice/ice_hw_autogen.h | 63 ++ drivers/net/ethernet/intel/ice/ice_main.c | 719 +++++++++++++++++- drivers/net/ethernet/intel/ice/ice_txrx.h | 29 + drivers/net/ethernet/intel/ice/ice_type.h | 11 + 10 files changed, 1025 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx.h diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 1a1776a2a5a1..8103e61e67f9 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -12,29 +12,113 @@ #include #include #include +#include #include +#include +#include #include #include +#include #include "ice_devids.h" #include "ice_type.h" +#include "ice_txrx.h" #include "ice_switch.h" #include "ice_common.h" #include "ice_sched.h" #define ICE_BAR0 0 +#define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) #define ICE_AQ_LEN 64 +#define ICE_MIN_MSIX 2 +#define ICE_MAX_VSI_ALLOC 130 +#define ICE_MAX_TXQS 2048 +#define ICE_MAX_RXQS 2048 +#define ICE_RES_VALID_BIT 0x8000 +#define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) +struct ice_res_tracker { + u16 num_entries; + u16 search_hint; + u16 list[1]; +}; + +struct ice_sw { + struct ice_pf *pf; + u16 sw_id; /* switch ID for this switch */ + u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */ +}; + enum ice_state { __ICE_DOWN, + __ICE_PFR_REQ, /* set by driver and peers */ + __ICE_ADMINQ_EVENT_PENDING, + __ICE_SERVICE_SCHED, __ICE_STATE_NBITS /* must be last */ }; +/* struct that defines a VSI, associated with a dev */ +struct ice_vsi { + struct net_device *netdev; + struct ice_port_info *port_info; /* back pointer to port_info */ + u16 vsi_num; /* HW (absolute) index of this VSI */ +} ____cacheline_internodealigned_in_smp; + +enum ice_pf_flags { + ICE_FLAG_MSIX_ENA, + ICE_FLAG_FLTR_SYNC, + ICE_FLAG_RSS_ENA, + ICE_PF_FLAGS_NBITS /* must be last */ +}; + struct ice_pf { struct pci_dev *pdev; + struct msix_entry *msix_entries; + struct ice_res_tracker *irq_tracker; + struct ice_vsi **vsi; /* VSIs created by the driver */ + struct ice_sw *first_sw; /* first switch created by firmware */ DECLARE_BITMAP(state, __ICE_STATE_NBITS); + DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS); + DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS); + DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); + unsigned long serv_tmr_period; + unsigned long serv_tmr_prev; + struct timer_list serv_tmr; + struct work_struct serv_task; + struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ + struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ u32 msg_enable; + u32 oicr_idx; /* Other interrupt cause vector index */ + u32 num_lan_msix; /* Total MSIX vectors for base driver */ + u32 num_avail_msix; /* remaining MSIX vectors left unclaimed */ + u16 num_lan_tx; /* num lan tx queues setup */ + u16 num_lan_rx; /* num lan rx queues setup */ + u16 q_left_tx; /* remaining num tx queues left unclaimed */ + u16 q_left_rx; /* remaining num rx queues left unclaimed */ + u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */ + u16 num_alloc_vsi; + struct ice_hw hw; + char int_name[ICE_INT_NAME_STR_LEN]; }; + +/** + * ice_irq_dynamic_ena - Enable default interrupt generation settings + * @hw: pointer to hw struct + */ +static inline void ice_irq_dynamic_ena(struct ice_hw *hw) +{ + u32 vector = ((struct ice_pf *)hw->back)->oicr_idx; + int itr = ICE_ITR_NONE; + u32 val; + + /* clear the PBA here, as this function is meant to clean out all + * previous interrupts and enable the interrupt + */ + val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | + (itr << GLINT_DYN_CTL_ITR_INDX_S); + + wr32(hw, GLINT_DYN_CTL(vector), val); +} #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 5061240996b1..f0837e277b2f 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -583,11 +583,13 @@ struct ice_aq_desc { /* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ #define ICE_AQ_LG_BUF 512 +#define ICE_AQ_FLAG_ERR_S 2 #define ICE_AQ_FLAG_LB_S 9 #define ICE_AQ_FLAG_RD_S 10 #define ICE_AQ_FLAG_BUF_S 12 #define ICE_AQ_FLAG_SI_S 13 +#define ICE_AQ_FLAG_ERR BIT(ICE_AQ_FLAG_ERR_S) /* 0x4 */ #define ICE_AQ_FLAG_LB BIT(ICE_AQ_FLAG_LB_S) /* 0x200 */ #define ICE_AQ_FLAG_RD BIT(ICE_AQ_FLAG_RD_S) /* 0x400 */ #define ICE_AQ_FLAG_BUF BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index ef5bab25c2f5..a4ce8a87fb0d 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -282,6 +282,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) return status; + /* set these values to minimum allowed */ + hw->itr_gran_200 = ICE_ITR_GRAN_MIN_200; + hw->itr_gran_100 = ICE_ITR_GRAN_MIN_100; + hw->itr_gran_50 = ICE_ITR_GRAN_MIN_50; + hw->itr_gran_25 = ICE_ITR_GRAN_MIN_25; + status = ice_init_all_ctrlq(hw); if (status) goto err_unroll_cqinit; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index e0a7aa1dd943..dd4473e84ebb 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -17,6 +17,9 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req); enum ice_status ice_init_all_ctrlq(struct ice_hw *hw); void ice_shutdown_all_ctrlq(struct ice_hw *hw); enum ice_status +ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_rq_event_info *e, u16 *pending); +enum ice_status ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, enum ice_aq_res_access_type access); void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index b32c0fc04bc9..5909a4407e38 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -963,3 +963,104 @@ void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode) desc->opcode = cpu_to_le16(opcode); desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI); } + +/** + * ice_clean_rq_elem + * @hw: pointer to the hw struct + * @cq: pointer to the specific Control queue + * @e: event info from the receive descriptor, includes any buffers + * @pending: number of events that could be left to process + * + * This function cleans one Admin Receive Queue element and returns + * the contents through e. It can also return how many events are + * left to process through 'pending'. + */ +enum ice_status +ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_rq_event_info *e, u16 *pending) +{ + u16 ntc = cq->rq.next_to_clean; + enum ice_status ret_code = 0; + struct ice_aq_desc *desc; + struct ice_dma_mem *bi; + u16 desc_idx; + u16 datalen; + u16 flags; + u16 ntu; + + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + + /* take the lock before we start messing with the ring */ + mutex_lock(&cq->rq_lock); + + if (!cq->rq.count) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Receive queue not initialized.\n"); + ret_code = ICE_ERR_AQ_EMPTY; + goto clean_rq_elem_err; + } + + /* set next_to_use to head */ + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + + if (ntu == ntc) { + /* nothing to do - shouldn't need to update ring's values */ + ret_code = ICE_ERR_AQ_NO_WORK; + goto clean_rq_elem_out; + } + + /* now clean the next descriptor */ + desc = ICE_CTL_Q_DESC(cq->rq, ntc); + desc_idx = ntc; + + flags = le16_to_cpu(desc->flags); + if (flags & ICE_AQ_FLAG_ERR) { + ret_code = ICE_ERR_AQ_ERROR; + cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); + ice_debug(hw, ICE_DBG_AQ_MSG, + "Control Receive Queue Event received with error 0x%x\n", + cq->rq_last_status); + } + memcpy(&e->desc, desc, sizeof(e->desc)); + datalen = le16_to_cpu(desc->datalen); + e->msg_len = min(datalen, e->buf_len); + if (e->msg_buf && e->msg_len) + memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len); + + ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n"); + + ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf, + cq->rq_buf_size); + + /* Restore the original datalen and buffer address in the desc, + * FW updates datalen to indicate the event message size + */ + bi = &cq->rq.r.rq_bi[ntc]; + memset(desc, 0, sizeof(*desc)); + + desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF); + if (cq->rq_buf_size > ICE_AQ_LG_BUF) + desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB); + desc->datalen = cpu_to_le16(bi->size); + desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa)); + desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa)); + + /* set tail = the last cleaned desc index. */ + wr32(hw, cq->rq.tail, ntc); + /* ntc is updated to tail + 1 */ + ntc++; + if (ntc == cq->num_rq_entries) + ntc = 0; + cq->rq.next_to_clean = ntc; + cq->rq.next_to_use = ntu; + +clean_rq_elem_out: + /* Set pending if needed, unlock and return */ + if (pending) + *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc)); +clean_rq_elem_err: + mutex_unlock(&cq->rq_lock); + + return ret_code; +} diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index b5306e8a5a0d..ea02b89243e2 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -67,6 +67,14 @@ struct ice_sq_cd { #define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i])) +/* rq event information */ +struct ice_rq_event_info { + struct ice_aq_desc desc; + u16 msg_len; + u16 buf_len; + u8 *msg_buf; +}; + /* Control Queue information */ struct ice_ctl_q_info { enum ice_ctl_q qtype; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 893d5e967e66..446a8bbef488 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -14,6 +14,12 @@ #define PF_FW_ARQLEN 0x00080280 #define PF_FW_ARQLEN_ARQLEN_S 0 #define PF_FW_ARQLEN_ARQLEN_M ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S) +#define PF_FW_ARQLEN_ARQVFE_S 28 +#define PF_FW_ARQLEN_ARQVFE_M BIT(PF_FW_ARQLEN_ARQVFE_S) +#define PF_FW_ARQLEN_ARQOVFL_S 29 +#define PF_FW_ARQLEN_ARQOVFL_M BIT(PF_FW_ARQLEN_ARQOVFL_S) +#define PF_FW_ARQLEN_ARQCRIT_S 30 +#define PF_FW_ARQLEN_ARQCRIT_M BIT(PF_FW_ARQLEN_ARQCRIT_S) #define PF_FW_ARQLEN_ARQENABLE_S 31 #define PF_FW_ARQLEN_ARQENABLE_M BIT(PF_FW_ARQLEN_ARQENABLE_S) #define PF_FW_ARQT 0x00080480 @@ -25,6 +31,12 @@ #define PF_FW_ATQLEN 0x00080200 #define PF_FW_ATQLEN_ATQLEN_S 0 #define PF_FW_ATQLEN_ATQLEN_M ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S) +#define PF_FW_ATQLEN_ATQVFE_S 28 +#define PF_FW_ATQLEN_ATQVFE_M BIT(PF_FW_ATQLEN_ATQVFE_S) +#define PF_FW_ATQLEN_ATQOVFL_S 29 +#define PF_FW_ATQLEN_ATQOVFL_M BIT(PF_FW_ATQLEN_ATQOVFL_S) +#define PF_FW_ATQLEN_ATQCRIT_S 30 +#define PF_FW_ATQLEN_ATQCRIT_M BIT(PF_FW_ATQLEN_ATQCRIT_S) #define PF_FW_ATQLEN_ATQENABLE_S 31 #define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S) #define PF_FW_ATQT 0x00080400 @@ -43,6 +55,57 @@ #define PFGEN_CTRL 0x00091000 #define PFGEN_CTRL_PFSWR_S 0 #define PFGEN_CTRL_PFSWR_M BIT(PFGEN_CTRL_PFSWR_S) +#define PFHMC_ERRORDATA 0x00520500 +#define PFHMC_ERRORINFO 0x00520400 +#define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4)) +#define GLINT_DYN_CTL_INTENA_S 0 +#define GLINT_DYN_CTL_INTENA_M BIT(GLINT_DYN_CTL_INTENA_S) +#define GLINT_DYN_CTL_CLEARPBA_S 1 +#define GLINT_DYN_CTL_CLEARPBA_M BIT(GLINT_DYN_CTL_CLEARPBA_S) +#define GLINT_DYN_CTL_ITR_INDX_S 3 +#define GLINT_DYN_CTL_SW_ITR_INDX_S 25 +#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S) +#define GLINT_DYN_CTL_INTENA_MSK_S 31 +#define GLINT_DYN_CTL_INTENA_MSK_M BIT(GLINT_DYN_CTL_INTENA_MSK_S) +#define GLINT_ITR(_i, _INT) (0x00154000 + ((_i) * 8192 + (_INT) * 4)) +#define PFINT_FW_CTL 0x0016C800 +#define PFINT_FW_CTL_MSIX_INDX_S 0 +#define PFINT_FW_CTL_MSIX_INDX_M ICE_M(0x7FF, PFINT_FW_CTL_MSIX_INDX_S) +#define PFINT_FW_CTL_ITR_INDX_S 11 +#define PFINT_FW_CTL_ITR_INDX_M ICE_M(0x3, PFINT_FW_CTL_ITR_INDX_S) +#define PFINT_FW_CTL_CAUSE_ENA_S 30 +#define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S) +#define PFINT_OICR 0x0016CA00 +#define PFINT_OICR_INTEVENT_S 0 +#define PFINT_OICR_INTEVENT_M BIT(PFINT_OICR_INTEVENT_S) +#define PFINT_OICR_HLP_RDY_S 14 +#define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S) +#define PFINT_OICR_CPM_RDY_S 15 +#define PFINT_OICR_CPM_RDY_M BIT(PFINT_OICR_CPM_RDY_S) +#define PFINT_OICR_ECC_ERR_S 16 +#define PFINT_OICR_ECC_ERR_M BIT(PFINT_OICR_ECC_ERR_S) +#define PFINT_OICR_MAL_DETECT_S 19 +#define PFINT_OICR_MAL_DETECT_M BIT(PFINT_OICR_MAL_DETECT_S) +#define PFINT_OICR_GRST_S 20 +#define PFINT_OICR_GRST_M BIT(PFINT_OICR_GRST_S) +#define PFINT_OICR_PCI_EXCEPTION_S 21 +#define PFINT_OICR_PCI_EXCEPTION_M BIT(PFINT_OICR_PCI_EXCEPTION_S) +#define PFINT_OICR_GPIO_S 22 +#define PFINT_OICR_GPIO_M BIT(PFINT_OICR_GPIO_S) +#define PFINT_OICR_STORM_DETECT_S 24 +#define PFINT_OICR_STORM_DETECT_M BIT(PFINT_OICR_STORM_DETECT_S) +#define PFINT_OICR_HMC_ERR_S 26 +#define PFINT_OICR_HMC_ERR_M BIT(PFINT_OICR_HMC_ERR_S) +#define PFINT_OICR_PE_CRITERR_S 28 +#define PFINT_OICR_PE_CRITERR_M BIT(PFINT_OICR_PE_CRITERR_S) +#define PFINT_OICR_CTL 0x0016CA80 +#define PFINT_OICR_CTL_MSIX_INDX_S 0 +#define PFINT_OICR_CTL_MSIX_INDX_M ICE_M(0x7FF, PFINT_OICR_CTL_MSIX_INDX_S) +#define PFINT_OICR_CTL_ITR_INDX_S 11 +#define PFINT_OICR_CTL_ITR_INDX_M ICE_M(0x3, PFINT_OICR_CTL_ITR_INDX_S) +#define PFINT_OICR_CTL_CAUSE_ENA_S 30 +#define PFINT_OICR_CTL_CAUSE_ENA_M BIT(PFINT_OICR_CTL_CAUSE_ENA_S) +#define PFINT_OICR_ENA 0x0016C900 #define GLLAN_RCTL_0 0x002941F8 #define GLNVM_FLA 0x000B6108 #define GLNVM_FLA_LOCKED_S 6 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 953be26054ca..d93eaae5dc60 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -26,6 +26,294 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */ +static struct workqueue_struct *ice_wq; + +/** + * ice_search_res - Search the tracker for a block of resources + * @res: pointer to the resource + * @needed: size of the block needed + * @id: identifier to track owner + * Returns the base item index of the block, or -ENOMEM for error + */ +static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) +{ + int start = res->search_hint; + int end = start; + + id |= ICE_RES_VALID_BIT; + + do { + /* skip already allocated entries */ + if (res->list[end++] & ICE_RES_VALID_BIT) { + start = end; + if ((start + needed) > res->num_entries) + break; + } + + if (end == (start + needed)) { + int i = start; + + /* there was enough, so assign it to the requestor */ + while (i != end) + res->list[i++] = id; + + if (end == res->num_entries) + end = 0; + + res->search_hint = end; + return start; + } + } while (1); + + return -ENOMEM; +} + +/** + * ice_get_res - get a block of resources + * @pf: board private structure + * @res: pointer to the resource + * @needed: size of the block needed + * @id: identifier to track owner + * + * Returns the base item index of the block, or -ENOMEM for error + * The search_hint trick and lack of advanced fit-finding only works + * because we're highly likely to have all the same sized requests. + * Linear search time and any fragmentation should be minimal. + */ +static int +ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) +{ + int ret; + + if (!res || !pf) + return -EINVAL; + + if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { + dev_err(&pf->pdev->dev, + "param err: needed=%d, num_entries = %d id=0x%04x\n", + needed, res->num_entries, id); + return -EINVAL; + } + + /* search based on search_hint */ + ret = ice_search_res(res, needed, id); + + if (ret < 0) { + /* previous search failed. Reset search hint and try again */ + res->search_hint = 0; + ret = ice_search_res(res, needed, id); + } + + return ret; +} + +/** + * ice_free_res - free a block of resources + * @res: pointer to the resource + * @index: starting index previously returned by ice_get_res + * @id: identifier to track owner + * Returns number of resources freed + */ +static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) +{ + int count = 0; + int i; + + if (!res || index >= res->num_entries) + return -EINVAL; + + id |= ICE_RES_VALID_BIT; + for (i = index; i < res->num_entries && res->list[i] == id; i++) { + res->list[i] = 0; + count++; + } + + return count; +} + +/** + * __ice_clean_ctrlq - helper function to clean controlq rings + * @pf: ptr to struct ice_pf + * @q_type: specific Control queue type + */ +static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) +{ + struct ice_rq_event_info event; + struct ice_hw *hw = &pf->hw; + struct ice_ctl_q_info *cq; + u16 pending, i = 0; + const char *qtype; + u32 oldval, val; + + switch (q_type) { + case ICE_CTL_Q_ADMIN: + cq = &hw->adminq; + qtype = "Admin"; + break; + default: + dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n", + q_type); + return 0; + } + + /* check for error indications - PF_xx_AxQLEN register layout for + * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN. + */ + val = rd32(hw, cq->rq.len); + if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M | + PF_FW_ARQLEN_ARQCRIT_M)) { + oldval = val; + if (val & PF_FW_ARQLEN_ARQVFE_M) + dev_dbg(&pf->pdev->dev, + "%s Receive Queue VF Error detected\n", qtype); + if (val & PF_FW_ARQLEN_ARQOVFL_M) { + dev_dbg(&pf->pdev->dev, + "%s Receive Queue Overflow Error detected\n", + qtype); + } + if (val & PF_FW_ARQLEN_ARQCRIT_M) + dev_dbg(&pf->pdev->dev, + "%s Receive Queue Critical Error detected\n", + qtype); + val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M | + PF_FW_ARQLEN_ARQCRIT_M); + if (oldval != val) + wr32(hw, cq->rq.len, val); + } + + val = rd32(hw, cq->sq.len); + if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M | + PF_FW_ATQLEN_ATQCRIT_M)) { + oldval = val; + if (val & PF_FW_ATQLEN_ATQVFE_M) + dev_dbg(&pf->pdev->dev, + "%s Send Queue VF Error detected\n", qtype); + if (val & PF_FW_ATQLEN_ATQOVFL_M) { + dev_dbg(&pf->pdev->dev, + "%s Send Queue Overflow Error detected\n", + qtype); + } + if (val & PF_FW_ATQLEN_ATQCRIT_M) + dev_dbg(&pf->pdev->dev, + "%s Send Queue Critical Error detected\n", + qtype); + val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M | + PF_FW_ATQLEN_ATQCRIT_M); + if (oldval != val) + wr32(hw, cq->sq.len, val); + } + + event.buf_len = cq->rq_buf_size; + event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len, + GFP_KERNEL); + if (!event.msg_buf) + return 0; + + do { + enum ice_status ret; + + ret = ice_clean_rq_elem(hw, cq, &event, &pending); + if (ret == ICE_ERR_AQ_NO_WORK) + break; + if (ret) { + dev_err(&pf->pdev->dev, + "%s Receive Queue event error %d\n", qtype, + ret); + break; + } + } while (pending && (i++ < ICE_DFLT_IRQ_WORK)); + + devm_kfree(&pf->pdev->dev, event.msg_buf); + + return pending && (i == ICE_DFLT_IRQ_WORK); +} + +/** + * ice_clean_adminq_subtask - clean the AdminQ rings + * @pf: board private structure + */ +static void ice_clean_adminq_subtask(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + u32 val; + + if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + return; + + if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN)) + return; + + clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + + /* re-enable Admin queue interrupt causes */ + val = rd32(hw, PFINT_FW_CTL); + wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M)); + + ice_flush(hw); +} + +/** + * ice_service_task_schedule - schedule the service task to wake up + * @pf: board private structure + * + * If not already scheduled, this puts the task into the work queue. + */ +static void ice_service_task_schedule(struct ice_pf *pf) +{ + if (!test_bit(__ICE_DOWN, pf->state) && + !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state)) + queue_work(ice_wq, &pf->serv_task); +} + +/** + * ice_service_task_complete - finish up the service task + * @pf: board private structure + */ +static void ice_service_task_complete(struct ice_pf *pf) +{ + WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state)); + + /* force memory (pf->state) to sync before next service task */ + smp_mb__before_atomic(); + clear_bit(__ICE_SERVICE_SCHED, pf->state); +} + +/** + * ice_service_timer - timer callback to schedule service task + * @t: pointer to timer_list + */ +static void ice_service_timer(struct timer_list *t) +{ + struct ice_pf *pf = from_timer(pf, t, serv_tmr); + + mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies)); + ice_service_task_schedule(pf); +} + +/** + * ice_service_task - manage and run subtasks + * @work: pointer to work_struct contained by the PF struct + */ +static void ice_service_task(struct work_struct *work) +{ + struct ice_pf *pf = container_of(work, struct ice_pf, serv_task); + unsigned long start_time = jiffies; + + /* subtasks */ + ice_clean_adminq_subtask(pf); + + /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ + ice_service_task_complete(pf); + + /* If the tasks have taken longer than one service timer period + * or there is more work to be done, reset the service timer to + * schedule the service task now. + */ + if (time_after(jiffies, (start_time + pf->serv_tmr_period)) || + test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + mod_timer(&pf->serv_tmr, jiffies); +} + /** * ice_set_ctrlq_len - helper function to set controlq length * @hw: pointer to the hw instance @@ -38,6 +326,361 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; } +/** + * ice_ena_misc_vector - enable the non-queue interrupts + * @pf: board private structure + */ +static void ice_ena_misc_vector(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + u32 val; + + /* clear things first */ + wr32(hw, PFINT_OICR_ENA, 0); /* disable all */ + rd32(hw, PFINT_OICR); /* read to clear */ + + val = (PFINT_OICR_HLP_RDY_M | + PFINT_OICR_CPM_RDY_M | + PFINT_OICR_ECC_ERR_M | + PFINT_OICR_MAL_DETECT_M | + PFINT_OICR_GRST_M | + PFINT_OICR_PCI_EXCEPTION_M | + PFINT_OICR_GPIO_M | + PFINT_OICR_STORM_DETECT_M | + PFINT_OICR_HMC_ERR_M); + + wr32(hw, PFINT_OICR_ENA, val); + + /* SW_ITR_IDX = 0, but don't change INTENA */ + wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); +} + +/** + * ice_misc_intr - misc interrupt handler + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) +{ + struct ice_pf *pf = (struct ice_pf *)data; + struct ice_hw *hw = &pf->hw; + irqreturn_t ret = IRQ_NONE; + u32 oicr, ena_mask; + + set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + + oicr = rd32(hw, PFINT_OICR); + ena_mask = rd32(hw, PFINT_OICR_ENA); + + if (!(oicr & PFINT_OICR_INTEVENT_M)) + goto ena_intr; + + if (oicr & PFINT_OICR_HMC_ERR_M) { + ena_mask &= ~PFINT_OICR_HMC_ERR_M; + dev_dbg(&pf->pdev->dev, + "HMC Error interrupt - info 0x%x, data 0x%x\n", + rd32(hw, PFHMC_ERRORINFO), + rd32(hw, PFHMC_ERRORDATA)); + } + + /* Report and mask off any remaining unexpected interrupts */ + oicr &= ena_mask; + if (oicr) { + dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n", + oicr); + /* If a critical error is pending there is no choice but to + * reset the device. + */ + if (oicr & (PFINT_OICR_PE_CRITERR_M | + PFINT_OICR_PCI_EXCEPTION_M | + PFINT_OICR_ECC_ERR_M)) + set_bit(__ICE_PFR_REQ, pf->state); + + ena_mask &= ~oicr; + } + ret = IRQ_HANDLED; + +ena_intr: + /* re-enable interrupt causes that are not handled during this pass */ + wr32(hw, PFINT_OICR_ENA, ena_mask); + if (!test_bit(__ICE_DOWN, pf->state)) { + ice_service_task_schedule(pf); + ice_irq_dynamic_ena(hw); + } + + return ret; +} + +/** + * ice_free_irq_msix_misc - Unroll misc vector setup + * @pf: board private structure + */ +static void ice_free_irq_msix_misc(struct ice_pf *pf) +{ + /* disable OICR interrupt */ + wr32(&pf->hw, PFINT_OICR_ENA, 0); + ice_flush(&pf->hw); + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { + synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); + devm_free_irq(&pf->pdev->dev, + pf->msix_entries[pf->oicr_idx].vector, pf); + } + + ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); +} + +/** + * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events + * @pf: board private structure + * + * This sets up the handler for MSIX 0, which is used to manage the + * non-queue interrupts, e.g. AdminQ and errors. This is not used + * when in MSI or Legacy interrupt mode. + */ +static int ice_req_irq_msix_misc(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + int oicr_idx, err = 0; + u8 itr_gran; + u32 val; + + if (!pf->int_name[0]) + snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", + dev_driver_string(&pf->pdev->dev), + dev_name(&pf->pdev->dev)); + + /* reserve one vector in irq_tracker for misc interrupts */ + oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); + if (oicr_idx < 0) + return oicr_idx; + + pf->oicr_idx = oicr_idx; + + err = devm_request_irq(&pf->pdev->dev, + pf->msix_entries[pf->oicr_idx].vector, + ice_misc_intr, 0, pf->int_name, pf); + if (err) { + dev_err(&pf->pdev->dev, + "devm_request_irq for %s failed: %d\n", + pf->int_name, err); + ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); + return err; + } + + ice_ena_misc_vector(pf); + + val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | + (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) | + PFINT_OICR_CTL_CAUSE_ENA_M; + wr32(hw, PFINT_OICR_CTL, val); + + /* This enables Admin queue Interrupt causes */ + val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | + (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) | + PFINT_FW_CTL_CAUSE_ENA_M; + wr32(hw, PFINT_FW_CTL, val); + + itr_gran = hw->itr_gran_200; + + wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), + ITR_TO_REG(ICE_ITR_8K, itr_gran)); + + ice_flush(hw); + ice_irq_dynamic_ena(hw); + + return 0; +} + +/** + * ice_determine_q_usage - Calculate queue distribution + * @pf: board private structure + * + * Return -ENOMEM if we don't get enough queues for all ports + */ +static void ice_determine_q_usage(struct ice_pf *pf) +{ + u16 q_left_tx, q_left_rx; + + q_left_tx = pf->hw.func_caps.common_cap.num_txq; + q_left_rx = pf->hw.func_caps.common_cap.num_rxq; + + /* initial support for only 1 tx and 1 rx queue */ + pf->num_lan_tx = 1; + pf->num_lan_rx = 1; + + pf->q_left_tx = q_left_tx - pf->num_lan_tx; + pf->q_left_rx = q_left_rx - pf->num_lan_rx; +} + +/** + * ice_deinit_pf - Unrolls initialziations done by ice_init_pf + * @pf: board private structure to initialize + */ +static void ice_deinit_pf(struct ice_pf *pf) +{ + if (pf->serv_tmr.function) + del_timer_sync(&pf->serv_tmr); + if (pf->serv_task.func) + cancel_work_sync(&pf->serv_task); + mutex_destroy(&pf->sw_mutex); + mutex_destroy(&pf->avail_q_mutex); +} + +/** + * ice_init_pf - Initialize general software structures (struct ice_pf) + * @pf: board private structure to initialize + */ +static void ice_init_pf(struct ice_pf *pf) +{ + bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS); + set_bit(ICE_FLAG_MSIX_ENA, pf->flags); + + mutex_init(&pf->sw_mutex); + mutex_init(&pf->avail_q_mutex); + + /* Clear avail_[t|r]x_qs bitmaps (set all to avail) */ + mutex_lock(&pf->avail_q_mutex); + bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS); + bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS); + mutex_unlock(&pf->avail_q_mutex); + + /* setup service timer and periodic service task */ + timer_setup(&pf->serv_tmr, ice_service_timer, 0); + pf->serv_tmr_period = HZ; + INIT_WORK(&pf->serv_task, ice_service_task); + clear_bit(__ICE_SERVICE_SCHED, pf->state); +} + +/** + * ice_ena_msix_range - Request a range of MSIX vectors from the OS + * @pf: board private structure + * + * compute the number of MSIX vectors required (v_budget) and request from + * the OS. Return the number of vectors reserved or negative on failure + */ +static int ice_ena_msix_range(struct ice_pf *pf) +{ + int v_left, v_actual, v_budget = 0; + int needed, err, i; + + v_left = pf->hw.func_caps.common_cap.num_msix_vectors; + + /* reserve one vector for miscellaneous handler */ + needed = 1; + v_budget += needed; + v_left -= needed; + + /* reserve vectors for LAN traffic */ + pf->num_lan_msix = min_t(int, num_online_cpus(), v_left); + v_budget += pf->num_lan_msix; + + pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget, + sizeof(struct msix_entry), GFP_KERNEL); + + if (!pf->msix_entries) { + err = -ENOMEM; + goto exit_err; + } + + for (i = 0; i < v_budget; i++) + pf->msix_entries[i].entry = i; + + /* actually reserve the vectors */ + v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, + ICE_MIN_MSIX, v_budget); + + if (v_actual < 0) { + dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n"); + err = v_actual; + goto msix_err; + } + + if (v_actual < v_budget) { + dev_warn(&pf->pdev->dev, + "not enough vectors. requested = %d, obtained = %d\n", + v_budget, v_actual); + if (v_actual >= (pf->num_lan_msix + 1)) { + pf->num_avail_msix = v_actual - (pf->num_lan_msix + 1); + } else if (v_actual >= 2) { + pf->num_lan_msix = 1; + pf->num_avail_msix = v_actual - 2; + } else { + pci_disable_msix(pf->pdev); + err = -ERANGE; + goto msix_err; + } + } + + return v_actual; + +msix_err: + devm_kfree(&pf->pdev->dev, pf->msix_entries); + goto exit_err; + +exit_err: + pf->num_lan_msix = 0; + clear_bit(ICE_FLAG_MSIX_ENA, pf->flags); + return err; +} + +/** + * ice_dis_msix - Disable MSI-X interrupt setup in OS + * @pf: board private structure + */ +static void ice_dis_msix(struct ice_pf *pf) +{ + pci_disable_msix(pf->pdev); + devm_kfree(&pf->pdev->dev, pf->msix_entries); + pf->msix_entries = NULL; + clear_bit(ICE_FLAG_MSIX_ENA, pf->flags); +} + +/** + * ice_init_interrupt_scheme - Determine proper interrupt scheme + * @pf: board private structure to initialize + */ +static int ice_init_interrupt_scheme(struct ice_pf *pf) +{ + int vectors = 0; + ssize_t size; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + vectors = ice_ena_msix_range(pf); + else + return -ENODEV; + + if (vectors < 0) + return vectors; + + /* set up vector assignment tracking */ + size = sizeof(struct ice_res_tracker) + (sizeof(u16) * vectors); + + pf->irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + if (!pf->irq_tracker) { + ice_dis_msix(pf); + return -ENOMEM; + } + + pf->irq_tracker->num_entries = vectors; + + return 0; +} + +/** + * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme + * @pf: board private structure + */ +static void ice_clear_interrupt_scheme(struct ice_pf *pf) +{ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + ice_dis_msix(pf); + + devm_kfree(&pf->pdev->dev, pf->irq_tracker); + pf->irq_tracker = NULL; +} + /** * ice_probe - Device initialization routine * @pdev: PCI device information struct @@ -113,8 +756,70 @@ static int ice_probe(struct pci_dev *pdev, hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build, hw->api_maj_ver, hw->api_min_ver); + ice_init_pf(pf); + + ice_determine_q_usage(pf); + + pf->num_alloc_vsi = min_t(u16, ICE_MAX_VSI_ALLOC, + hw->func_caps.guaranteed_num_vsi); + if (!pf->num_alloc_vsi) { + err = -EIO; + goto err_init_pf_unroll; + } + + pf->vsi = devm_kcalloc(&pdev->dev, pf->num_alloc_vsi, + sizeof(struct ice_vsi *), GFP_KERNEL); + if (!pf->vsi) { + err = -ENOMEM; + goto err_init_pf_unroll; + } + + err = ice_init_interrupt_scheme(pf); + if (err) { + dev_err(&pdev->dev, + "ice_init_interrupt_scheme failed: %d\n", err); + err = -EIO; + goto err_init_interrupt_unroll; + } + + /* In case of MSIX we are going to setup the misc vector right here + * to handle admin queue events etc. In case of legacy and MSI + * the misc functionality and queue processing is combined in + * the same vector and that gets setup at open. + */ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(&pdev->dev, + "setup of misc vector failed: %d\n", err); + goto err_init_interrupt_unroll; + } + } + + /* create switch struct for the switch element created by FW on boot */ + pf->first_sw = devm_kzalloc(&pdev->dev, sizeof(struct ice_sw), + GFP_KERNEL); + if (!pf->first_sw) { + err = -ENOMEM; + goto err_msix_misc_unroll; + } + + pf->first_sw->bridge_mode = BRIDGE_MODE_VEB; + pf->first_sw->pf = pf; + + /* record the sw_id available for later use */ + pf->first_sw->sw_id = hw->port_info->sw_id; + return 0; +err_msix_misc_unroll: + ice_free_irq_msix_misc(pf); +err_init_interrupt_unroll: + ice_clear_interrupt_scheme(pf); + devm_kfree(&pdev->dev, pf->vsi); +err_init_pf_unroll: + ice_deinit_pf(pf); + ice_deinit_hw(hw); err_exit_unroll: pci_disable_pcie_error_reporting(pdev); return err; @@ -133,6 +838,9 @@ static void ice_remove(struct pci_dev *pdev) set_bit(__ICE_DOWN, pf->state); + ice_free_irq_msix_misc(pf); + ice_clear_interrupt_scheme(pf); + ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); pci_disable_pcie_error_reporting(pdev); } @@ -176,9 +884,17 @@ static int __init ice_module_init(void) pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver); pr_info("%s\n", ice_copyright); + ice_wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, KBUILD_MODNAME); + if (!ice_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; + } + status = pci_register_driver(&ice_driver); - if (status) + if (status) { pr_err("failed to register pci driver, err %d\n", status); + destroy_workqueue(ice_wq); + } return status; } @@ -193,6 +909,7 @@ module_init(ice_module_init); static void __exit ice_module_exit(void) { pci_unregister_driver(&ice_driver); + destroy_workqueue(ice_wq); pr_info("module unloaded\n"); } module_exit(ice_module_exit); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h new file mode 100644 index 000000000000..7ceb69ea745e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_TXRX_H_ +#define _ICE_TXRX_H_ + +#define ICE_DFLT_IRQ_WORK 256 + +/* this enum matches hardware bits and is meant to be used by DYN_CTLN + * registers and QINT registers or more generally anywhere in the manual + * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any + * register but instead is a special value meaning "don't update" ITR0/1/2. + */ +enum ice_dyn_idx_t { + ICE_IDX_ITR0 = 0, + ICE_IDX_ITR1 = 1, + ICE_IDX_ITR2 = 2, + ICE_ITR_NONE = 3 /* ITR_NONE must not be used as an index */ +}; + +/* indices into GLINT_ITR registers */ +#define ICE_RX_ITR ICE_IDX_ITR0 +#define ICE_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define ICE_ITR_8K 0x003E + +/* apply ITR HW granularity translation to program the HW registers */ +#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran)) + +#endif /* _ICE_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 6024c80bfa6b..2325be552a84 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -261,6 +261,17 @@ struct ice_hw { u8 fw_min_ver; /* firmware minor version */ u8 fw_patch; /* firmware patch version */ u32 fw_build; /* firmware build number */ + + /* minimum allowed value for different speeds */ +#define ICE_ITR_GRAN_MIN_200 1 +#define ICE_ITR_GRAN_MIN_100 1 +#define ICE_ITR_GRAN_MIN_50 2 +#define ICE_ITR_GRAN_MIN_25 4 + /* ITR granularity in 1 us */ + u8 itr_gran_200; + u8 itr_gran_100; + u8 itr_gran_50; + u8 itr_gran_25; }; /* Checksum and Shadow RAM pointers */ From 3a858ba392c3b19986c40a4c170ddc37b144115f Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:11 -0700 Subject: [PATCH 07/15] ice: Add support for VSI allocation and deallocation This patch introduces data structures and functions to alloc/free VSIs. The driver represents a VSI using the ice_vsi structure. Some noteworthy points about VSI allocation: 1) A VSI is allocated in the firmware using the "add VSI" admin queue command (implemented as ice_aq_add_vsi). The firmware returns an identifier for the allocated VSI. The VSI context is used to program certain aspects (loopback, queue map, etc.) of the VSI's configuration. 2) A VSI is deleted using the "free VSI" admin queue command (implemented as ice_aq_free_vsi). 3) The driver represents a VSI using struct ice_vsi. This is allocated and initialized as part of the ice_vsi_alloc flow, and deallocated as part of the ice_vsi_delete flow. 4) Once the VSI is created, a netdev is allocated and associated with it. The VSI's ring and vector related data structures are also allocated and initialized. 5) A VSI's queues can either be contiguous or scattered. To do this, the driver maintains a bitmap (vsi->avail_txqs) which is kept in sync with the firmware's VSI queue allocation imap. If the VSI can't get a contiguous queue allocation, it will fallback to scatter. This is implemented in ice_vsi_get_qs which is called as part of the VSI setup flow. In the release flow, the VSI's queues are released and the bitmap is updated to reflect this by ice_vsi_put_qs. CC: Shannon Nelson Signed-off-by: Anirudh Venkataramanan Acked-by: Shannon Nelson Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 72 ++ .../net/ethernet/intel/ice/ice_adminq_cmd.h | 199 +++ drivers/net/ethernet/intel/ice/ice_main.c | 1111 +++++++++++++++++ drivers/net/ethernet/intel/ice/ice_switch.c | 115 ++ drivers/net/ethernet/intel/ice/ice_switch.h | 21 + drivers/net/ethernet/intel/ice/ice_txrx.h | 26 + drivers/net/ethernet/intel/ice/ice_type.h | 4 + 7 files changed, 1548 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8103e61e67f9..21d0c237ee3f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -18,6 +20,7 @@ #include #include #include +#include #include #include "ice_devids.h" #include "ice_type.h" @@ -27,17 +30,43 @@ #include "ice_sched.h" #define ICE_BAR0 0 +#define ICE_DFLT_NUM_DESC 128 +#define ICE_REQ_DESC_MULTIPLE 32 #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) #define ICE_AQ_LEN 64 #define ICE_MIN_MSIX 2 +#define ICE_NO_VSI 0xffff #define ICE_MAX_VSI_ALLOC 130 #define ICE_MAX_TXQS 2048 #define ICE_MAX_RXQS 2048 +#define ICE_VSI_MAP_CONTIG 0 +#define ICE_VSI_MAP_SCATTER 1 +#define ICE_MAX_SCATTER_TXQS 16 +#define ICE_MAX_SCATTER_RXQS 16 #define ICE_RES_VALID_BIT 0x8000 #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) +#define ICE_INVAL_Q_INDEX 0xffff #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) +#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \ + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN) + +#define ICE_UP_TABLE_TRANSLATE(val, i) \ + (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \ + ICE_AQ_VSI_UP_TABLE_UP##i##_M) + +struct ice_tc_info { + u16 qoffset; + u16 qcount; +}; + +struct ice_tc_cfg { + u8 numtc; /* Total number of enabled TCs */ + u8 ena_tc; /* TX map */ + struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; +}; + struct ice_res_tracker { u16 num_entries; u16 search_hint; @@ -61,8 +90,47 @@ enum ice_state { /* struct that defines a VSI, associated with a dev */ struct ice_vsi { struct net_device *netdev; + struct ice_sw *vsw; /* switch this VSI is on */ + struct ice_pf *back; /* back pointer to PF */ struct ice_port_info *port_info; /* back pointer to port_info */ + struct ice_ring **rx_rings; /* rx ring array */ + struct ice_ring **tx_rings; /* tx ring array */ + struct ice_q_vector **q_vectors; /* q_vector array */ + DECLARE_BITMAP(state, __ICE_STATE_NBITS); + int num_q_vectors; + int base_vector; + enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ + u16 idx; /* software index in pf->vsi[] */ + + /* Interrupt thresholds */ + u16 work_lmt; + + struct ice_aqc_vsi_props info; /* VSI properties */ + + /* queue information */ + u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + u16 txq_map[ICE_MAX_TXQS]; /* index in pf->avail_txqs */ + u16 rxq_map[ICE_MAX_RXQS]; /* index in pf->avail_rxqs */ + u16 alloc_txq; /* Allocated Tx queues */ + u16 num_txq; /* Used Tx queues */ + u16 alloc_rxq; /* Allocated Rx queues */ + u16 num_rxq; /* Used Rx queues */ + u16 num_desc; + struct ice_tc_cfg tc_cfg; +} ____cacheline_internodealigned_in_smp; + +/* struct that defines an interrupt vector */ +struct ice_q_vector { + struct ice_vsi *vsi; + cpumask_t affinity_mask; + struct napi_struct napi; + struct ice_ring_container rx; + struct ice_ring_container tx; + u16 v_idx; /* index in the vsi->q_vector array. */ + u8 num_ring_tx; /* total number of tx rings in vector */ + u8 num_ring_rx; /* total number of rx rings in vector */ } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { @@ -103,6 +171,10 @@ struct ice_pf { char int_name[ICE_INT_NAME_STR_LEN]; }; +struct ice_netdev_priv { + struct ice_vsi *vsi; +}; + /** * ice_irq_dynamic_ena - Enable default interrupt generation settings * @hw: pointer to hw struct diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index f0837e277b2f..682e8dac72cc 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -9,6 +9,7 @@ */ #define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9 +#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728 struct ice_aqc_generic { __le32 param0; @@ -190,6 +191,199 @@ struct ice_aqc_get_sw_cfg_resp { struct ice_aqc_get_sw_cfg_resp_elem elements[1]; }; +/* Add VSI (indirect 0x0210) + * Update VSI (indirect 0x0211) + * Get VSI (indirect 0x0212) + * Free VSI (indirect 0x0213) + */ +struct ice_aqc_add_get_update_free_vsi { + __le16 vsi_num; +#define ICE_AQ_VSI_NUM_S 0 +#define ICE_AQ_VSI_NUM_M (0x03FF << ICE_AQ_VSI_NUM_S) +#define ICE_AQ_VSI_IS_VALID BIT(15) + __le16 cmd_flags; +#define ICE_AQ_VSI_KEEP_ALLOC 0x1 + u8 vf_id; + u8 reserved; + __le16 vsi_flags; +#define ICE_AQ_VSI_TYPE_S 0 +#define ICE_AQ_VSI_TYPE_M (0x3 << ICE_AQ_VSI_TYPE_S) +#define ICE_AQ_VSI_TYPE_VF 0x0 +#define ICE_AQ_VSI_TYPE_VMDQ2 0x1 +#define ICE_AQ_VSI_TYPE_PF 0x2 +#define ICE_AQ_VSI_TYPE_EMP_MNG 0x3 + __le32 addr_high; + __le32 addr_low; +}; + +/* Response descriptor for: + * Add VSI (indirect 0x0210) + * Update VSI (indirect 0x0211) + * Free VSI (indirect 0x0213) + */ +struct ice_aqc_add_update_free_vsi_resp { + __le16 vsi_num; + __le16 ext_status; + __le16 vsi_used; + __le16 vsi_free; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_vsi_props { + __le16 valid_sections; +#define ICE_AQ_VSI_PROP_SW_VALID BIT(0) +#define ICE_AQ_VSI_PROP_SECURITY_VALID BIT(1) +#define ICE_AQ_VSI_PROP_VLAN_VALID BIT(2) +#define ICE_AQ_VSI_PROP_OUTER_TAG_VALID BIT(3) +#define ICE_AQ_VSI_PROP_INGRESS_UP_VALID BIT(4) +#define ICE_AQ_VSI_PROP_EGRESS_UP_VALID BIT(5) +#define ICE_AQ_VSI_PROP_RXQ_MAP_VALID BIT(6) +#define ICE_AQ_VSI_PROP_Q_OPT_VALID BIT(7) +#define ICE_AQ_VSI_PROP_OUTER_UP_VALID BIT(8) +#define ICE_AQ_VSI_PROP_FLOW_DIR_VALID BIT(11) +#define ICE_AQ_VSI_PROP_PASID_VALID BIT(12) + /* switch section */ + u8 sw_id; + u8 sw_flags; +#define ICE_AQ_VSI_SW_FLAG_ALLOW_LB BIT(5) +#define ICE_AQ_VSI_SW_FLAG_LOCAL_LB BIT(6) +#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE BIT(7) + u8 sw_flags2; +#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S 0 +#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M \ + (0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S) +#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA BIT(0) +#define ICE_AQ_VSI_SW_FLAG_LAN_ENA BIT(4) + u8 veb_stat_id; +#define ICE_AQ_VSI_SW_VEB_STAT_ID_S 0 +#define ICE_AQ_VSI_SW_VEB_STAT_ID_M (0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S) +#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID BIT(5) + /* security section */ + u8 sec_flags; +#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD BIT(0) +#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF BIT(2) +#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S 4 +#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M (0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S) +#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA BIT(0) + u8 sec_reserved; + /* VLAN section */ + __le16 pvid; /* VLANS include priority bits */ + u8 pvlan_reserved[2]; + u8 port_vlan_flags; +#define ICE_AQ_VSI_PVLAN_MODE_S 0 +#define ICE_AQ_VSI_PVLAN_MODE_M (0x3 << ICE_AQ_VSI_PVLAN_MODE_S) +#define ICE_AQ_VSI_PVLAN_MODE_UNTAGGED 0x1 +#define ICE_AQ_VSI_PVLAN_MODE_TAGGED 0x2 +#define ICE_AQ_VSI_PVLAN_MODE_ALL 0x3 +#define ICE_AQ_VSI_PVLAN_INSERT_PVID BIT(2) +#define ICE_AQ_VSI_PVLAN_EMOD_S 3 +#define ICE_AQ_VSI_PVLAN_EMOD_M (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) +#define ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH (0x0 << ICE_AQ_VSI_PVLAN_EMOD_S) +#define ICE_AQ_VSI_PVLAN_EMOD_STR_UP (0x1 << ICE_AQ_VSI_PVLAN_EMOD_S) +#define ICE_AQ_VSI_PVLAN_EMOD_STR (0x2 << ICE_AQ_VSI_PVLAN_EMOD_S) +#define ICE_AQ_VSI_PVLAN_EMOD_NOTHING (0x3 << ICE_AQ_VSI_PVLAN_EMOD_S) + u8 pvlan_reserved2[3]; + /* ingress egress up sections */ + __le32 ingress_table; /* bitmap, 3 bits per up */ +#define ICE_AQ_VSI_UP_TABLE_UP0_S 0 +#define ICE_AQ_VSI_UP_TABLE_UP0_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S) +#define ICE_AQ_VSI_UP_TABLE_UP1_S 3 +#define ICE_AQ_VSI_UP_TABLE_UP1_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S) +#define ICE_AQ_VSI_UP_TABLE_UP2_S 6 +#define ICE_AQ_VSI_UP_TABLE_UP2_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S) +#define ICE_AQ_VSI_UP_TABLE_UP3_S 9 +#define ICE_AQ_VSI_UP_TABLE_UP3_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S) +#define ICE_AQ_VSI_UP_TABLE_UP4_S 12 +#define ICE_AQ_VSI_UP_TABLE_UP4_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S) +#define ICE_AQ_VSI_UP_TABLE_UP5_S 15 +#define ICE_AQ_VSI_UP_TABLE_UP5_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S) +#define ICE_AQ_VSI_UP_TABLE_UP6_S 18 +#define ICE_AQ_VSI_UP_TABLE_UP6_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S) +#define ICE_AQ_VSI_UP_TABLE_UP7_S 21 +#define ICE_AQ_VSI_UP_TABLE_UP7_M (0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S) + __le32 egress_table; /* same defines as for ingress table */ + /* outer tags section */ + __le16 outer_tag; + u8 outer_tag_flags; +#define ICE_AQ_VSI_OUTER_TAG_MODE_S 0 +#define ICE_AQ_VSI_OUTER_TAG_MODE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_MODE_S) +#define ICE_AQ_VSI_OUTER_TAG_NOTHING 0x0 +#define ICE_AQ_VSI_OUTER_TAG_REMOVE 0x1 +#define ICE_AQ_VSI_OUTER_TAG_COPY 0x2 +#define ICE_AQ_VSI_OUTER_TAG_TYPE_S 2 +#define ICE_AQ_VSI_OUTER_TAG_TYPE_M (0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S) +#define ICE_AQ_VSI_OUTER_TAG_NONE 0x0 +#define ICE_AQ_VSI_OUTER_TAG_STAG 0x1 +#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100 0x2 +#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100 0x3 +#define ICE_AQ_VSI_OUTER_TAG_INSERT BIT(4) +#define ICE_AQ_VSI_OUTER_TAG_ACCEPT_HOST BIT(6) + u8 outer_tag_reserved; + /* queue mapping section */ + __le16 mapping_flags; +#define ICE_AQ_VSI_Q_MAP_CONTIG 0x0 +#define ICE_AQ_VSI_Q_MAP_NONCONTIG BIT(0) + __le16 q_mapping[16]; +#define ICE_AQ_VSI_Q_S 0 +#define ICE_AQ_VSI_Q_M (0x7FF << ICE_AQ_VSI_Q_S) + __le16 tc_mapping[8]; +#define ICE_AQ_VSI_TC_Q_OFFSET_S 0 +#define ICE_AQ_VSI_TC_Q_OFFSET_M (0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S) +#define ICE_AQ_VSI_TC_Q_NUM_S 11 +#define ICE_AQ_VSI_TC_Q_NUM_M (0xF << ICE_AQ_VSI_TC_Q_NUM_S) + /* queueing option section */ + u8 q_opt_rss; +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S 0 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI 0x0 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF 0x2 +#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL 0x3 +#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2 +#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6 +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ (0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ (0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_XOR (0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_JHASH (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) + u8 q_opt_tc; +#define ICE_AQ_VSI_Q_OPT_TC_OVR_S 0 +#define ICE_AQ_VSI_Q_OPT_TC_OVR_M (0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S) +#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR BIT(7) + u8 q_opt_flags; +#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN BIT(0) + u8 q_opt_reserved[3]; + /* outer up section */ + __le32 outer_up_table; /* same structure and defines as ingress tbl */ + /* section 10 */ + __le16 sect_10_reserved; + /* flow director section */ + __le16 fd_options; +#define ICE_AQ_VSI_FD_ENABLE BIT(0) +#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE BIT(1) +#define ICE_AQ_VSI_FD_PROG_ENABLE BIT(3) + __le16 max_fd_fltr_dedicated; + __le16 max_fd_fltr_shared; + __le16 fd_def_q; +#define ICE_AQ_VSI_FD_DEF_Q_S 0 +#define ICE_AQ_VSI_FD_DEF_Q_M (0x7FF << ICE_AQ_VSI_FD_DEF_Q_S) +#define ICE_AQ_VSI_FD_DEF_GRP_S 12 +#define ICE_AQ_VSI_FD_DEF_GRP_M (0x7 << ICE_AQ_VSI_FD_DEF_GRP_S) + __le16 fd_report_opt; +#define ICE_AQ_VSI_FD_REPORT_Q_S 0 +#define ICE_AQ_VSI_FD_REPORT_Q_M (0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S) +#define ICE_AQ_VSI_FD_DEF_PRIORITY_S 12 +#define ICE_AQ_VSI_FD_DEF_PRIORITY_M (0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S) +#define ICE_AQ_VSI_FD_DEF_DROP BIT(15) + /* PASID section */ + __le32 pasid_id; +#define ICE_AQ_VSI_PASID_ID_S 0 +#define ICE_AQ_VSI_PASID_ID_M (0xFFFFF << ICE_AQ_VSI_PASID_ID_S) +#define ICE_AQ_VSI_PASID_ID_VALID BIT(31) + u8 reserved[24]; +}; + /* Get Default Topology (indirect 0x0400) */ struct ice_aqc_get_topo { u8 port_num; @@ -576,6 +770,7 @@ struct ice_aq_desc { struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; + struct ice_aqc_add_get_update_free_vsi vsi_cmd; struct ice_aqc_get_link_status get_link_status; } params; }; @@ -626,6 +821,10 @@ enum ice_adminq_opc { /* internal switch commands */ ice_aqc_opc_get_sw_cfg = 0x0200, + /* VSI commands */ + ice_aqc_opc_add_vsi = 0x0210, + ice_aqc_opc_update_vsi = 0x0211, + ice_aqc_opc_free_vsi = 0x0213, ice_aqc_opc_clear_pf_cfg = 0x02A4, /* transmit scheduler commands */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d93eaae5dc60..04e004ba2067 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -28,6 +28,37 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); static struct workqueue_struct *ice_wq; +static int ice_vsi_release(struct ice_vsi *vsi); + +/** + * ice_get_free_slot - get the next non-NULL location index in array + * @array: array to search + * @size: size of the array + * @curr: last known occupied index to be used as a search hint + * + * void * is being used to keep the functionality generic. This lets us use this + * function on any array of pointers. + */ +static int ice_get_free_slot(void *array, int size, int curr) +{ + int **tmp_array = (int **)array; + int next; + + if (curr < (size - 1) && !tmp_array[curr + 1]) { + next = curr + 1; + } else { + int i = 0; + + while ((i < size) && (tmp_array[i])) + i++; + if (i == size) + next = ICE_NO_VSI; + else + next = i; + } + return next; +} + /** * ice_search_res - Search the tracker for a block of resources * @res: pointer to the resource @@ -326,6 +357,270 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; } +/** + * ice_vsi_delete - delete a VSI from the switch + * @vsi: pointer to VSI being removed + */ +static void ice_vsi_delete(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_vsi_ctx ctxt; + enum ice_status status; + + ctxt.vsi_num = vsi->vsi_num; + + memcpy(&ctxt.info, &vsi->info, sizeof(struct ice_aqc_vsi_props)); + + status = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL); + if (status) + dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n", + vsi->vsi_num); +} + +/** + * ice_vsi_setup_q_map - Setup a VSI queue map + * @vsi: the VSI being configured + * @ctxt: VSI context structure + */ +static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) +{ + u16 offset = 0, qmap = 0, pow = 0, qcount; + u16 qcount_tx = vsi->alloc_txq; + u16 qcount_rx = vsi->alloc_rxq; + bool ena_tc0 = false; + int i; + + /* at least TC0 should be enabled by default */ + if (vsi->tc_cfg.numtc) { + if (!(vsi->tc_cfg.ena_tc & BIT(0))) + ena_tc0 = true; + } else { + ena_tc0 = true; + } + + if (ena_tc0) { + vsi->tc_cfg.numtc++; + vsi->tc_cfg.ena_tc |= 1; + } + + qcount = qcount_rx / vsi->tc_cfg.numtc; + + /* find higher power-of-2 of qcount */ + pow = ilog2(qcount); + + if (!is_power_of_2(qcount)) + pow++; + + /* TC mapping is a function of the number of Rx queues assigned to the + * VSI for each traffic class and the offset of these queues. + * The first 10 bits are for queue offset for TC0, next 4 bits for no:of + * queues allocated to TC0. No:of queues is a power-of-2. + * + * If TC is not enabled, the queue offset is set to 0, and allocate one + * queue, this way, traffic for the given TC will be sent to the default + * queue. + * + * Setup number and offset of Rx queues for all TCs for the VSI + */ + for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) { + /* TC is not enabled */ + vsi->tc_cfg.tc_info[i].qoffset = 0; + vsi->tc_cfg.tc_info[i].qcount = 1; + ctxt->info.tc_mapping[i] = 0; + continue; + } + + /* TC is enabled */ + vsi->tc_cfg.tc_info[i].qoffset = offset; + vsi->tc_cfg.tc_info[i].qcount = qcount; + + qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & + ICE_AQ_VSI_TC_Q_NUM_M); + offset += qcount; + ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); + } + + vsi->num_txq = qcount_tx; + vsi->num_rxq = offset; + + /* Rx queue mapping */ + ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG); + /* q_mapping buffer holds the info for the first queue allocated for + * this VSI in the PF space and also the number of queues associated + * with this VSI. + */ + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); + ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq); +} + +/** + * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI + * @ctxt: the VSI context being set + * + * This initializes a default VSI context for all sections except the Queues. + */ +static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) +{ + u32 table = 0; + + memset(&ctxt->info, 0, sizeof(ctxt->info)); + /* VSI's should be allocated from shared pool */ + ctxt->alloc_from_pool = true; + /* Src pruning enabled by default */ + ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; + /* Traffic from VSI can be sent to LAN */ + ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; + /* Allow all packets untagged/tagged */ + ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL & + ICE_AQ_VSI_PVLAN_MODE_M) >> + ICE_AQ_VSI_PVLAN_MODE_S); + /* Show VLAN/UP from packets in Rx descriptors */ + ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH & + ICE_AQ_VSI_PVLAN_EMOD_M) >> + ICE_AQ_VSI_PVLAN_EMOD_S); + /* Have 1:1 UP mapping for both ingress/egress tables */ + table |= ICE_UP_TABLE_TRANSLATE(0, 0); + table |= ICE_UP_TABLE_TRANSLATE(1, 1); + table |= ICE_UP_TABLE_TRANSLATE(2, 2); + table |= ICE_UP_TABLE_TRANSLATE(3, 3); + table |= ICE_UP_TABLE_TRANSLATE(4, 4); + table |= ICE_UP_TABLE_TRANSLATE(5, 5); + table |= ICE_UP_TABLE_TRANSLATE(6, 6); + table |= ICE_UP_TABLE_TRANSLATE(7, 7); + ctxt->info.ingress_table = cpu_to_le32(table); + ctxt->info.egress_table = cpu_to_le32(table); + /* Have 1:1 UP mapping for outer to inner UP table */ + ctxt->info.outer_up_table = cpu_to_le32(table); + /* No Outer tag support outer_tag_flags remains to zero */ +} + +/** + * ice_vsi_add - Create a new VSI or fetch preallocated VSI + * @vsi: the VSI being configured + * + * This initializes a VSI context depending on the VSI type to be added and + * passes it down to the add_vsi aq command to create a new VSI. + */ +static int ice_vsi_add(struct ice_vsi *vsi) +{ + struct ice_vsi_ctx ctxt = { 0 }; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int ret = 0; + + switch (vsi->type) { + case ICE_VSI_PF: + ctxt.flags = ICE_AQ_VSI_TYPE_PF; + break; + default: + return -ENODEV; + } + + ice_set_dflt_vsi_ctx(&ctxt); + /* if the switch is in VEB mode, allow VSI loopback */ + if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB) + ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB; + + ctxt.info.sw_id = vsi->port_info->sw_id; + ice_vsi_setup_q_map(vsi, &ctxt); + + ret = ice_aq_add_vsi(hw, &ctxt, NULL); + if (ret) { + dev_err(&vsi->back->pdev->dev, + "Add VSI AQ call failed, err %d\n", ret); + return -EIO; + } + vsi->info = ctxt.info; + vsi->vsi_num = ctxt.vsi_num; + + return ret; +} + +/** + * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI + * @vsi: the VSI having rings deallocated + */ +static void ice_vsi_clear_rings(struct ice_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) { + for (i = 0; i < vsi->alloc_txq; i++) { + if (vsi->tx_rings[i]) { + kfree_rcu(vsi->tx_rings[i], rcu); + vsi->tx_rings[i] = NULL; + } + } + } + if (vsi->rx_rings) { + for (i = 0; i < vsi->alloc_rxq; i++) { + if (vsi->rx_rings[i]) { + kfree_rcu(vsi->rx_rings[i], rcu); + vsi->rx_rings[i] = NULL; + } + } + } +} + +/** + * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI + * @vsi: VSI which is having rings allocated + */ +static int ice_vsi_alloc_rings(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int i; + + /* Allocate tx_rings */ + for (i = 0; i < vsi->alloc_txq; i++) { + struct ice_ring *ring; + + /* allocate with kzalloc(), free with kfree_rcu() */ + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + + if (!ring) + goto err_out; + + ring->q_index = i; + ring->reg_idx = vsi->txq_map[i]; + ring->ring_active = false; + ring->vsi = vsi; + ring->netdev = vsi->netdev; + ring->dev = &pf->pdev->dev; + ring->count = vsi->num_desc; + + vsi->tx_rings[i] = ring; + } + + /* Allocate rx_rings */ + for (i = 0; i < vsi->alloc_rxq; i++) { + struct ice_ring *ring; + + /* allocate with kzalloc(), free with kfree_rcu() */ + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) + goto err_out; + + ring->q_index = i; + ring->reg_idx = vsi->rxq_map[i]; + ring->ring_active = false; + ring->vsi = vsi; + ring->netdev = vsi->netdev; + ring->dev = &pf->pdev->dev; + ring->count = vsi->num_desc; + vsi->rx_rings[i] = ring; + } + + return 0; + +err_out: + ice_vsi_clear_rings(vsi); + return -ENOMEM; +} + /** * ice_ena_misc_vector - enable the non-queue interrupts * @pf: board private structure @@ -412,6 +707,189 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) return ret; } +/** + * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors + * @vsi: the VSI being configured + * + * This function maps descriptor rings to the queue-specific vectors allotted + * through the MSI-X enabling code. On a constrained vector budget, we map Tx + * and Rx rings to the vector as "efficiently" as possible. + */ +static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) +{ + int q_vectors = vsi->num_q_vectors; + int tx_rings_rem, rx_rings_rem; + int v_id; + + /* initially assigning remaining rings count to VSIs num queue value */ + tx_rings_rem = vsi->num_txq; + rx_rings_rem = vsi->num_rxq; + + for (v_id = 0; v_id < q_vectors; v_id++) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; + int tx_rings_per_v, rx_rings_per_v, q_id, q_base; + + /* Tx rings mapping to vector */ + tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); + q_vector->num_ring_tx = tx_rings_per_v; + q_vector->tx.ring = NULL; + q_base = vsi->num_txq - tx_rings_rem; + + for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { + struct ice_ring *tx_ring = vsi->tx_rings[q_id]; + + tx_ring->q_vector = q_vector; + tx_ring->next = q_vector->tx.ring; + q_vector->tx.ring = tx_ring; + } + tx_rings_rem -= tx_rings_per_v; + + /* Rx rings mapping to vector */ + rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); + q_vector->num_ring_rx = rx_rings_per_v; + q_vector->rx.ring = NULL; + q_base = vsi->num_rxq - rx_rings_rem; + + for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { + struct ice_ring *rx_ring = vsi->rx_rings[q_id]; + + rx_ring->q_vector = q_vector; + rx_ring->next = q_vector->rx.ring; + q_vector->rx.ring = rx_ring; + } + rx_rings_rem -= rx_rings_per_v; + } +} + +/** + * ice_vsi_set_num_qs - Set num queues, descriptors and vectors for a VSI + * @vsi: the VSI being configured + * + * Return 0 on success and a negative value on error + */ +static void ice_vsi_set_num_qs(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + + switch (vsi->type) { + case ICE_VSI_PF: + vsi->alloc_txq = pf->num_lan_tx; + vsi->alloc_rxq = pf->num_lan_rx; + vsi->num_desc = ALIGN(ICE_DFLT_NUM_DESC, ICE_REQ_DESC_MULTIPLE); + vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx); + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", + vsi->type); + break; + } +} + +/** + * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi + * @vsi: VSI pointer + * @alloc_qvectors: a bool to specify if q_vectors need to be allocated. + * + * On error: returns error code (negative) + * On success: returns 0 + */ +static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors) +{ + struct ice_pf *pf = vsi->back; + + /* allocate memory for both Tx and Rx ring pointers */ + vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, + sizeof(struct ice_ring *), GFP_KERNEL); + if (!vsi->tx_rings) + goto err_txrings; + + vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, + sizeof(struct ice_ring *), GFP_KERNEL); + if (!vsi->rx_rings) + goto err_rxrings; + + if (alloc_qvectors) { + /* allocate memory for q_vector pointers */ + vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, + vsi->num_q_vectors, + sizeof(struct ice_q_vector *), + GFP_KERNEL); + if (!vsi->q_vectors) + goto err_vectors; + } + + return 0; + +err_vectors: + devm_kfree(&pf->pdev->dev, vsi->rx_rings); +err_rxrings: + devm_kfree(&pf->pdev->dev, vsi->tx_rings); +err_txrings: + return -ENOMEM; +} + +/** + * ice_vsi_alloc - Allocates the next available struct vsi in the PF + * @pf: board private structure + * @type: type of VSI + * + * returns a pointer to a VSI on success, NULL on failure. + */ +static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type) +{ + struct ice_vsi *vsi = NULL; + + /* Need to protect the allocation of the VSIs at the PF level */ + mutex_lock(&pf->sw_mutex); + + /* If we have already allocated our maximum number of VSIs, + * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index + * is available to be populated + */ + if (pf->next_vsi == ICE_NO_VSI) { + dev_dbg(&pf->pdev->dev, "out of VSI slots!\n"); + goto unlock_pf; + } + + vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL); + if (!vsi) + goto unlock_pf; + + vsi->type = type; + vsi->back = pf; + set_bit(__ICE_DOWN, vsi->state); + vsi->idx = pf->next_vsi; + vsi->work_lmt = ICE_DFLT_IRQ_WORK; + + ice_vsi_set_num_qs(vsi); + + switch (vsi->type) { + case ICE_VSI_PF: + if (ice_vsi_alloc_arrays(vsi, true)) + goto err_rings; + + break; + default: + dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + goto unlock_pf; + } + + /* fill VSI slot in the PF struct */ + pf->vsi[pf->next_vsi] = vsi; + + /* prepare pf->next_vsi for next use */ + pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, + pf->next_vsi); + goto unlock_pf; + +err_rings: + devm_kfree(&pf->pdev->dev, vsi); + vsi = NULL; +unlock_pf: + mutex_unlock(&pf->sw_mutex); + return vsi; +} + /** * ice_free_irq_msix_misc - Unroll misc vector setup * @pf: board private structure @@ -493,6 +971,581 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) return 0; } +/** + * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI + * @vsi: the VSI getting queues + * + * Return 0 on success and a negative value on error + */ +static int ice_vsi_get_qs_contig(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int offset, ret = 0; + + mutex_lock(&pf->avail_q_mutex); + /* look for contiguous block of queues for tx */ + offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS, + 0, vsi->alloc_txq, 0); + if (offset < ICE_MAX_TXQS) { + int i; + + bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq); + for (i = 0; i < vsi->alloc_txq; i++) + vsi->txq_map[i] = i + offset; + } else { + ret = -ENOMEM; + vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER; + } + + /* look for contiguous block of queues for rx */ + offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS, + 0, vsi->alloc_rxq, 0); + if (offset < ICE_MAX_RXQS) { + int i; + + bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq); + for (i = 0; i < vsi->alloc_rxq; i++) + vsi->rxq_map[i] = i + offset; + } else { + ret = -ENOMEM; + vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER; + } + mutex_unlock(&pf->avail_q_mutex); + + return ret; +} + +/** + * ice_vsi_get_qs_scatter - Assign a scattered queues to VSI + * @vsi: the VSI getting queues + * + * Return 0 on success and a negative value on error + */ +static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int i, index = 0; + + mutex_lock(&pf->avail_q_mutex); + + if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) { + for (i = 0; i < vsi->alloc_txq; i++) { + index = find_next_zero_bit(pf->avail_txqs, + ICE_MAX_TXQS, index); + if (index < ICE_MAX_TXQS) { + set_bit(index, pf->avail_txqs); + vsi->txq_map[i] = index; + } else { + goto err_scatter_tx; + } + } + } + + if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) { + for (i = 0; i < vsi->alloc_rxq; i++) { + index = find_next_zero_bit(pf->avail_rxqs, + ICE_MAX_RXQS, index); + if (index < ICE_MAX_RXQS) { + set_bit(index, pf->avail_rxqs); + vsi->rxq_map[i] = index; + } else { + goto err_scatter_rx; + } + } + } + + mutex_unlock(&pf->avail_q_mutex); + return 0; + +err_scatter_rx: + /* unflag any queues we have grabbed (i is failed position) */ + for (index = 0; index < i; index++) { + clear_bit(vsi->rxq_map[index], pf->avail_rxqs); + vsi->rxq_map[index] = 0; + } + i = vsi->alloc_txq; +err_scatter_tx: + /* i is either position of failed attempt or vsi->alloc_txq */ + for (index = 0; index < i; index++) { + clear_bit(vsi->txq_map[index], pf->avail_txqs); + vsi->txq_map[index] = 0; + } + + mutex_unlock(&pf->avail_q_mutex); + return -ENOMEM; +} + +/** + * ice_vsi_get_qs - Assign queues from PF to VSI + * @vsi: the VSI to assign queues to + * + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_get_qs(struct ice_vsi *vsi) +{ + int ret = 0; + + vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG; + vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG; + + /* NOTE: ice_vsi_get_qs_contig() will set the rx/tx mapping + * modes individually to scatter if assigning contiguous queues + * to rx or tx fails + */ + ret = ice_vsi_get_qs_contig(vsi); + if (ret < 0) { + if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) + vsi->alloc_txq = max_t(u16, vsi->alloc_txq, + ICE_MAX_SCATTER_TXQS); + if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) + vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq, + ICE_MAX_SCATTER_RXQS); + ret = ice_vsi_get_qs_scatter(vsi); + } + + return ret; +} + +/** + * ice_vsi_put_qs - Release queues from VSI to PF + * @vsi: the VSI thats going to release queues + */ +static void ice_vsi_put_qs(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int i; + + mutex_lock(&pf->avail_q_mutex); + + for (i = 0; i < vsi->alloc_txq; i++) { + clear_bit(vsi->txq_map[i], pf->avail_txqs); + vsi->txq_map[i] = ICE_INVAL_Q_INDEX; + } + + for (i = 0; i < vsi->alloc_rxq; i++) { + clear_bit(vsi->rxq_map[i], pf->avail_rxqs); + vsi->rxq_map[i] = ICE_INVAL_Q_INDEX; + } + + mutex_unlock(&pf->avail_q_mutex); +} + +/** + * ice_free_q_vector - Free memory allocated for a specific interrupt vector + * @vsi: VSI having the memory freed + * @v_idx: index of the vector to be freed + */ +static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_q_vector *q_vector; + struct ice_ring *ring; + + if (!vsi->q_vectors[v_idx]) { + dev_dbg(&vsi->back->pdev->dev, "Queue vector at index %d not found\n", + v_idx); + return; + } + q_vector = vsi->q_vectors[v_idx]; + + ice_for_each_ring(ring, q_vector->tx) + ring->q_vector = NULL; + ice_for_each_ring(ring, q_vector->rx) + ring->q_vector = NULL; + + /* only VSI with an associated netdev is set up with NAPI */ + if (vsi->netdev) + netif_napi_del(&q_vector->napi); + + devm_kfree(&vsi->back->pdev->dev, q_vector); + vsi->q_vectors[v_idx] = NULL; +} + +/** + * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors + * @vsi: the VSI having memory freed + */ +static void ice_vsi_free_q_vectors(struct ice_vsi *vsi) +{ + int v_idx; + + for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) + ice_free_q_vector(vsi, v_idx); +} + +/** + * ice_cfg_netdev - Setup the netdev flags + * @vsi: the VSI being configured + * + * Returns 0 on success, negative value on failure + */ +static int ice_cfg_netdev(struct ice_vsi *vsi) +{ + struct ice_netdev_priv *np; + struct net_device *netdev; + u8 mac_addr[ETH_ALEN]; + + netdev = alloc_etherdev_mqs(sizeof(struct ice_netdev_priv), + vsi->alloc_txq, vsi->alloc_rxq); + if (!netdev) + return -ENOMEM; + + vsi->netdev = netdev; + np = netdev_priv(netdev); + np->vsi = vsi; + + /* set features that user can change */ + netdev->hw_features = NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_RXHASH; + + /* enable features */ + netdev->features |= netdev->hw_features; + + if (vsi->type == ICE_VSI_PF) { + SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev); + ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); + + ether_addr_copy(netdev->dev_addr, mac_addr); + ether_addr_copy(netdev->perm_addr, mac_addr); + } + + netdev->priv_flags |= IFF_UNICAST_FLT; + + /* setup watchdog timeout value to be 5 second */ + netdev->watchdog_timeo = 5 * HZ; + + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = ICE_MAX_MTU; + + return 0; +} + +/** + * ice_vsi_free_arrays - clean up vsi resources + * @vsi: pointer to VSI being cleared + * @free_qvectors: bool to specify if q_vectors should be deallocated + */ +static void ice_vsi_free_arrays(struct ice_vsi *vsi, bool free_qvectors) +{ + struct ice_pf *pf = vsi->back; + + /* free the ring and vector containers */ + if (free_qvectors && vsi->q_vectors) { + devm_kfree(&pf->pdev->dev, vsi->q_vectors); + vsi->q_vectors = NULL; + } + if (vsi->tx_rings) { + devm_kfree(&pf->pdev->dev, vsi->tx_rings); + vsi->tx_rings = NULL; + } + if (vsi->rx_rings) { + devm_kfree(&pf->pdev->dev, vsi->rx_rings); + vsi->rx_rings = NULL; + } +} + +/** + * ice_vsi_clear - clean up and deallocate the provided vsi + * @vsi: pointer to VSI being cleared + * + * This deallocates the vsi's queue resources, removes it from the PF's + * VSI array if necessary, and deallocates the VSI + * + * Returns 0 on success, negative on failure + */ +static int ice_vsi_clear(struct ice_vsi *vsi) +{ + struct ice_pf *pf = NULL; + + if (!vsi) + return 0; + + if (!vsi->back) + return -EINVAL; + + pf = vsi->back; + + if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) { + dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n", + vsi->idx); + return -EINVAL; + } + + mutex_lock(&pf->sw_mutex); + /* updates the PF for this cleared vsi */ + + pf->vsi[vsi->idx] = NULL; + if (vsi->idx < pf->next_vsi) + pf->next_vsi = vsi->idx; + + ice_vsi_free_arrays(vsi, true); + mutex_unlock(&pf->sw_mutex); + devm_kfree(&pf->pdev->dev, vsi); + + return 0; +} + +/** + * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector + * @vsi: the VSI being configured + * @v_idx: index of the vector in the vsi struct + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. + */ +static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) +{ + struct ice_pf *pf = vsi->back; + struct ice_q_vector *q_vector; + + /* allocate q_vector */ + q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + q_vector->vsi = vsi; + q_vector->v_idx = v_idx; + /* only set affinity_mask if the CPU is online */ + if (cpu_online(v_idx)) + cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + + /* tie q_vector and vsi together */ + vsi->q_vectors[v_idx] = q_vector; + + return 0; +} + +/** + * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors + * @vsi: the VSI being configured + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int v_idx = 0, num_q_vectors; + int err; + + if (vsi->q_vectors[0]) { + dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n", + vsi->vsi_num); + return -EEXIST; + } + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + num_q_vectors = vsi->num_q_vectors; + } else { + err = -EINVAL; + goto err_out; + } + + for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { + err = ice_vsi_alloc_q_vector(vsi, v_idx); + if (err) + goto err_out; + } + + return 0; + +err_out: + while (v_idx--) + ice_free_q_vector(vsi, v_idx); + + dev_err(&pf->pdev->dev, + "Failed to allocate %d q_vector for VSI %d, ret=%d\n", + vsi->num_q_vectors, vsi->vsi_num, err); + vsi->num_q_vectors = 0; + return err; +} + +/** + * ice_vsi_setup_vector_base - Set up the base vector for the given VSI + * @vsi: ptr to the VSI + * + * This should only be called after ice_vsi_alloc() which allocates the + * corresponding SW VSI structure and initializes num_queue_pairs for the + * newly allocated VSI. + * + * Returns 0 on success or negative on failure + */ +static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int num_q_vectors = 0; + + if (vsi->base_vector) { + dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", + vsi->vsi_num, vsi->base_vector); + return -EEXIST; + } + + if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + return -ENOENT; + + switch (vsi->type) { + case ICE_VSI_PF: + num_q_vectors = vsi->num_q_vectors; + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", + vsi->type); + break; + } + + if (num_q_vectors) + vsi->base_vector = ice_get_res(pf, pf->irq_tracker, + num_q_vectors, vsi->idx); + + if (vsi->base_vector < 0) { + dev_err(&pf->pdev->dev, + "Failed to get tracking for %d vectors for VSI %d, err=%d\n", + num_q_vectors, vsi->vsi_num, vsi->base_vector); + return -ENOENT; + } + + return 0; +} + +/** + * ice_vsi_setup - Set up a VSI by a given type + * @pf: board private structure + * @type: VSI type + * @pi: pointer to the port_info instance + * + * This allocates the sw VSI structure and its queue resources. + * + * Returns pointer to the successfully allocated and configure VSI sw struct on + * success, otherwise returns NULL on failure. + */ +static struct ice_vsi * +ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, + struct ice_port_info *pi) +{ + struct device *dev = &pf->pdev->dev; + struct ice_vsi_ctx ctxt = { 0 }; + struct ice_vsi *vsi; + int ret; + + vsi = ice_vsi_alloc(pf, type); + if (!vsi) { + dev_err(dev, "could not allocate VSI\n"); + return NULL; + } + + vsi->port_info = pi; + vsi->vsw = pf->first_sw; + + if (ice_vsi_get_qs(vsi)) { + dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", + vsi->idx); + goto err_get_qs; + } + + /* create the VSI */ + ret = ice_vsi_add(vsi); + if (ret) + goto err_vsi; + + ctxt.vsi_num = vsi->vsi_num; + + switch (vsi->type) { + case ICE_VSI_PF: + ret = ice_cfg_netdev(vsi); + if (ret) + goto err_cfg_netdev; + + ret = register_netdev(vsi->netdev); + if (ret) + goto err_register_netdev; + + netif_carrier_off(vsi->netdev); + + /* make sure transmit queues start off as stopped */ + netif_tx_stop_all_queues(vsi->netdev); + ret = ice_vsi_alloc_q_vectors(vsi); + if (ret) + goto err_msix; + + ret = ice_vsi_setup_vector_base(vsi); + if (ret) + goto err_rings; + + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto err_rings; + + ice_vsi_map_rings_to_vectors(vsi); + + break; + default: + /* if vsi type is not recognized, clean up the resources and + * exit + */ + goto err_rings; + } + return vsi; + +err_rings: + ice_vsi_free_q_vectors(vsi); +err_msix: + if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(vsi->netdev); +err_register_netdev: + if (vsi->netdev) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } +err_cfg_netdev: + ret = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "Free VSI AQ call failed, err %d\n", ret); +err_vsi: + ice_vsi_put_qs(vsi); +err_get_qs: + pf->q_left_tx += vsi->alloc_txq; + pf->q_left_rx += vsi->alloc_rxq; + ice_vsi_clear(vsi); + + return NULL; +} + +/** + * ice_setup_pf_sw - Setup the HW switch on startup or after reset + * @pf: board private structure + * + * Returns 0 on success, negative value on failure + */ +static int ice_setup_pf_sw(struct ice_pf *pf) +{ + struct ice_vsi *vsi; + int status = 0; + + vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info); + if (!vsi) { + status = -ENOMEM; + goto error_exit; + } + +error_exit: + if (vsi) { + ice_vsi_free_q_vectors(vsi); + if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(vsi->netdev); + if (vsi->netdev) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + ice_vsi_delete(vsi); + ice_vsi_put_qs(vsi); + pf->q_left_tx += vsi->alloc_txq; + pf->q_left_rx += vsi->alloc_rxq; + ice_vsi_clear(vsi); + } + return status; +} + /** * ice_determine_q_usage - Calculate queue distribution * @pf: board private structure @@ -810,8 +1863,17 @@ static int ice_probe(struct pci_dev *pdev, /* record the sw_id available for later use */ pf->first_sw->sw_id = hw->port_info->sw_id; + err = ice_setup_pf_sw(pf); + if (err) { + dev_err(&pdev->dev, + "probe failed due to setup pf switch:%d\n", err); + goto err_alloc_sw_unroll; + } return 0; +err_alloc_sw_unroll: + set_bit(__ICE_DOWN, pf->state); + devm_kfree(&pf->pdev->dev, pf->first_sw); err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: @@ -832,12 +1894,24 @@ static int ice_probe(struct pci_dev *pdev, static void ice_remove(struct pci_dev *pdev) { struct ice_pf *pf = pci_get_drvdata(pdev); + int i = 0; + int err; if (!pf) return; set_bit(__ICE_DOWN, pf->state); + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (!pf->vsi[i]) + continue; + + err = ice_vsi_release(pf->vsi[i]); + if (err) + dev_dbg(&pf->pdev->dev, "Failed to release VSI index %d (err %d)\n", + i, err); + } + ice_free_irq_msix_misc(pf); ice_clear_interrupt_scheme(pf); ice_deinit_pf(pf); @@ -913,3 +1987,40 @@ static void __exit ice_module_exit(void) pr_info("module unloaded\n"); } module_exit(ice_module_exit); + +/** + * ice_vsi_release - Delete a VSI and free its resources + * @vsi: the VSI being removed + * + * Returns 0 on success or < 0 on error + */ +static int ice_vsi_release(struct ice_vsi *vsi) +{ + struct ice_pf *pf; + + if (!vsi->back) + return -ENODEV; + pf = vsi->back; + + if (vsi->netdev) { + unregister_netdev(vsi->netdev); + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + + /* reclaim interrupt vectors back to PF */ + ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); + pf->num_avail_msix += vsi->num_q_vectors; + + ice_vsi_delete(vsi); + ice_vsi_free_q_vectors(vsi); + ice_vsi_clear_rings(vsi); + + ice_vsi_put_qs(vsi); + pf->q_left_tx += vsi->alloc_txq; + pf->q_left_rx += vsi->alloc_rxq; + + ice_vsi_clear(vsi); + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 8fc0579b0bbb..b438cee9521e 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -50,6 +50,121 @@ ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp *buf, return status; } +/** + * ice_aq_add_vsi + * @hw: pointer to the hw struct + * @vsi_ctx: pointer to a VSI context struct + * @cd: pointer to command details structure or NULL + * + * Add a VSI context to the hardware (0x0210) + */ +enum ice_status +ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd) +{ + struct ice_aqc_add_update_free_vsi_resp *res; + struct ice_aqc_add_get_update_free_vsi *cmd; + enum ice_status status; + struct ice_aq_desc desc; + + cmd = &desc.params.vsi_cmd; + res = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi); + + if (!vsi_ctx->alloc_from_pool) + cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | + ICE_AQ_VSI_IS_VALID); + + cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), cd); + + if (!status) { + vsi_ctx->vsi_num = le16_to_cpu(res->vsi_num) & ICE_AQ_VSI_NUM_M; + vsi_ctx->vsis_allocd = le16_to_cpu(res->vsi_used); + vsi_ctx->vsis_unallocated = le16_to_cpu(res->vsi_free); + } + + return status; +} + +/** + * ice_aq_update_vsi + * @hw: pointer to the hw struct + * @vsi_ctx: pointer to a VSI context struct + * @cd: pointer to command details structure or NULL + * + * Update VSI context in the hardware (0x0211) + */ +enum ice_status +ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd) +{ + struct ice_aqc_add_update_free_vsi_resp *resp; + struct ice_aqc_add_get_update_free_vsi *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.vsi_cmd; + resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi); + + cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), cd); + + if (!status) { + vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used); + vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); + } + + return status; +} + +/** + * ice_aq_free_vsi + * @hw: pointer to the hw struct + * @vsi_ctx: pointer to a VSI context struct + * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources + * @cd: pointer to command details structure or NULL + * + * Get VSI context info from hardware (0x0213) + */ +enum ice_status +ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + bool keep_vsi_alloc, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_update_free_vsi_resp *resp; + struct ice_aqc_add_get_update_free_vsi *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.vsi_cmd; + resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi); + + cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); + if (keep_vsi_alloc) + cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) { + vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used); + vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); + } + + return status; +} + /* ice_init_port_info - Initialize port_info with switch configuration data * @pi: pointer to port_info * @vsi_port_num: VSI number or port number diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index b98cb978a129..bb8612d8e07b 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -9,6 +9,27 @@ #define ICE_SW_CFG_MAX_BUF_LEN 2048 #define ICE_DFLT_VSI_INVAL 0xff +/* VSI context structure for add/get/update/free operations */ +struct ice_vsi_ctx { + u16 vsi_num; + u16 vsis_allocd; + u16 vsis_unallocated; + u16 flags; + struct ice_aqc_vsi_props info; + bool alloc_from_pool; +}; + +/* VSI related commands */ +enum ice_status +ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd); +enum ice_status +ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + struct ice_sq_cd *cd); +enum ice_status +ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, + bool keep_vsi_alloc, struct ice_sq_cd *cd); + enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 7ceb69ea745e..2dd2232127a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -26,4 +26,30 @@ enum ice_dyn_idx_t { /* apply ITR HW granularity translation to program the HW registers */ #define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran)) +/* descriptor ring, associated with a VSI */ +struct ice_ring { + struct ice_ring *next; /* pointer to next ring in q_vector */ + struct device *dev; /* Used for DMA mapping */ + struct net_device *netdev; /* netdev ring maps to */ + struct ice_vsi *vsi; /* Backreference to associated VSI */ + struct ice_q_vector *q_vector; /* Backreference to associated vector */ + u16 q_index; /* Queue number of ring */ + u16 count; /* Number of descriptors */ + u16 reg_idx; /* HW register index of the ring */ + bool ring_active; /* is ring online or not */ + struct rcu_head rcu; /* to avoid race on free */ +} ____cacheline_internodealigned_in_smp; + +struct ice_ring_container { + /* array of pointers to rings */ + struct ice_ring *ring; + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_pkts; /* total packets processed this int */ + u16 itr; +}; + +/* iterator for handling rings in ring container */ +#define ice_for_each_ring(pos, head) \ + for (pos = (head).ring; pos; pos = pos->next) + #endif /* _ICE_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 2325be552a84..5f58fd84d558 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -55,6 +55,10 @@ enum ice_media_type { ICE_MEDIA_DA, }; +enum ice_vsi_type { + ICE_VSI_PF = 0, +}; + struct ice_link_status { /* Refer to ice_aq_phy_type for bits definition */ u64 phy_type_low; From 9daf8208dd4dee4e13079bd0520a5fb8d20e8b06 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:12 -0700 Subject: [PATCH 08/15] ice: Add support for switch filter programming A VSI needs traffic directed towards it. This is done by programming filter rules on the switch (embedded vSwitch) element in the hardware, which connects the VSI to the ingress/egress port. This patch introduces data structures and functions necessary to add remove or update switch rules on the switch element. This is a pretty low level function that is generic enough to add a whole range of filters. This patch also introduces two top level functions ice_add_mac and ice_remove mac which through a series of intermediate helper functions eventually call ice_aq_sw_rules to add/delete simple MAC based filters. It's worth noting that one invocation of ice_add_mac/ice_remove_mac is capable of adding/deleting multiple MAC filters. Also worth noting is the fact that the driver maintains a list of currently active filters, so every filter addition/removal causes an update to this list. This is done for a couple of reasons: 1) If two VSIs try to add the same filters, we need to detect it and do things a little differently (i.e. use VSI lists, described below) as the same filter can't be added more than once. 2) In the event of a hardware reset we can simply walk through this list and restore the filters. VSI Lists: In a multi-VSI situation, it's possible that multiple VSIs want to add the same filter rule. For example, two VSIs that want to receive broadcast traffic would both add a filter for destination MAC ff:ff:ff:ff:ff:ff. This can become cumbersome to maintain and so this is handled using a VSI list. A VSI list is resource that can be allocated in the hardware using the ice_aq_alloc_free_res admin queue command. Simply put, a VSI list can be thought of as a subscription list containing a set of VSIs to which the packet should be forwarded, should the filter match. For example, if VSI-0 has already added a broadcast filter, and VSI-1 wants to do the same thing, the filter creation flow will detect this, allocate a VSI list and update the switch rule so that broadcast traffic will now be forwarded to the VSI list which contains VSI-0 and VSI-1. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 249 +++ drivers/net/ethernet/intel/ice/ice_common.c | 74 +- drivers/net/ethernet/intel/ice/ice_main.c | 92 ++ drivers/net/ethernet/intel/ice/ice_status.h | 3 + drivers/net/ethernet/intel/ice/ice_switch.c | 1378 +++++++++++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 120 ++ drivers/net/ethernet/intel/ice/ice_type.h | 21 + 7 files changed, 1935 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 682e8dac72cc..9237841439da 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -8,6 +8,7 @@ * descriptor format. It is shared between Firmware and Software. */ +#define ICE_MAX_VSI 768 #define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9 #define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728 @@ -191,6 +192,46 @@ struct ice_aqc_get_sw_cfg_resp { struct ice_aqc_get_sw_cfg_resp_elem elements[1]; }; +/* These resource type defines are used for all switch resource + * commands where a resource type is required, such as: + * Get Resource Allocation command (indirect 0x0204) + * Allocate Resources command (indirect 0x0208) + * Free Resources command (indirect 0x0209) + * Get Allocated Resource Descriptors Command (indirect 0x020A) + */ +#define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03 +#define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04 + +/* Allocate Resources command (indirect 0x0208) + * Free Resources command (indirect 0x0209) + */ +struct ice_aqc_alloc_free_res_cmd { + __le16 num_entries; /* Number of Resource entries */ + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Resource descriptor */ +struct ice_aqc_res_elem { + union { + __le16 sw_resp; + __le16 flu_resp; + } e; +}; + +/* Buffer for Allocate/Free Resources commands */ +struct ice_aqc_alloc_free_res_elem { + __le16 res_type; /* Types defined above cmd 0x0204 */ +#define ICE_AQC_RES_TYPE_SHARED_S 7 +#define ICE_AQC_RES_TYPE_SHARED_M (0x1 << ICE_AQC_RES_TYPE_SHARED_S) +#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S 8 +#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_M \ + (0xF << ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S) + __le16 num_elems; + struct ice_aqc_res_elem elem[1]; +}; + /* Add VSI (indirect 0x0210) * Update VSI (indirect 0x0211) * Get VSI (indirect 0x0212) @@ -384,6 +425,202 @@ struct ice_aqc_vsi_props { u8 reserved[24]; }; +/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) + */ +struct ice_aqc_sw_rules { + /* ops: add switch rules, referring the number of rules. + * ops: update switch rules, referring the number of filters + * ops: remove switch rules, referring the entry index. + * ops: get switch rules, referring to the number of filters. + */ + __le16 num_rules_fltr_entry_index; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Add/Update/Get/Remove lookup Rx/Tx command/response entry + * This structures describes the lookup rules and associated actions. "index" + * is returned as part of a response to a successful Add command, and can be + * used to identify the rule for Update/Get/Remove commands. + */ +struct ice_sw_rule_lkup_rx_tx { + __le16 recipe_id; +#define ICE_SW_RECIPE_LOGICAL_PORT_FWD 10 + /* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */ + __le16 src; + __le32 act; + + /* Bit 0:1 - Action type */ +#define ICE_SINGLE_ACT_TYPE_S 0x00 +#define ICE_SINGLE_ACT_TYPE_M (0x3 << ICE_SINGLE_ACT_TYPE_S) + + /* Bit 2 - Loop back enable + * Bit 3 - LAN enable + */ +#define ICE_SINGLE_ACT_LB_ENABLE BIT(2) +#define ICE_SINGLE_ACT_LAN_ENABLE BIT(3) + + /* Action type = 0 - Forward to VSI or VSI list */ +#define ICE_SINGLE_ACT_VSI_FORWARDING 0x0 + +#define ICE_SINGLE_ACT_VSI_ID_S 4 +#define ICE_SINGLE_ACT_VSI_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_ID_S) +#define ICE_SINGLE_ACT_VSI_LIST_ID_S 4 +#define ICE_SINGLE_ACT_VSI_LIST_ID_M (0x3FF << ICE_SINGLE_ACT_VSI_LIST_ID_S) + /* This bit needs to be set if action is forward to VSI list */ +#define ICE_SINGLE_ACT_VSI_LIST BIT(14) +#define ICE_SINGLE_ACT_VALID_BIT BIT(17) +#define ICE_SINGLE_ACT_DROP BIT(18) + + /* Action type = 1 - Forward to Queue of Queue group */ +#define ICE_SINGLE_ACT_TO_Q 0x1 +#define ICE_SINGLE_ACT_Q_INDEX_S 4 +#define ICE_SINGLE_ACT_Q_INDEX_M (0x7FF << ICE_SINGLE_ACT_Q_INDEX_S) +#define ICE_SINGLE_ACT_Q_REGION_S 15 +#define ICE_SINGLE_ACT_Q_REGION_M (0x7 << ICE_SINGLE_ACT_Q_REGION_S) +#define ICE_SINGLE_ACT_Q_PRIORITY BIT(18) + + /* Action type = 2 - Prune */ +#define ICE_SINGLE_ACT_PRUNE 0x2 +#define ICE_SINGLE_ACT_EGRESS BIT(15) +#define ICE_SINGLE_ACT_INGRESS BIT(16) +#define ICE_SINGLE_ACT_PRUNET BIT(17) + /* Bit 18 should be set to 0 for this action */ + + /* Action type = 2 - Pointer */ +#define ICE_SINGLE_ACT_PTR 0x2 +#define ICE_SINGLE_ACT_PTR_VAL_S 4 +#define ICE_SINGLE_ACT_PTR_VAL_M (0x1FFF << ICE_SINGLE_ACT_PTR_VAL_S) + /* Bit 18 should be set to 1 */ +#define ICE_SINGLE_ACT_PTR_BIT BIT(18) + + /* Action type = 3 - Other actions. Last two bits + * are other action identifier + */ +#define ICE_SINGLE_ACT_OTHER_ACTS 0x3 +#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S 17 +#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M \ + (0x3 << \ ICE_SINGLE_OTHER_ACT_IDENTIFIER_S) + + /* Bit 17:18 - Defines other actions */ + /* Other action = 0 - Mirror VSI */ +#define ICE_SINGLE_OTHER_ACT_MIRROR 0 +#define ICE_SINGLE_ACT_MIRROR_VSI_ID_S 4 +#define ICE_SINGLE_ACT_MIRROR_VSI_ID_M \ + (0x3FF << ICE_SINGLE_ACT_MIRROR_VSI_ID_S) + + /* Other action = 3 - Set Stat count */ +#define ICE_SINGLE_OTHER_ACT_STAT_COUNT 3 +#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_S 4 +#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_M \ + (0x7F << ICE_SINGLE_ACT_STAT_COUNT_INDEX_S) + + __le16 index; /* The index of the rule in the lookup table */ + /* Length and values of the header to be matched per recipe or + * lookup-type + */ + __le16 hdr_len; + u8 hdr[1]; +} __packed; + +/* Add/Update/Remove large action command/response entry + * "index" is returned as part of a response to a successful Add command, and + * can be used to identify the action for Update/Get/Remove commands. + */ +struct ice_sw_rule_lg_act { + __le16 index; /* Index in large action table */ + __le16 size; + __le32 act[1]; /* array of size for actions */ + /* Max number of large actions */ +#define ICE_MAX_LG_ACT 4 + /* Bit 0:1 - Action type */ +#define ICE_LG_ACT_TYPE_S 0 +#define ICE_LG_ACT_TYPE_M (0x7 << ICE_LG_ACT_TYPE_S) + + /* Action type = 0 - Forward to VSI or VSI list */ +#define ICE_LG_ACT_VSI_FORWARDING 0 +#define ICE_LG_ACT_VSI_ID_S 3 +#define ICE_LG_ACT_VSI_ID_M (0x3FF << ICE_LG_ACT_VSI_ID_S) +#define ICE_LG_ACT_VSI_LIST_ID_S 3 +#define ICE_LG_ACT_VSI_LIST_ID_M (0x3FF << ICE_LG_ACT_VSI_LIST_ID_S) + /* This bit needs to be set if action is forward to VSI list */ +#define ICE_LG_ACT_VSI_LIST BIT(13) + +#define ICE_LG_ACT_VALID_BIT BIT(16) + + /* Action type = 1 - Forward to Queue of Queue group */ +#define ICE_LG_ACT_TO_Q 0x1 +#define ICE_LG_ACT_Q_INDEX_S 3 +#define ICE_LG_ACT_Q_INDEX_M (0x7FF << ICE_LG_ACT_Q_INDEX_S) +#define ICE_LG_ACT_Q_REGION_S 14 +#define ICE_LG_ACT_Q_REGION_M (0x7 << ICE_LG_ACT_Q_REGION_S) +#define ICE_LG_ACT_Q_PRIORITY_SET BIT(17) + + /* Action type = 2 - Prune */ +#define ICE_LG_ACT_PRUNE 0x2 +#define ICE_LG_ACT_EGRESS BIT(14) +#define ICE_LG_ACT_INGRESS BIT(15) +#define ICE_LG_ACT_PRUNET BIT(16) + + /* Action type = 3 - Mirror VSI */ +#define ICE_LG_OTHER_ACT_MIRROR 0x3 +#define ICE_LG_ACT_MIRROR_VSI_ID_S 3 +#define ICE_LG_ACT_MIRROR_VSI_ID_M (0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S) + + /* Action type = 5 - Large Action */ +#define ICE_LG_ACT_GENERIC 0x5 +#define ICE_LG_ACT_GENERIC_VALUE_S 3 +#define ICE_LG_ACT_GENERIC_VALUE_M (0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S) +#define ICE_LG_ACT_GENERIC_OFFSET_S 19 +#define ICE_LG_ACT_GENERIC_OFFSET_M (0x7 << ICE_LG_ACT_GENERIC_OFFSET_S) +#define ICE_LG_ACT_GENERIC_PRIORITY_S 22 +#define ICE_LG_ACT_GENERIC_PRIORITY_M (0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S) + + /* Action = 7 - Set Stat count */ +#define ICE_LG_ACT_STAT_COUNT 0x7 +#define ICE_LG_ACT_STAT_COUNT_S 3 +#define ICE_LG_ACT_STAT_COUNT_M (0x7F << ICE_LG_ACT_STAT_COUNT_S) +}; + +/* Add/Update/Remove VSI list command/response entry + * "index" is returned as part of a response to a successful Add command, and + * can be used to identify the VSI list for Update/Get/Remove commands. + */ +struct ice_sw_rule_vsi_list { + __le16 index; /* Index of VSI/Prune list */ + __le16 number_vsi; + __le16 vsi[1]; /* Array of number_vsi VSI numbers */ +}; + +/* Query VSI list command/response entry */ +struct ice_sw_rule_vsi_list_query { + __le16 index; + DECLARE_BITMAP(vsi_list, ICE_MAX_VSI); +} __packed; + +/* Add switch rule response: + * Content of return buffer is same as the input buffer. The status field and + * LUT index are updated as part of the response + */ +struct ice_aqc_sw_rules_elem { + __le16 type; /* Switch rule type, one of T_... */ +#define ICE_AQC_SW_RULES_T_LKUP_RX 0x0 +#define ICE_AQC_SW_RULES_T_LKUP_TX 0x1 +#define ICE_AQC_SW_RULES_T_LG_ACT 0x2 +#define ICE_AQC_SW_RULES_T_VSI_LIST_SET 0x3 +#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR 0x4 +#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET 0x5 +#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR 0x6 + __le16 status; + union { + struct ice_sw_rule_lkup_rx_tx lkup_tx_rx; + struct ice_sw_rule_lg_act lg_act; + struct ice_sw_rule_vsi_list vsi_list; + struct ice_sw_rule_vsi_list_query vsi_list_query; + } __packed pdata; +}; + /* Get Default Topology (indirect 0x0400) */ struct ice_aqc_get_topo { u8 port_num; @@ -766,11 +1003,13 @@ struct ice_aq_desc { struct ice_aqc_list_caps get_cap; struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_get_sw_cfg get_sw_conf; + struct ice_aqc_sw_rules sw_rules; struct ice_aqc_get_topo get_topo; struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; struct ice_aqc_add_get_update_free_vsi vsi_cmd; + struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; struct ice_aqc_get_link_status get_link_status; } params; }; @@ -821,10 +1060,20 @@ enum ice_adminq_opc { /* internal switch commands */ ice_aqc_opc_get_sw_cfg = 0x0200, + /* Alloc/Free/Get Resources */ + ice_aqc_opc_alloc_res = 0x0208, + ice_aqc_opc_free_res = 0x0209, + /* VSI commands */ ice_aqc_opc_add_vsi = 0x0210, ice_aqc_opc_update_vsi = 0x0211, ice_aqc_opc_free_vsi = 0x0213, + + /* switch rules population commands */ + ice_aqc_opc_add_sw_rules = 0x02A0, + ice_aqc_opc_update_sw_rules = 0x02A1, + ice_aqc_opc_remove_sw_rules = 0x02A2, + ice_aqc_opc_clear_pf_cfg = 0x02A4, /* transmit scheduler commands */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index a4ce8a87fb0d..67301fe75482 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -258,6 +258,66 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, return status; } +/** + * ice_init_fltr_mgmt_struct - initializes filter management list and locks + * @hw: pointer to the hw struct + */ +static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw) +{ + struct ice_switch_info *sw; + + hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*hw->switch_info), GFP_KERNEL); + sw = hw->switch_info; + + if (!sw) + return ICE_ERR_NO_MEMORY; + + INIT_LIST_HEAD(&sw->vsi_list_map_head); + + mutex_init(&sw->mac_list_lock); + INIT_LIST_HEAD(&sw->mac_list_head); + + mutex_init(&sw->vlan_list_lock); + INIT_LIST_HEAD(&sw->vlan_list_head); + + mutex_init(&sw->eth_m_list_lock); + INIT_LIST_HEAD(&sw->eth_m_list_head); + + mutex_init(&sw->promisc_list_lock); + INIT_LIST_HEAD(&sw->promisc_list_head); + + mutex_init(&sw->mac_vlan_list_lock); + INIT_LIST_HEAD(&sw->mac_vlan_list_head); + + return 0; +} + +/** + * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks + * @hw: pointer to the hw struct + */ +static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_vsi_list_map_info *v_pos_map; + struct ice_vsi_list_map_info *v_tmp_map; + + list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head, + list_entry) { + list_del(&v_pos_map->list_entry); + devm_kfree(ice_hw_to_dev(hw), v_pos_map); + } + + mutex_destroy(&sw->mac_list_lock); + mutex_destroy(&sw->vlan_list_lock); + mutex_destroy(&sw->eth_m_list_lock); + mutex_destroy(&sw->promisc_list_lock); + mutex_destroy(&sw->mac_vlan_list_lock); + + devm_kfree(ice_hw_to_dev(hw), sw); +} + /** * ice_init_hw - main hardware initialization routine * @hw: pointer to the hardware structure @@ -321,6 +381,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_alloc; + hw->evb_veb = true; + /* Query the allocated resources for tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { @@ -352,21 +414,27 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_sched; + status = ice_init_fltr_mgmt_struct(hw); + if (status) + goto err_unroll_sched; + /* Get port MAC information */ mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp); mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL); if (!mac_buf) - goto err_unroll_sched; + goto err_unroll_fltr_mgmt_struct; status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL); devm_kfree(ice_hw_to_dev(hw), mac_buf); if (status) - goto err_unroll_sched; + goto err_unroll_fltr_mgmt_struct; return 0; +err_unroll_fltr_mgmt_struct: + ice_cleanup_fltr_mgmt_struct(hw); err_unroll_sched: ice_sched_cleanup_all(hw); err_unroll_alloc: @@ -389,6 +457,8 @@ void ice_deinit_hw(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), hw->port_info); hw->port_info = NULL; } + + ice_cleanup_fltr_mgmt_struct(hw); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 04e004ba2067..3b4a2691ddac 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -162,6 +162,57 @@ static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) return count; } +/** + * ice_add_mac_to_list - Add a mac address filter entry to the list + * @vsi: the VSI to be forwarded to + * @add_list: pointer to the list which contains MAC filter entries + * @macaddr: the MAC address to be added. + * + * Adds mac address filter entry to the temp list + * + * Returns 0 on success or ENOMEM on failure. + */ +static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, + const u8 *macaddr) +{ + struct ice_fltr_list_entry *tmp; + struct ice_pf *pf = vsi->back; + + tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + + tmp->fltr_info.flag = ICE_FLTR_TX; + tmp->fltr_info.src = vsi->vsi_num; + tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC; + tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; + tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num; + ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr); + + INIT_LIST_HEAD(&tmp->list_entry); + list_add(&tmp->list_entry, add_list); + + return 0; +} + +/** + * ice_free_fltr_list - free filter lists helper + * @dev: pointer to the device struct + * @h: pointer to the list head to be freed + * + * Helper function to free filter lists previously created using + * ice_add_mac_to_list + */ +static void ice_free_fltr_list(struct device *dev, struct list_head *h) +{ + struct ice_fltr_list_entry *e, *tmp; + + list_for_each_entry_safe(e, tmp, h, list_entry) { + list_del(&e->list_entry); + devm_kfree(dev, e); + } +} + /** * __ice_clean_ctrlq - helper function to clean controlq rings * @pf: ptr to struct ice_pf @@ -1519,6 +1570,8 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, */ static int ice_setup_pf_sw(struct ice_pf *pf) { + LIST_HEAD(tmp_add_list); + u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; int status = 0; @@ -1528,7 +1581,37 @@ static int ice_setup_pf_sw(struct ice_pf *pf) goto error_exit; } + /* tmp_add_list contains a list of MAC addresses for which MAC + * filters need to be programmed. Add the VSI's unicast MAC to + * this list + */ + status = ice_add_mac_to_list(vsi, &tmp_add_list, + vsi->port_info->mac.perm_addr); + if (status) + goto error_exit; + + /* VSI needs to receive broadcast traffic, so add the broadcast + * MAC address to the list. + */ + eth_broadcast_addr(broadcast); + status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); + if (status) + goto error_exit; + + /* program MAC filters for entries in tmp_add_list */ + status = ice_add_mac(&pf->hw, &tmp_add_list); + if (status) { + dev_err(&pf->pdev->dev, "Could not add MAC filters\n"); + status = -ENOMEM; + goto error_exit; + } + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + return status; + error_exit: + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + if (vsi) { ice_vsi_free_q_vectors(vsi); if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED) @@ -1537,6 +1620,7 @@ static int ice_setup_pf_sw(struct ice_pf *pf) free_netdev(vsi->netdev); vsi->netdev = NULL; } + ice_vsi_delete(vsi); ice_vsi_put_qs(vsi); pf->q_left_tx += vsi->alloc_txq; @@ -1869,6 +1953,13 @@ static int ice_probe(struct pci_dev *pdev, "probe failed due to setup pf switch:%d\n", err); goto err_alloc_sw_unroll; } + + /* Driver is mostly up */ + clear_bit(__ICE_DOWN, pf->state); + + /* since everything is good, start the service timer */ + mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); + return 0; err_alloc_sw_unroll: @@ -2012,6 +2103,7 @@ static int ice_vsi_release(struct ice_vsi *vsi) ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_msix += vsi->num_q_vectors; + ice_remove_vsi_fltr(&pf->hw, vsi->vsi_num); ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); ice_vsi_clear_rings(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index 144712db1ad2..365dfb86dcb9 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -7,6 +7,7 @@ /* Error Codes */ enum ice_status { ICE_ERR_PARAM = -1, + ICE_ERR_NOT_IMPL = -2, ICE_ERR_NOT_READY = -3, ICE_ERR_INVAL_SIZE = -6, ICE_ERR_DEVICE_NOT_SUPPORTED = -8, @@ -15,6 +16,8 @@ enum ice_status { ICE_ERR_NO_MEMORY = -11, ICE_ERR_CFG = -12, ICE_ERR_OUT_OF_RANGE = -13, + ICE_ERR_ALREADY_EXISTS = -14, + ICE_ERR_DOES_NOT_EXIST = -15, ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, ICE_ERR_AQ_ERROR = -100, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index b438cee9521e..3944dac6cfc6 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -3,6 +3,88 @@ #include "ice_switch.h" +#define ICE_ETH_DA_OFFSET 0 +#define ICE_ETH_ETHTYPE_OFFSET 12 +#define ICE_ETH_VLAN_TCI_OFFSET 14 +#define ICE_MAX_VLAN_ID 0xFFF + +/* Dummy ethernet header needed in the ice_aqc_sw_rules_elem + * struct to configure any switch filter rules. + * {DA (6 bytes), SA(6 bytes), + * Ether type (2 bytes for header without VLAN tag) OR + * VLAN tag (4 bytes for header with VLAN tag) } + * + * Word on Hardcoded values + * byte 0 = 0x2: to identify it as locally administered DA MAC + * byte 6 = 0x2: to identify it as locally administered SA MAC + * byte 12 = 0x81 & byte 13 = 0x00: + * In case of VLAN filter first two bytes defines ether type (0x8100) + * and remaining two bytes are placeholder for programming a given VLAN id + * In case of Ether type filter it is treated as header without VLAN tag + * and byte 12 and 13 is used to program a given Ether type instead + */ +#define DUMMY_ETH_HDR_LEN 16 +static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, + 0x2, 0, 0, 0, 0, 0, + 0x81, 0, 0, 0}; + +#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_lkup_rx_tx) + DUMMY_ETH_HDR_LEN - 1) +#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_lkup_rx_tx) - 1) +#define ICE_SW_RULE_LG_ACT_SIZE(n) \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_lg_act) - \ + sizeof(((struct ice_sw_rule_lg_act *)0)->act) + \ + ((n) * sizeof(((struct ice_sw_rule_lg_act *)0)->act))) +#define ICE_SW_RULE_VSI_LIST_SIZE(n) \ + (sizeof(struct ice_aqc_sw_rules_elem) - \ + sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \ + sizeof(struct ice_sw_rule_vsi_list) - \ + sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi) + \ + ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi))) + +/** + * ice_aq_alloc_free_res - command to allocate/free resources + * @hw: pointer to the hw struct + * @num_entries: number of resource entries in buffer + * @buf: Indirect buffer to hold data parameters and response + * @buf_size: size of buffer for indirect commands + * @opc: pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Helper function to allocate/free resources using the admin queue commands + */ +static enum ice_status +ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, + struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aqc_alloc_free_res_cmd *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.sw_res_ctrl; + + if (!buf) + return ICE_ERR_PARAM; + + if (buf_size < (num_entries * sizeof(buf->elem[0]))) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_entries = cpu_to_le16(num_entries); + + return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); +} + /** * ice_aq_get_sw_cfg - get switch configuration * @hw: pointer to the hardware structure @@ -165,6 +247,93 @@ ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, return status; } +/** + * ice_aq_alloc_free_vsi_list + * @hw: pointer to the hw struct + * @vsi_list_id: VSI list id returned or used for lookup + * @lkup_type: switch rule filter lookup type + * @opc: switch rules population command type - pass in the command opcode + * + * allocates or free a VSI list resource + */ +static enum ice_status +ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, + enum ice_sw_lkup_type lkup_type, + enum ice_adminq_opc opc) +{ + struct ice_aqc_alloc_free_res_elem *sw_buf; + struct ice_aqc_res_elem *vsi_ele; + enum ice_status status; + u16 buf_len; + + buf_len = sizeof(*sw_buf); + sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL); + if (!sw_buf) + return ICE_ERR_NO_MEMORY; + sw_buf->num_elems = cpu_to_le16(1); + + if (lkup_type == ICE_SW_LKUP_MAC || + lkup_type == ICE_SW_LKUP_MAC_VLAN || + lkup_type == ICE_SW_LKUP_ETHERTYPE || + lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || + lkup_type == ICE_SW_LKUP_PROMISC || + lkup_type == ICE_SW_LKUP_PROMISC_VLAN) { + sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP); + } else if (lkup_type == ICE_SW_LKUP_VLAN) { + sw_buf->res_type = + cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE); + } else { + status = ICE_ERR_PARAM; + goto ice_aq_alloc_free_vsi_list_exit; + } + + if (opc == ice_aqc_opc_free_res) + sw_buf->elem[0].e.sw_resp = cpu_to_le16(*vsi_list_id); + + status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL); + if (status) + goto ice_aq_alloc_free_vsi_list_exit; + + if (opc == ice_aqc_opc_alloc_res) { + vsi_ele = &sw_buf->elem[0]; + *vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp); + } + +ice_aq_alloc_free_vsi_list_exit: + devm_kfree(ice_hw_to_dev(hw), sw_buf); + return status; +} + +/** + * ice_aq_sw_rules - add/update/remove switch rules + * @hw: pointer to the hw struct + * @rule_list: pointer to switch rule population list + * @rule_list_sz: total size of the rule list in bytes + * @num_rules: number of switch rules in the rule_list + * @opc: switch rules population command type - pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware + */ +static enum ice_status +ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, + u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc; + + if (opc != ice_aqc_opc_add_sw_rules && + opc != ice_aqc_opc_update_sw_rules && + opc != ice_aqc_opc_remove_sw_rules) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + desc.params.sw_rules.num_rules_fltr_entry_index = + cpu_to_le16(num_rules); + return ice_aq_send_cmd(hw, &desc, rule_list, rule_list_sz, cd); +} + /* ice_init_port_info - Initialize port_info with switch configuration data * @pi: pointer to port_info * @vsi_port_num: VSI number or port number @@ -257,3 +426,1212 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), (void *)rbuf); return status; } + +/** + * ice_fill_sw_info - Helper function to populate lb_en and lan_en + * @hw: pointer to the hardware structure + * @f_info: filter info structure to fill/update + * + * This helper function populates the lb_en and lan_en elements of the provided + * ice_fltr_info struct using the switch's type and characteristics of the + * switch rule being configured. + */ +static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *f_info) +{ + f_info->lb_en = false; + f_info->lan_en = false; + if ((f_info->flag & ICE_FLTR_TX) && + (f_info->fltr_act == ICE_FWD_TO_VSI || + f_info->fltr_act == ICE_FWD_TO_VSI_LIST || + f_info->fltr_act == ICE_FWD_TO_Q || + f_info->fltr_act == ICE_FWD_TO_QGRP)) { + f_info->lb_en = true; + if (!(hw->evb_veb && f_info->lkup_type == ICE_SW_LKUP_MAC && + is_unicast_ether_addr(f_info->l_data.mac.mac_addr))) + f_info->lan_en = true; + } +} + +/** + * ice_fill_sw_rule - Helper function to fill switch rule structure + * @hw: pointer to the hardware structure + * @f_info: entry containing packet forwarding information + * @s_rule: switch rule structure to be filled in based on mac_entry + * @opc: switch rules population command type - pass in the command opcode + */ +static void +ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, + struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc) +{ + u16 vlan_id = ICE_MAX_VLAN_ID + 1; + u8 eth_hdr[DUMMY_ETH_HDR_LEN]; + void *daddr = NULL; + u32 act = 0; + __be16 *off; + + if (opc == ice_aqc_opc_remove_sw_rules) { + s_rule->pdata.lkup_tx_rx.act = 0; + s_rule->pdata.lkup_tx_rx.index = + cpu_to_le16(f_info->fltr_rule_id); + s_rule->pdata.lkup_tx_rx.hdr_len = 0; + return; + } + + /* initialize the ether header with a dummy header */ + memcpy(eth_hdr, dummy_eth_header, sizeof(dummy_eth_header)); + ice_fill_sw_info(hw, f_info); + + switch (f_info->fltr_act) { + case ICE_FWD_TO_VSI: + act |= (f_info->fwd_id.vsi_id << ICE_SINGLE_ACT_VSI_ID_S) & + ICE_SINGLE_ACT_VSI_ID_M; + if (f_info->lkup_type != ICE_SW_LKUP_VLAN) + act |= ICE_SINGLE_ACT_VSI_FORWARDING | + ICE_SINGLE_ACT_VALID_BIT; + break; + case ICE_FWD_TO_VSI_LIST: + act |= ICE_SINGLE_ACT_VSI_LIST; + act |= (f_info->fwd_id.vsi_list_id << + ICE_SINGLE_ACT_VSI_LIST_ID_S) & + ICE_SINGLE_ACT_VSI_LIST_ID_M; + if (f_info->lkup_type != ICE_SW_LKUP_VLAN) + act |= ICE_SINGLE_ACT_VSI_FORWARDING | + ICE_SINGLE_ACT_VALID_BIT; + break; + case ICE_FWD_TO_Q: + act |= ICE_SINGLE_ACT_TO_Q; + act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & + ICE_SINGLE_ACT_Q_INDEX_M; + break; + case ICE_FWD_TO_QGRP: + act |= ICE_SINGLE_ACT_TO_Q; + act |= (f_info->qgrp_size << ICE_SINGLE_ACT_Q_REGION_S) & + ICE_SINGLE_ACT_Q_REGION_M; + break; + case ICE_DROP_PACKET: + act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP; + break; + default: + return; + } + + if (f_info->lb_en) + act |= ICE_SINGLE_ACT_LB_ENABLE; + if (f_info->lan_en) + act |= ICE_SINGLE_ACT_LAN_ENABLE; + + switch (f_info->lkup_type) { + case ICE_SW_LKUP_MAC: + daddr = f_info->l_data.mac.mac_addr; + break; + case ICE_SW_LKUP_VLAN: + vlan_id = f_info->l_data.vlan.vlan_id; + if (f_info->fltr_act == ICE_FWD_TO_VSI || + f_info->fltr_act == ICE_FWD_TO_VSI_LIST) { + act |= ICE_SINGLE_ACT_PRUNE; + act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS; + } + break; + case ICE_SW_LKUP_ETHERTYPE_MAC: + daddr = f_info->l_data.ethertype_mac.mac_addr; + /* fall-through */ + case ICE_SW_LKUP_ETHERTYPE: + off = (__be16 *)ð_hdr[ICE_ETH_ETHTYPE_OFFSET]; + *off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype); + break; + case ICE_SW_LKUP_MAC_VLAN: + daddr = f_info->l_data.mac_vlan.mac_addr; + vlan_id = f_info->l_data.mac_vlan.vlan_id; + break; + case ICE_SW_LKUP_PROMISC_VLAN: + vlan_id = f_info->l_data.mac_vlan.vlan_id; + /* fall-through */ + case ICE_SW_LKUP_PROMISC: + daddr = f_info->l_data.mac_vlan.mac_addr; + break; + default: + break; + } + + s_rule->type = (f_info->flag & ICE_FLTR_RX) ? + cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) : + cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); + + /* Recipe set depending on lookup type */ + s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(f_info->lkup_type); + s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(f_info->src); + s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act); + + if (daddr) + ether_addr_copy(ð_hdr[ICE_ETH_DA_OFFSET], daddr); + + if (!(vlan_id > ICE_MAX_VLAN_ID)) { + off = (__be16 *)ð_hdr[ICE_ETH_VLAN_TCI_OFFSET]; + *off = cpu_to_be16(vlan_id); + } + + /* Create the switch rule with the final dummy Ethernet header */ + if (opc != ice_aqc_opc_update_sw_rules) + s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(sizeof(eth_hdr)); + + memcpy(s_rule->pdata.lkup_tx_rx.hdr, eth_hdr, sizeof(eth_hdr)); +} + +/** + * ice_add_marker_act + * @hw: pointer to the hardware structure + * @m_ent: the management entry for which sw marker needs to be added + * @sw_marker: sw marker to tag the Rx descriptor with + * @l_id: large action resource id + * + * Create a large action to hold software marker and update the switch rule + * entry pointed by m_ent with newly created large action + */ +static enum ice_status +ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, + u16 sw_marker, u16 l_id) +{ + struct ice_aqc_sw_rules_elem *lg_act, *rx_tx; + /* For software marker we need 3 large actions + * 1. FWD action: FWD TO VSI or VSI LIST + * 2. GENERIC VALUE action to hold the profile id + * 3. GENERIC VALUE action to hold the software marker id + */ + const u16 num_lg_acts = 3; + enum ice_status status; + u16 lg_act_size; + u16 rules_size; + u16 vsi_info; + u32 act; + + if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC) + return ICE_ERR_PARAM; + + /* Create two back-to-back switch rules and submit them to the HW using + * one memory buffer: + * 1. Large Action + * 2. Look up tx rx + */ + lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(num_lg_acts); + rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE; + lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL); + if (!lg_act) + return ICE_ERR_NO_MEMORY; + + rx_tx = (struct ice_aqc_sw_rules_elem *)((u8 *)lg_act + lg_act_size); + + /* Fill in the first switch rule i.e. large action */ + lg_act->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT); + lg_act->pdata.lg_act.index = cpu_to_le16(l_id); + lg_act->pdata.lg_act.size = cpu_to_le16(num_lg_acts); + + /* First action VSI forwarding or VSI list forwarding depending on how + * many VSIs + */ + vsi_info = (m_ent->vsi_count > 1) ? + m_ent->fltr_info.fwd_id.vsi_list_id : + m_ent->fltr_info.fwd_id.vsi_id; + + act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT; + act |= (vsi_info << ICE_LG_ACT_VSI_LIST_ID_S) & + ICE_LG_ACT_VSI_LIST_ID_M; + if (m_ent->vsi_count > 1) + act |= ICE_LG_ACT_VSI_LIST; + lg_act->pdata.lg_act.act[0] = cpu_to_le32(act); + + /* Second action descriptor type */ + act = ICE_LG_ACT_GENERIC; + + act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; + lg_act->pdata.lg_act.act[1] = cpu_to_le32(act); + + act = (7 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; + + /* Third action Marker value */ + act |= ICE_LG_ACT_GENERIC; + act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & + ICE_LG_ACT_GENERIC_VALUE_M; + + act |= (0 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M; + lg_act->pdata.lg_act.act[2] = cpu_to_le32(act); + + /* call the fill switch rule to fill the lookup tx rx structure */ + ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx, + ice_aqc_opc_update_sw_rules); + + /* Update the action to point to the large action id */ + rx_tx->pdata.lkup_tx_rx.act = + cpu_to_le32(ICE_SINGLE_ACT_PTR | + ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) & + ICE_SINGLE_ACT_PTR_VAL_M)); + + /* Use the filter rule id of the previously created rule with single + * act. Once the update happens, hardware will treat this as large + * action + */ + rx_tx->pdata.lkup_tx_rx.index = + cpu_to_le16(m_ent->fltr_info.fltr_rule_id); + + status = ice_aq_sw_rules(hw, lg_act, rules_size, 2, + ice_aqc_opc_update_sw_rules, NULL); + if (!status) { + m_ent->lg_act_idx = l_id; + m_ent->sw_marker_id = sw_marker; + } + + devm_kfree(ice_hw_to_dev(hw), lg_act); + return status; +} + +/** + * ice_create_vsi_list_map + * @hw: pointer to the hardware structure + * @vsi_array: array of VSIs to form a VSI list + * @num_vsi: num VSI in the array + * @vsi_list_id: VSI list id generated as part of allocate resource + * + * Helper function to create a new entry of VSI list id to VSI mapping + * using the given VSI list id + */ +static struct ice_vsi_list_map_info * +ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi, + u16 vsi_list_id) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_vsi_list_map_info *v_map; + int i; + + v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL); + if (!v_map) + return NULL; + + v_map->vsi_list_id = vsi_list_id; + + for (i = 0; i < num_vsi; i++) + set_bit(vsi_array[i], v_map->vsi_map); + + list_add(&v_map->list_entry, &sw->vsi_list_map_head); + return v_map; +} + +/** + * ice_update_vsi_list_rule + * @hw: pointer to the hardware structure + * @vsi_array: array of VSIs to form a VSI list + * @num_vsi: num VSI in the array + * @vsi_list_id: VSI list id generated as part of allocate resource + * @remove: Boolean value to indicate if this is a remove action + * @opc: switch rules population command type - pass in the command opcode + * @lkup_type: lookup type of the filter + * + * Call AQ command to add a new switch rule or update existing switch rule + * using the given VSI list id + */ +static enum ice_status +ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi, + u16 vsi_list_id, bool remove, enum ice_adminq_opc opc, + enum ice_sw_lkup_type lkup_type) +{ + struct ice_aqc_sw_rules_elem *s_rule; + enum ice_status status; + u16 s_rule_size; + u16 type; + int i; + + if (!num_vsi) + return ICE_ERR_PARAM; + + if (lkup_type == ICE_SW_LKUP_MAC || + lkup_type == ICE_SW_LKUP_MAC_VLAN || + lkup_type == ICE_SW_LKUP_ETHERTYPE || + lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || + lkup_type == ICE_SW_LKUP_PROMISC || + lkup_type == ICE_SW_LKUP_PROMISC_VLAN) + type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR : + ICE_AQC_SW_RULES_T_VSI_LIST_SET; + else if (lkup_type == ICE_SW_LKUP_VLAN) + type = remove ? ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR : + ICE_AQC_SW_RULES_T_PRUNE_LIST_SET; + else + return ICE_ERR_PARAM; + + s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(num_vsi); + s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + for (i = 0; i < num_vsi; i++) + s_rule->pdata.vsi_list.vsi[i] = cpu_to_le16(vsi_array[i]); + + s_rule->type = cpu_to_le16(type); + s_rule->pdata.vsi_list.number_vsi = cpu_to_le16(num_vsi); + s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id); + + status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL); + + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_create_vsi_list_rule - Creates and populates a VSI list rule + * @hw: pointer to the hw struct + * @vsi_array: array of VSIs to form a VSI list + * @num_vsi: number of VSIs in the array + * @vsi_list_id: stores the ID of the VSI list to be created + * @lkup_type: switch rule filter's lookup type + */ +static enum ice_status +ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi, + u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type) +{ + enum ice_status status; + int i; + + for (i = 0; i < num_vsi; i++) + if (vsi_array[i] >= ICE_MAX_VSI) + return ICE_ERR_OUT_OF_RANGE; + + status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type, + ice_aqc_opc_alloc_res); + if (status) + return status; + + /* Update the newly created VSI list to include the specified VSIs */ + return ice_update_vsi_list_rule(hw, vsi_array, num_vsi, *vsi_list_id, + false, ice_aqc_opc_add_sw_rules, + lkup_type); +} + +/** + * ice_create_pkt_fwd_rule + * @hw: pointer to the hardware structure + * @f_entry: entry containing packet forwarding information + * + * Create switch rule with given filter information and add an entry + * to the corresponding filter management list to track this switch rule + * and VSI mapping + */ +static enum ice_status +ice_create_pkt_fwd_rule(struct ice_hw *hw, + struct ice_fltr_list_entry *f_entry) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_mgmt_list_entry *fm_entry; + struct ice_aqc_sw_rules_elem *s_rule; + enum ice_sw_lkup_type l_type; + enum ice_status status; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry), + GFP_KERNEL); + if (!fm_entry) { + status = ICE_ERR_NO_MEMORY; + goto ice_create_pkt_fwd_rule_exit; + } + + fm_entry->fltr_info = f_entry->fltr_info; + + /* Initialize all the fields for the management entry */ + fm_entry->vsi_count = 1; + fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX; + fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID; + fm_entry->counter_index = ICE_INVAL_COUNTER_ID; + + ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule, + ice_aqc_opc_add_sw_rules); + + status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + ice_aqc_opc_add_sw_rules, NULL); + if (status) { + devm_kfree(ice_hw_to_dev(hw), fm_entry); + goto ice_create_pkt_fwd_rule_exit; + } + + f_entry->fltr_info.fltr_rule_id = + le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + fm_entry->fltr_info.fltr_rule_id = + le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + + /* The book keeping entries will get removed when base driver + * calls remove filter AQ command + */ + l_type = fm_entry->fltr_info.lkup_type; + if (l_type == ICE_SW_LKUP_MAC) { + mutex_lock(&sw->mac_list_lock); + list_add(&fm_entry->list_entry, &sw->mac_list_head); + mutex_unlock(&sw->mac_list_lock); + } else if (l_type == ICE_SW_LKUP_VLAN) { + mutex_lock(&sw->vlan_list_lock); + list_add(&fm_entry->list_entry, &sw->vlan_list_head); + mutex_unlock(&sw->vlan_list_lock); + } else if (l_type == ICE_SW_LKUP_ETHERTYPE || + l_type == ICE_SW_LKUP_ETHERTYPE_MAC) { + mutex_lock(&sw->eth_m_list_lock); + list_add(&fm_entry->list_entry, &sw->eth_m_list_head); + mutex_unlock(&sw->eth_m_list_lock); + } else if (l_type == ICE_SW_LKUP_PROMISC || + l_type == ICE_SW_LKUP_PROMISC_VLAN) { + mutex_lock(&sw->promisc_list_lock); + list_add(&fm_entry->list_entry, &sw->promisc_list_head); + mutex_unlock(&sw->promisc_list_lock); + } else if (fm_entry->fltr_info.lkup_type == ICE_SW_LKUP_MAC_VLAN) { + mutex_lock(&sw->mac_vlan_list_lock); + list_add(&fm_entry->list_entry, &sw->mac_vlan_list_head); + mutex_unlock(&sw->mac_vlan_list_lock); + } else { + status = ICE_ERR_NOT_IMPL; + } +ice_create_pkt_fwd_rule_exit: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_update_pkt_fwd_rule + * @hw: pointer to the hardware structure + * @rule_id: rule of previously created switch rule to update + * @vsi_list_id: VSI list id to be updated with + * @f_info: ice_fltr_info to pull other information for switch rule + * + * Call AQ command to update a previously created switch rule with a + * VSI list id + */ +static enum ice_status +ice_update_pkt_fwd_rule(struct ice_hw *hw, u16 rule_id, u16 vsi_list_id, + struct ice_fltr_info f_info) +{ + struct ice_aqc_sw_rules_elem *s_rule; + struct ice_fltr_info tmp_fltr; + enum ice_status status; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + tmp_fltr = f_info; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST; + tmp_fltr.fwd_id.vsi_list_id = vsi_list_id; + + ice_fill_sw_rule(hw, &tmp_fltr, s_rule, + ice_aqc_opc_update_sw_rules); + + s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(rule_id); + + /* Update switch rule with new rule set to forward VSI list */ + status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + ice_aqc_opc_update_sw_rules, NULL); + + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_handle_vsi_list_mgmt + * @hw: pointer to the hardware structure + * @m_entry: pointer to current filter management list entry + * @cur_fltr: filter information from the book keeping entry + * @new_fltr: filter information with the new VSI to be added + * + * Call AQ command to add or update previously created VSI list with new VSI. + * + * Helper function to do book keeping associated with adding filter information + * The algorithm to do the booking keeping is described below : + * When a VSI needs to subscribe to a given filter( MAC/VLAN/Ethtype etc.) + * if only one VSI has been added till now + * Allocate a new VSI list and add two VSIs + * to this list using switch rule command + * Update the previously created switch rule with the + * newly created VSI list id + * if a VSI list was previously created + * Add the new VSI to the previously created VSI list set + * using the update switch rule command + */ +static enum ice_status +ice_handle_vsi_list_mgmt(struct ice_hw *hw, + struct ice_fltr_mgmt_list_entry *m_entry, + struct ice_fltr_info *cur_fltr, + struct ice_fltr_info *new_fltr) +{ + enum ice_status status = 0; + u16 vsi_list_id = 0; + + if ((cur_fltr->fltr_act == ICE_FWD_TO_Q || + cur_fltr->fltr_act == ICE_FWD_TO_QGRP)) + return ICE_ERR_NOT_IMPL; + + if ((new_fltr->fltr_act == ICE_FWD_TO_Q || + new_fltr->fltr_act == ICE_FWD_TO_QGRP) && + (cur_fltr->fltr_act == ICE_FWD_TO_VSI || + cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST)) + return ICE_ERR_NOT_IMPL; + + if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) { + /* Only one entry existed in the mapping and it was not already + * a part of a VSI list. So, create a VSI list with the old and + * new VSIs. + */ + u16 vsi_id_arr[2]; + u16 fltr_rule; + + /* A rule already exists with the new VSI being added */ + if (cur_fltr->fwd_id.vsi_id == new_fltr->fwd_id.vsi_id) + return ICE_ERR_ALREADY_EXISTS; + + vsi_id_arr[0] = cur_fltr->fwd_id.vsi_id; + vsi_id_arr[1] = new_fltr->fwd_id.vsi_id; + status = ice_create_vsi_list_rule(hw, &vsi_id_arr[0], 2, + &vsi_list_id, + new_fltr->lkup_type); + if (status) + return status; + + fltr_rule = cur_fltr->fltr_rule_id; + /* Update the previous switch rule of "MAC forward to VSI" to + * "MAC fwd to VSI list" + */ + status = ice_update_pkt_fwd_rule(hw, fltr_rule, vsi_list_id, + *new_fltr); + if (status) + return status; + + cur_fltr->fwd_id.vsi_list_id = vsi_list_id; + cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST; + m_entry->vsi_list_info = + ice_create_vsi_list_map(hw, &vsi_id_arr[0], 2, + vsi_list_id); + + /* If this entry was large action then the large action needs + * to be updated to point to FWD to VSI list + */ + if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID) + status = + ice_add_marker_act(hw, m_entry, + m_entry->sw_marker_id, + m_entry->lg_act_idx); + } else { + u16 vsi_id = new_fltr->fwd_id.vsi_id; + enum ice_adminq_opc opcode; + + /* A rule already exists with the new VSI being added */ + if (test_bit(vsi_id, m_entry->vsi_list_info->vsi_map)) + return 0; + + /* Update the previously created VSI list set with + * the new VSI id passed in + */ + vsi_list_id = cur_fltr->fwd_id.vsi_list_id; + opcode = ice_aqc_opc_update_sw_rules; + + status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id, + false, opcode, + new_fltr->lkup_type); + /* update VSI list mapping info with new VSI id */ + if (!status) + set_bit(vsi_id, m_entry->vsi_list_info->vsi_map); + } + if (!status) + m_entry->vsi_count++; + return status; +} + +/** + * ice_find_mac_entry + * @hw: pointer to the hardware structure + * @mac_addr: MAC address to search for + * + * Helper function to search for a MAC entry using a given MAC address + * Returns pointer to the entry if found. + */ +static struct ice_fltr_mgmt_list_entry * +ice_find_mac_entry(struct ice_hw *hw, u8 *mac_addr) +{ + struct ice_fltr_mgmt_list_entry *m_list_itr, *mac_ret = NULL; + struct ice_switch_info *sw = hw->switch_info; + + mutex_lock(&sw->mac_list_lock); + list_for_each_entry(m_list_itr, &sw->mac_list_head, list_entry) { + u8 *buf = &m_list_itr->fltr_info.l_data.mac.mac_addr[0]; + + if (ether_addr_equal(buf, mac_addr)) { + mac_ret = m_list_itr; + break; + } + } + mutex_unlock(&sw->mac_list_lock); + return mac_ret; +} + +/** + * ice_add_shared_mac - Add one MAC shared filter rule + * @hw: pointer to the hardware structure + * @f_entry: structure containing MAC forwarding information + * + * Adds or updates the book keeping list for the MAC addresses + */ +static enum ice_status +ice_add_shared_mac(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_info *new_fltr, *cur_fltr; + struct ice_fltr_mgmt_list_entry *m_entry; + + new_fltr = &f_entry->fltr_info; + + m_entry = ice_find_mac_entry(hw, &new_fltr->l_data.mac.mac_addr[0]); + if (!m_entry) + return ice_create_pkt_fwd_rule(hw, f_entry); + + cur_fltr = &m_entry->fltr_info; + + return ice_handle_vsi_list_mgmt(hw, m_entry, cur_fltr, new_fltr); +} + +/** + * ice_add_mac - Add a MAC address based filter rule + * @hw: pointer to the hardware structure + * @m_list: list of MAC addresses and forwarding information + * + * IMPORTANT: When the ucast_shared flag is set to false and m_list has + * multiple unicast addresses, the function assumes that all the + * addresses are unique in a given add_mac call. It doesn't + * check for duplicates in this case, removing duplicates from a given + * list should be taken care of in the caller of this function. + */ +enum ice_status +ice_add_mac(struct ice_hw *hw, struct list_head *m_list) +{ + struct ice_aqc_sw_rules_elem *s_rule, *r_iter; + struct ice_fltr_list_entry *m_list_itr; + u16 elem_sent, total_elem_left; + enum ice_status status = 0; + u16 num_unicast = 0; + u16 s_rule_size; + + if (!m_list || !hw) + return ICE_ERR_PARAM; + + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0]; + + if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC) + return ICE_ERR_PARAM; + if (is_zero_ether_addr(add)) + return ICE_ERR_PARAM; + if (is_unicast_ether_addr(add) && !hw->ucast_shared) { + /* Don't overwrite the unicast address */ + if (ice_find_mac_entry(hw, add)) + return ICE_ERR_ALREADY_EXISTS; + num_unicast++; + } else if (is_multicast_ether_addr(add) || + (is_unicast_ether_addr(add) && hw->ucast_shared)) { + status = ice_add_shared_mac(hw, m_list_itr); + if (status) { + m_list_itr->status = ICE_FLTR_STATUS_FW_FAIL; + return status; + } + m_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS; + } + } + + /* Exit if no suitable entries were found for adding bulk switch rule */ + if (!num_unicast) + return 0; + + /* Allocate switch rule buffer for the bulk update for unicast */ + s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE; + s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + r_iter = s_rule; + list_for_each_entry(m_list_itr, m_list, list_entry) { + struct ice_fltr_info *f_info = &m_list_itr->fltr_info; + u8 *addr = &f_info->l_data.mac.mac_addr[0]; + + if (is_unicast_ether_addr(addr)) { + ice_fill_sw_rule(hw, &m_list_itr->fltr_info, + r_iter, ice_aqc_opc_add_sw_rules); + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + } + + /* Call AQ bulk switch rule update for all unicast addresses */ + r_iter = s_rule; + /* Call AQ switch rule in AQ_MAX chunk */ + for (total_elem_left = num_unicast; total_elem_left > 0; + total_elem_left -= elem_sent) { + struct ice_aqc_sw_rules_elem *entry = r_iter; + + elem_sent = min(total_elem_left, + (u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size)); + status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size, + elem_sent, ice_aqc_opc_add_sw_rules, + NULL); + if (status) + goto ice_add_mac_exit; + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + (elem_sent * s_rule_size)); + } + + /* Fill up rule id based on the value returned from FW */ + r_iter = s_rule; + list_for_each_entry(m_list_itr, m_list, list_entry) { + struct ice_fltr_info *f_info = &m_list_itr->fltr_info; + u8 *addr = &f_info->l_data.mac.mac_addr[0]; + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_mgmt_list_entry *fm_entry; + + if (is_unicast_ether_addr(addr)) { + f_info->fltr_rule_id = + le16_to_cpu(r_iter->pdata.lkup_tx_rx.index); + f_info->fltr_act = ICE_FWD_TO_VSI; + /* Create an entry to track this MAC address */ + fm_entry = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*fm_entry), GFP_KERNEL); + if (!fm_entry) { + status = ICE_ERR_NO_MEMORY; + goto ice_add_mac_exit; + } + fm_entry->fltr_info = *f_info; + fm_entry->vsi_count = 1; + /* The book keeping entries will get removed when + * base driver calls remove filter AQ command + */ + mutex_lock(&sw->mac_list_lock); + list_add(&fm_entry->list_entry, &sw->mac_list_head); + mutex_unlock(&sw->mac_list_lock); + + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + } + +ice_add_mac_exit: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_remove_vsi_list_rule + * @hw: pointer to the hardware structure + * @vsi_list_id: VSI list id generated as part of allocate resource + * @lkup_type: switch rule filter lookup type + */ +static enum ice_status +ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id, + enum ice_sw_lkup_type lkup_type) +{ + struct ice_aqc_sw_rules_elem *s_rule; + enum ice_status status; + u16 s_rule_size; + + s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0); + s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR); + s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id); + /* FW expects number of VSIs in vsi_list resource to be 0 for clear + * command. Since memory is zero'ed out during initialization, it's not + * necessary to explicitly initialize the variable to 0. + */ + + status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, + ice_aqc_opc_remove_sw_rules, NULL); + if (!status) + /* Free the vsi_list resource that we allocated */ + status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type, + ice_aqc_opc_free_res); + + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_handle_rem_vsi_list_mgmt + * @hw: pointer to the hardware structure + * @vsi_id: ID of the VSI to remove + * @fm_list_itr: filter management entry for which the VSI list management + * needs to be done + */ +static enum ice_status +ice_handle_rem_vsi_list_mgmt(struct ice_hw *hw, u16 vsi_id, + struct ice_fltr_mgmt_list_entry *fm_list_itr) +{ + struct ice_switch_info *sw = hw->switch_info; + enum ice_status status = 0; + enum ice_sw_lkup_type lkup_type; + bool is_last_elem = true; + bool conv_list = false; + bool del_list = false; + u16 vsi_list_id; + + lkup_type = fm_list_itr->fltr_info.lkup_type; + vsi_list_id = fm_list_itr->fltr_info.fwd_id.vsi_list_id; + + if (fm_list_itr->vsi_count > 1) { + status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id, + true, + ice_aqc_opc_update_sw_rules, + lkup_type); + if (status) + return status; + fm_list_itr->vsi_count--; + is_last_elem = false; + clear_bit(vsi_id, fm_list_itr->vsi_list_info->vsi_map); + } + + /* For non-VLAN rules that forward packets to a VSI list, convert them + * to forwarding packets to a VSI if there is only one VSI left in the + * list. Unused lists are then removed. + * VLAN rules need to use VSI lists even with only one VSI. + */ + if (fm_list_itr->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST) { + if (lkup_type == ICE_SW_LKUP_VLAN) { + del_list = is_last_elem; + } else if (fm_list_itr->vsi_count == 1) { + conv_list = true; + del_list = true; + } + } + + if (del_list) { + /* Remove the VSI list since it is no longer used */ + struct ice_vsi_list_map_info *vsi_list_info = + fm_list_itr->vsi_list_info; + + status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type); + if (status) + return status; + + if (conv_list) { + u16 rem_vsi_id; + + rem_vsi_id = find_first_bit(vsi_list_info->vsi_map, + ICE_MAX_VSI); + + /* Error out when the expected last element is not in + * the VSI list map + */ + if (rem_vsi_id == ICE_MAX_VSI) + return ICE_ERR_OUT_OF_RANGE; + + /* Change the list entry action from VSI_LIST to VSI */ + fm_list_itr->fltr_info.fltr_act = ICE_FWD_TO_VSI; + fm_list_itr->fltr_info.fwd_id.vsi_id = rem_vsi_id; + } + + list_del(&vsi_list_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), vsi_list_info); + fm_list_itr->vsi_list_info = NULL; + } + + if (conv_list) { + /* Convert the rule's forward action to forwarding packets to + * a VSI + */ + struct ice_aqc_sw_rules_elem *s_rule; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule, + ice_aqc_opc_update_sw_rules); + + s_rule->pdata.lkup_tx_rx.index = + cpu_to_le16(fm_list_itr->fltr_info.fltr_rule_id); + + status = ice_aq_sw_rules(hw, s_rule, + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1, + ice_aqc_opc_update_sw_rules, NULL); + devm_kfree(ice_hw_to_dev(hw), s_rule); + if (status) + return status; + } + + if (is_last_elem) { + /* Remove the lookup rule */ + struct ice_aqc_sw_rules_elem *s_rule; + + s_rule = devm_kzalloc(ice_hw_to_dev(hw), + ICE_SW_RULE_RX_TX_NO_HDR_SIZE, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule, + ice_aqc_opc_remove_sw_rules); + + status = ice_aq_sw_rules(hw, s_rule, + ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1, + ice_aqc_opc_remove_sw_rules, NULL); + if (status) + return status; + + /* Remove a book keeping entry from the MAC address list */ + mutex_lock(&sw->mac_list_lock); + list_del(&fm_list_itr->list_entry); + mutex_unlock(&sw->mac_list_lock); + devm_kfree(ice_hw_to_dev(hw), fm_list_itr); + devm_kfree(ice_hw_to_dev(hw), s_rule); + } + return status; +} + +/** + * ice_remove_mac_entry + * @hw: pointer to the hardware structure + * @f_entry: structure containing MAC forwarding information + */ +static enum ice_status +ice_remove_mac_entry(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_mgmt_list_entry *m_entry; + u16 vsi_id; + u8 *add; + + add = &f_entry->fltr_info.l_data.mac.mac_addr[0]; + + m_entry = ice_find_mac_entry(hw, add); + if (!m_entry) + return ICE_ERR_PARAM; + + vsi_id = f_entry->fltr_info.fwd_id.vsi_id; + return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, m_entry); +} + +/** + * ice_remove_mac - remove a MAC address based filter rule + * @hw: pointer to the hardware structure + * @m_list: list of MAC addresses and forwarding information + * + * This function removes either a MAC filter rule or a specific VSI from a + * VSI list for a multicast MAC address. + * + * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by + * ice_add_mac. Caller should be aware that this call will only work if all + * the entries passed into m_list were added previously. It will not attempt to + * do a partial remove of entries that were found. + */ +enum ice_status +ice_remove_mac(struct ice_hw *hw, struct list_head *m_list) +{ + struct ice_aqc_sw_rules_elem *s_rule, *r_iter; + u8 s_rule_size = ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_mgmt_list_entry *m_entry; + struct ice_fltr_list_entry *m_list_itr; + u16 elem_sent, total_elem_left; + enum ice_status status = 0; + u16 num_unicast = 0; + + if (!m_list) + return ICE_ERR_PARAM; + + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr; + + if (is_unicast_ether_addr(addr) && !hw->ucast_shared) + num_unicast++; + else if (is_multicast_ether_addr(addr) || + (is_unicast_ether_addr(addr) && hw->ucast_shared)) + ice_remove_mac_entry(hw, m_list_itr); + } + + /* Exit if no unicast addresses found. Multicast switch rules + * were added individually + */ + if (!num_unicast) + return 0; + + /* Allocate switch rule buffer for the bulk update for unicast */ + s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size, + GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + r_iter = s_rule; + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr; + + if (is_unicast_ether_addr(addr)) { + m_entry = ice_find_mac_entry(hw, addr); + if (!m_entry) { + status = ICE_ERR_DOES_NOT_EXIST; + goto ice_remove_mac_exit; + } + + ice_fill_sw_rule(hw, &m_entry->fltr_info, + r_iter, ice_aqc_opc_remove_sw_rules); + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + } + + /* Call AQ bulk switch rule update for all unicast addresses */ + r_iter = s_rule; + /* Call AQ switch rule in AQ_MAX chunk */ + for (total_elem_left = num_unicast; total_elem_left > 0; + total_elem_left -= elem_sent) { + struct ice_aqc_sw_rules_elem *entry = r_iter; + + elem_sent = min(total_elem_left, + (u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size)); + status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size, + elem_sent, ice_aqc_opc_remove_sw_rules, + NULL); + if (status) + break; + r_iter = (struct ice_aqc_sw_rules_elem *) + ((u8 *)r_iter + s_rule_size); + } + + list_for_each_entry(m_list_itr, m_list, list_entry) { + u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr; + + if (is_unicast_ether_addr(addr)) { + m_entry = ice_find_mac_entry(hw, addr); + if (!m_entry) + return ICE_ERR_OUT_OF_RANGE; + mutex_lock(&sw->mac_list_lock); + list_del(&m_entry->list_entry); + mutex_unlock(&sw->mac_list_lock); + devm_kfree(ice_hw_to_dev(hw), m_entry); + } + } + +ice_remove_mac_exit: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + +/** + * ice_add_to_vsi_fltr_list - Add VSI filters to the list + * @hw: pointer to the hardware structure + * @vsi_id: ID of VSI to remove filters from + * @lkup_list_head: pointer to the list that has certain lookup type filters + * @vsi_list_head: pointer to the list pertaining to VSI with vsi_id + */ +static enum ice_status +ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_id, + struct list_head *lkup_list_head, + struct list_head *vsi_list_head) +{ + struct ice_fltr_mgmt_list_entry *fm_entry; + + /* check to make sure VSI id is valid and within boundary */ + if (vsi_id >= + (sizeof(fm_entry->vsi_list_info->vsi_map) * BITS_PER_BYTE - 1)) + return ICE_ERR_PARAM; + + list_for_each_entry(fm_entry, lkup_list_head, list_entry) { + struct ice_fltr_info *fi; + + fi = &fm_entry->fltr_info; + if ((fi->fltr_act == ICE_FWD_TO_VSI && + fi->fwd_id.vsi_id == vsi_id) || + (fi->fltr_act == ICE_FWD_TO_VSI_LIST && + (test_bit(vsi_id, fm_entry->vsi_list_info->vsi_map)))) { + struct ice_fltr_list_entry *tmp; + + /* this memory is freed up in the caller function + * ice_remove_vsi_lkup_fltr() once filters for + * this VSI are removed + */ + tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp), + GFP_KERNEL); + if (!tmp) + return ICE_ERR_NO_MEMORY; + + memcpy(&tmp->fltr_info, fi, sizeof(*fi)); + + /* Expected below fields to be set to ICE_FWD_TO_VSI and + * the particular VSI id since we are only removing this + * one VSI + */ + if (fi->fltr_act == ICE_FWD_TO_VSI_LIST) { + tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; + tmp->fltr_info.fwd_id.vsi_id = vsi_id; + } + + list_add(&tmp->list_entry, vsi_list_head); + } + } + return 0; +} + +/** + * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI + * @hw: pointer to the hardware structure + * @vsi_id: ID of VSI to remove filters from + * @lkup: switch rule filter lookup type + */ +static void +ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_id, + enum ice_sw_lkup_type lkup) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_list_entry *fm_entry; + struct list_head remove_list_head; + struct ice_fltr_list_entry *tmp; + enum ice_status status; + + INIT_LIST_HEAD(&remove_list_head); + switch (lkup) { + case ICE_SW_LKUP_MAC: + mutex_lock(&sw->mac_list_lock); + status = ice_add_to_vsi_fltr_list(hw, vsi_id, + &sw->mac_list_head, + &remove_list_head); + mutex_unlock(&sw->mac_list_lock); + if (!status) { + ice_remove_mac(hw, &remove_list_head); + goto free_fltr_list; + } + break; + case ICE_SW_LKUP_VLAN: + case ICE_SW_LKUP_MAC_VLAN: + case ICE_SW_LKUP_ETHERTYPE: + case ICE_SW_LKUP_ETHERTYPE_MAC: + case ICE_SW_LKUP_PROMISC: + case ICE_SW_LKUP_PROMISC_VLAN: + case ICE_SW_LKUP_DFLT: + ice_debug(hw, ICE_DBG_SW, + "Remove filters for this lookup type hasn't been implemented yet\n"); + break; + } + + return; +free_fltr_list: + list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) { + list_del(&fm_entry->list_entry); + devm_kfree(ice_hw_to_dev(hw), fm_entry); + } +} + +/** + * ice_remove_vsi_fltr - Remove all filters for a VSI + * @hw: pointer to the hardware structure + * @vsi_id: ID of VSI to remove filters from + */ +void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id) +{ + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC_VLAN); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_VLAN); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_DFLT); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE_MAC); + ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC_VLAN); +} diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index bb8612d8e07b..1b312e7ab2b5 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -19,6 +19,122 @@ struct ice_vsi_ctx { bool alloc_from_pool; }; +enum ice_sw_fwd_act_type { + ICE_FWD_TO_VSI = 0, + ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */ + ICE_FWD_TO_Q, + ICE_FWD_TO_QGRP, + ICE_DROP_PACKET, + ICE_INVAL_ACT +}; + +/* Switch recipe ID enum values are specific to hardware */ +enum ice_sw_lkup_type { + ICE_SW_LKUP_ETHERTYPE = 0, + ICE_SW_LKUP_MAC = 1, + ICE_SW_LKUP_MAC_VLAN = 2, + ICE_SW_LKUP_PROMISC = 3, + ICE_SW_LKUP_VLAN = 4, + ICE_SW_LKUP_DFLT = 5, + ICE_SW_LKUP_ETHERTYPE_MAC = 8, + ICE_SW_LKUP_PROMISC_VLAN = 9, +}; + +struct ice_fltr_info { + /* Look up information: how to look up packet */ + enum ice_sw_lkup_type lkup_type; + /* Forward action: filter action to do after lookup */ + enum ice_sw_fwd_act_type fltr_act; + /* rule ID returned by firmware once filter rule is created */ + u16 fltr_rule_id; + u16 flag; +#define ICE_FLTR_RX BIT(0) +#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) + + /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */ + u16 src; + + union { + struct { + u8 mac_addr[ETH_ALEN]; + } mac; + struct { + u8 mac_addr[ETH_ALEN]; + u16 vlan_id; + } mac_vlan; + struct { + u16 vlan_id; + } vlan; + /* Set lkup_type as ICE_SW_LKUP_ETHERTYPE + * if just using ethertype as filter. Set lkup_type as + * ICE_SW_LKUP_ETHERTYPE_MAC if MAC also needs to be + * passed in as filter. + */ + struct { + u16 ethertype; + u8 mac_addr[ETH_ALEN]; /* optional */ + } ethertype_mac; + } l_data; + + /* Depending on filter action */ + union { + /* queue id in case of ICE_FWD_TO_Q and starting + * queue id in case of ICE_FWD_TO_QGRP. + */ + u16 q_id:11; + u16 vsi_id:10; + u16 vsi_list_id:10; + } fwd_id; + + /* Set to num_queues if action is ICE_FWD_TO_QGRP. This field + * determines the range of queues the packet needs to be forwarded to + */ + u8 qgrp_size; + + /* Rule creations populate these indicators basing on the switch type */ + bool lb_en; /* Indicate if packet can be looped back */ + bool lan_en; /* Indicate if packet can be forwarded to the uplink */ +}; + +/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */ +struct ice_vsi_list_map_info { + struct list_head list_entry; + DECLARE_BITMAP(vsi_map, ICE_MAX_VSI); + u16 vsi_list_id; +}; + +enum ice_sw_fltr_status { + ICE_FLTR_STATUS_NEW = 0, + ICE_FLTR_STATUS_FW_SUCCESS, + ICE_FLTR_STATUS_FW_FAIL, +}; + +struct ice_fltr_list_entry { + struct list_head list_entry; + enum ice_sw_fltr_status status; + struct ice_fltr_info fltr_info; +}; + +/* This defines an entry in the list that maintains MAC or VLAN membership + * to HW list mapping, since multiple VSIs can subscribe to the same MAC or + * VLAN. As an optimization the VSI list should be created only when a + * second VSI becomes a subscriber to the VLAN address. + */ +struct ice_fltr_mgmt_list_entry { + /* back pointer to VSI list id to VSI list mapping */ + struct ice_vsi_list_map_info *vsi_list_info; + u16 vsi_count; +#define ICE_INVAL_LG_ACT_INDEX 0xffff + u16 lg_act_idx; +#define ICE_INVAL_SW_MARKER_ID 0xffff + u16 sw_marker_id; + struct list_head list_entry; + struct ice_fltr_info fltr_info; +#define ICE_INVAL_COUNTER_ID 0xff + u8 counter_index; +}; + /* VSI related commands */ enum ice_status ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, @@ -32,4 +148,8 @@ ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw); +/* Switch/bridge related commands */ +enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst); +enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst); +void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 5f58fd84d558..8926715b76ee 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -223,6 +223,22 @@ struct ice_port_info { bool is_vf; }; +struct ice_switch_info { + /* Switch VSI lists to MAC/VLAN translation */ + struct mutex mac_list_lock; /* protect MAC list */ + struct list_head mac_list_head; + struct mutex vlan_list_lock; /* protect VLAN list */ + struct list_head vlan_list_head; + struct mutex eth_m_list_lock; /* protect ethtype list */ + struct list_head eth_m_list_head; + struct mutex promisc_list_lock; /* protect promisc mode list */ + struct list_head promisc_list_head; + struct mutex mac_vlan_list_lock; /* protect MAC-VLAN list */ + struct list_head mac_vlan_list_head; + + struct list_head vsi_list_map_head; +}; + /* Port hardware description */ struct ice_hw { u8 __iomem *hw_addr; @@ -248,11 +264,14 @@ struct ice_hw { u8 max_cgds; u8 sw_entry_point_layer; + bool evb_veb; /* true for VEB, false for VEPA */ struct ice_bus_info bus; struct ice_nvm_info nvm; struct ice_hw_dev_caps dev_caps; /* device capabilities */ struct ice_hw_func_caps func_caps; /* function capabilities */ + struct ice_switch_info *switch_info; /* switch filter lists */ + /* Control Queue info */ struct ice_ctl_q_info adminq; @@ -276,6 +295,8 @@ struct ice_hw { u8 itr_gran_100; u8 itr_gran_50; u8 itr_gran_25; + bool ucast_shared; /* true if VSIs can share unicast addr */ + }; /* Checksum and Shadow RAM pointers */ From cdedef59deb020e78721d820a5692100128c8c73 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:13 -0700 Subject: [PATCH 09/15] ice: Configure VSIs for Tx/Rx This patch configures the VSIs to be able to send and receive packets by doing the following: 1) Initialize flexible parser to extract and include certain fields in the Rx descriptor. 2) Add Tx queues by programming the Tx queue context (implemented in ice_vsi_cfg_txqs). Note that adding the queues also enables (starts) the queues. 3) Add Rx queues by programming Rx queue context (implemented in ice_vsi_cfg_rxqs). Note that this only adds queues but doesn't start them. The rings will be started by calling ice_vsi_start_rx_rings on interface up. 4) Configure interrupts for VSI queues. 5) Implement ice_open and ice_stop. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/Makefile | 3 +- drivers/net/ethernet/intel/ice/ice.h | 36 +- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 86 ++ drivers/net/ethernet/intel/ice/ice_common.c | 602 +++++++++ drivers/net/ethernet/intel/ice/ice_common.h | 13 + .../net/ethernet/intel/ice/ice_hw_autogen.h | 59 + .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 246 ++++ drivers/net/ethernet/intel/ice/ice_main.c | 1140 ++++++++++++++++- drivers/net/ethernet/intel/ice/ice_sched.c | 105 ++ drivers/net/ethernet/intel/ice/ice_sched.h | 5 + drivers/net/ethernet/intel/ice/ice_status.h | 2 + drivers/net/ethernet/intel/ice/ice_txrx.c | 361 ++++++ drivers/net/ethernet/intel/ice/ice_txrx.h | 75 ++ drivers/net/ethernet/intel/ice/ice_type.h | 2 + 14 files changed, 2729 insertions(+), 6 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx.c diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index bd2cbe14e76e..de82fc875775 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -12,4 +12,5 @@ ice-y := ice_main.o \ ice_common.o \ ice_nvm.o \ ice_switch.o \ - ice_sched.o + ice_sched.o \ + ice_txrx.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 21d0c237ee3f..46cf82212ed8 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -11,8 +11,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -43,6 +45,8 @@ #define ICE_VSI_MAP_SCATTER 1 #define ICE_MAX_SCATTER_TXQS 16 #define ICE_MAX_SCATTER_RXQS 16 +#define ICE_Q_WAIT_RETRY_LIMIT 10 +#define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT) #define ICE_RES_VALID_BIT 0x8000 #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) #define ICE_INVAL_Q_INDEX 0xffff @@ -56,6 +60,14 @@ (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \ ICE_AQ_VSI_UP_TABLE_UP##i##_M) +#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i])) + +#define ice_for_each_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->num_txq; (i)++) + +#define ice_for_each_rxq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) + struct ice_tc_info { u16 qoffset; u16 qcount; @@ -96,6 +108,9 @@ struct ice_vsi { struct ice_ring **rx_rings; /* rx ring array */ struct ice_ring **tx_rings; /* tx ring array */ struct ice_q_vector **q_vectors; /* q_vector array */ + + irqreturn_t (*irq_handler)(int irq, void *data); + DECLARE_BITMAP(state, __ICE_STATE_NBITS); int num_q_vectors; int base_vector; @@ -106,8 +121,14 @@ struct ice_vsi { /* Interrupt thresholds */ u16 work_lmt; + u16 max_frame; + u16 rx_buf_len; + struct ice_aqc_vsi_props info; /* VSI properties */ + bool irqs_ready; + bool current_isup; /* Sync 'link up' logging */ + /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ u8 rx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -128,9 +149,11 @@ struct ice_q_vector { struct napi_struct napi; struct ice_ring_container rx; struct ice_ring_container tx; + struct irq_affinity_notify affinity_notify; u16 v_idx; /* index in the vsi->q_vector array. */ u8 num_ring_tx; /* total number of tx rings in vector */ u8 num_ring_rx; /* total number of rx rings in vector */ + char name[ICE_INT_NAME_STR_LEN]; } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { @@ -178,10 +201,14 @@ struct ice_netdev_priv { /** * ice_irq_dynamic_ena - Enable default interrupt generation settings * @hw: pointer to hw struct + * @vsi: pointer to vsi struct, can be NULL + * @q_vector: pointer to q_vector, can be NULL */ -static inline void ice_irq_dynamic_ena(struct ice_hw *hw) +static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, + struct ice_q_vector *q_vector) { - u32 vector = ((struct ice_pf *)hw->back)->oicr_idx; + u32 vector = (vsi && q_vector) ? vsi->base_vector + q_vector->v_idx : + ((struct ice_pf *)hw->back)->oicr_idx; int itr = ICE_ITR_NONE; u32 val; @@ -190,7 +217,10 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw) */ val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (itr << GLINT_DYN_CTL_ITR_INDX_S); - + if (vsi) + if (test_bit(__ICE_DOWN, vsi->state)) + return; wr32(hw, GLINT_DYN_CTL(vector), val); } + #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 9237841439da..c7abcc1dbbcc 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -968,6 +968,87 @@ struct ice_aqc_nvm { __le32 addr_low; }; +/* Add TX LAN Queues (indirect 0x0C30) */ +struct ice_aqc_add_txqs { + u8 num_qgrps; + u8 reserved[3]; + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is the descriptor of each queue entry for the Add TX LAN Queues + * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp. + */ +struct ice_aqc_add_txqs_perq { + __le16 txq_id; + u8 rsvd[2]; + __le32 q_teid; + u8 txq_ctx[22]; + u8 rsvd2[2]; + struct ice_aqc_txsched_elem info; +}; + +/* The format of the command buffer for Add TX LAN Queues (0x0C30) + * is an array of the following structs. Please note that the length of + * each struct ice_aqc_add_tx_qgrp is variable due + * to the variable number of queues in each group! + */ +struct ice_aqc_add_tx_qgrp { + __le32 parent_teid; + u8 num_txqs; + u8 rsvd[3]; + struct ice_aqc_add_txqs_perq txqs[1]; +}; + +/* Disable TX LAN Queues (indirect 0x0C31) */ +struct ice_aqc_dis_txqs { + u8 cmd_type; +#define ICE_AQC_Q_DIS_CMD_S 0 +#define ICE_AQC_Q_DIS_CMD_M (0x3 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_NO_FUNC_RESET (0 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_VM_RESET BIT(ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_VF_RESET (2 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_PF_RESET (3 << ICE_AQC_Q_DIS_CMD_S) +#define ICE_AQC_Q_DIS_CMD_SUBSEQ_CALL BIT(2) +#define ICE_AQC_Q_DIS_CMD_FLUSH_PIPE BIT(3) + u8 num_entries; + __le16 vmvf_and_timeout; +#define ICE_AQC_Q_DIS_VMVF_NUM_S 0 +#define ICE_AQC_Q_DIS_VMVF_NUM_M (0x3FF << ICE_AQC_Q_DIS_VMVF_NUM_S) +#define ICE_AQC_Q_DIS_TIMEOUT_S 10 +#define ICE_AQC_Q_DIS_TIMEOUT_M (0x3F << ICE_AQC_Q_DIS_TIMEOUT_S) + __le32 blocked_cgds; + __le32 addr_high; + __le32 addr_low; +}; + +/* The buffer for Disable TX LAN Queues (indirect 0x0C31) + * contains the following structures, arrayed one after the + * other. + * Note: Since the q_id is 16 bits wide, if the + * number of queues is even, then 2 bytes of alignment MUST be + * added before the start of the next group, to allow correct + * alignment of the parent_teid field. + */ +struct ice_aqc_dis_txq_item { + __le32 parent_teid; + u8 num_qs; + u8 rsvd; + /* The length of the q_id array varies according to num_qs */ + __le16 q_id[1]; + /* This only applies from F8 onward */ +#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S 15 +#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_LAN_Q \ + (0 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S) +#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET \ + (1 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S) +}; + +struct ice_aqc_dis_txq { + struct ice_aqc_dis_txq_item qgrps[1]; +}; + /** * struct ice_aq_desc - Admin Queue (AQ) descriptor * @flags: ICE_AQ_FLAG_* flags @@ -1008,6 +1089,8 @@ struct ice_aq_desc { struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; + struct ice_aqc_add_txqs add_txqs; + struct ice_aqc_dis_txqs dis_txqs; struct ice_aqc_add_get_update_free_vsi vsi_cmd; struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; struct ice_aqc_get_link_status get_link_status; @@ -1088,6 +1171,9 @@ enum ice_adminq_opc { /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, + /* TX queue handling commands/events */ + ice_aqc_opc_add_txqs = 0x0C30, + ice_aqc_opc_dis_txqs = 0x0C31, }; #endif /* _ICE_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 67301fe75482..3d5686636656 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -7,6 +7,25 @@ #define ICE_PF_RESET_WAIT_COUNT 200 +#define ICE_NIC_FLX_ENTRY(hw, mdid, idx) \ + wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(ICE_RXDID_FLEX_NIC), \ + ((ICE_RX_OPC_MDID << \ + GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \ + GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \ + (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \ + GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M)) + +#define ICE_NIC_FLX_FLG_ENTRY(hw, flg_0, flg_1, flg_2, flg_3, idx) \ + wr32((hw), GLFLXP_RXDID_FLAGS(ICE_RXDID_FLEX_NIC, idx), \ + (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \ + (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \ + (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \ + (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \ + GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M)) + /** * ice_set_mac_type - Sets MAC type * @hw: pointer to the HW structure @@ -258,6 +277,33 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, return status; } +/** + * ice_init_flex_parser - initialize rx flex parser + * @hw: pointer to the hardware structure + * + * Function to initialize flex descriptors + */ +static void ice_init_flex_parser(struct ice_hw *hw) +{ + u8 idx = 0; + + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_LOW, 0); + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_HIGH, 1); + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_LOWER, 2); + ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_HIGH, 3); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_FRG, ICE_RXFLG_UDP_GRE, + ICE_RXFLG_PKT_DSI, ICE_RXFLG_FIN, idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_SYN, ICE_RXFLG_RST, + ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, + ICE_RXFLG_EVLAN_x8100, ICE_RXFLG_EVLAN_x9100, + idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_VLAN_x8100, ICE_RXFLG_TNL_VLAN, + ICE_RXFLG_TNL_MAC, ICE_RXFLG_TNL0, idx++); + ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_TNL1, ICE_RXFLG_TNL2, + ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx); +} + /** * ice_init_fltr_mgmt_struct - initializes filter management list and locks * @hw: pointer to the hw struct @@ -431,6 +477,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_fltr_mgmt_struct; + ice_init_flex_parser(hw); + return 0; err_unroll_fltr_mgmt_struct: @@ -597,6 +645,114 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req) return ice_check_reset(hw); } +/** + * ice_copy_rxq_ctx_to_hw + * @hw: pointer to the hardware structure + * @ice_rxq_ctx: pointer to the rxq context + * @rxq_index: the index of the rx queue + * + * Copies rxq context from dense structure to hw register space + */ +static enum ice_status +ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index) +{ + u8 i; + + if (!ice_rxq_ctx) + return ICE_ERR_BAD_PTR; + + if (rxq_index > QRX_CTRL_MAX_INDEX) + return ICE_ERR_PARAM; + + /* Copy each dword separately to hw */ + for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) { + wr32(hw, QRX_CONTEXT(i, rxq_index), + *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + + ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i, + *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + } + + return 0; +} + +/* LAN Rx Queue Context */ +static const struct ice_ctx_ele ice_rlan_ctx_info[] = { + /* Field Width LSB */ + ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0), + ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13), + ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32), + ICE_CTX_STORE(ice_rlan_ctx, qlen, 13, 89), + ICE_CTX_STORE(ice_rlan_ctx, dbuf, 7, 102), + ICE_CTX_STORE(ice_rlan_ctx, hbuf, 5, 109), + ICE_CTX_STORE(ice_rlan_ctx, dtype, 2, 114), + ICE_CTX_STORE(ice_rlan_ctx, dsize, 1, 116), + ICE_CTX_STORE(ice_rlan_ctx, crcstrip, 1, 117), + ICE_CTX_STORE(ice_rlan_ctx, l2tsel, 1, 119), + ICE_CTX_STORE(ice_rlan_ctx, hsplit_0, 4, 120), + ICE_CTX_STORE(ice_rlan_ctx, hsplit_1, 2, 124), + ICE_CTX_STORE(ice_rlan_ctx, showiv, 1, 127), + ICE_CTX_STORE(ice_rlan_ctx, rxmax, 14, 174), + ICE_CTX_STORE(ice_rlan_ctx, tphrdesc_ena, 1, 193), + ICE_CTX_STORE(ice_rlan_ctx, tphwdesc_ena, 1, 194), + ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena, 1, 195), + ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196), + ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198), + { 0 } +}; + +/** + * ice_write_rxq_ctx + * @hw: pointer to the hardware structure + * @rlan_ctx: pointer to the rxq context + * @rxq_index: the index of the rx queue + * + * Converts rxq context from sparse to dense structure and then writes + * it to hw register space + */ +enum ice_status +ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, + u32 rxq_index) +{ + u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 }; + + ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info); + return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index); +} + +/* LAN Tx Queue Context */ +const struct ice_ctx_ele ice_tlan_ctx_info[] = { + /* Field Width LSB */ + ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0), + ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57), + ICE_CTX_STORE(ice_tlan_ctx, cgd_num, 5, 60), + ICE_CTX_STORE(ice_tlan_ctx, pf_num, 3, 65), + ICE_CTX_STORE(ice_tlan_ctx, vmvf_num, 10, 68), + ICE_CTX_STORE(ice_tlan_ctx, vmvf_type, 2, 78), + ICE_CTX_STORE(ice_tlan_ctx, src_vsi, 10, 80), + ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena, 1, 90), + ICE_CTX_STORE(ice_tlan_ctx, alt_vlan, 1, 92), + ICE_CTX_STORE(ice_tlan_ctx, cpuid, 8, 93), + ICE_CTX_STORE(ice_tlan_ctx, wb_mode, 1, 101), + ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc, 1, 102), + ICE_CTX_STORE(ice_tlan_ctx, tphrd, 1, 103), + ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc, 1, 104), + ICE_CTX_STORE(ice_tlan_ctx, cmpq_id, 9, 105), + ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func, 14, 114), + ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode, 1, 128), + ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id, 6, 129), + ICE_CTX_STORE(ice_tlan_ctx, qlen, 13, 135), + ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx, 4, 148), + ICE_CTX_STORE(ice_tlan_ctx, tso_ena, 1, 152), + ICE_CTX_STORE(ice_tlan_ctx, tso_qnum, 11, 153), + ICE_CTX_STORE(ice_tlan_ctx, legacy_int, 1, 164), + ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165), + ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166), + ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168), + ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 110, 171), + { 0 } +}; + /** * ice_debug_cq * @hw: pointer to the hardware structure @@ -1104,3 +1260,449 @@ void ice_clear_pxe_mode(struct ice_hw *hw) if (ice_check_sq_alive(hw, &hw->adminq)) ice_aq_clear_pxe_mode(hw); } + +/** + * ice_aq_add_lan_txq + * @hw: pointer to the hardware structure + * @num_qgrps: Number of added queue groups + * @qg_list: list of queue groups to be added + * @buf_size: size of buffer for indirect command + * @cd: pointer to command details structure or NULL + * + * Add Tx LAN queue (0x0C30) + * + * NOTE: + * Prior to calling add Tx LAN queue: + * Initialize the following as part of the Tx queue context: + * Completion queue ID if the queue uses Completion queue, Quanta profile, + * Cache profile and Packet shaper profile. + * + * After add Tx LAN queue AQ command is completed: + * Interrupts should be associated with specific queues, + * Association of Tx queue to Doorbell queue is not part of Add LAN Tx queue + * flow. + */ +static enum ice_status +ice_aq_add_lan_txq(struct ice_hw *hw, u8 num_qgrps, + struct ice_aqc_add_tx_qgrp *qg_list, u16 buf_size, + struct ice_sq_cd *cd) +{ + u16 i, sum_header_size, sum_q_size = 0; + struct ice_aqc_add_tx_qgrp *list; + struct ice_aqc_add_txqs *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.add_txqs; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_txqs); + + if (!qg_list) + return ICE_ERR_PARAM; + + if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS) + return ICE_ERR_PARAM; + + sum_header_size = num_qgrps * + (sizeof(*qg_list) - sizeof(*qg_list->txqs)); + + list = qg_list; + for (i = 0; i < num_qgrps; i++) { + struct ice_aqc_add_txqs_perq *q = list->txqs; + + sum_q_size += list->num_txqs * sizeof(*q); + list = (struct ice_aqc_add_tx_qgrp *)(q + list->num_txqs); + } + + if (buf_size != (sum_header_size + sum_q_size)) + return ICE_ERR_PARAM; + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_qgrps = num_qgrps; + + return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd); +} + +/** + * ice_aq_dis_lan_txq + * @hw: pointer to the hardware structure + * @num_qgrps: number of groups in the list + * @qg_list: the list of groups to disable + * @buf_size: the total size of the qg_list buffer in bytes + * @cd: pointer to command details structure or NULL + * + * Disable LAN Tx queue (0x0C31) + */ +static enum ice_status +ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps, + struct ice_aqc_dis_txq_item *qg_list, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aqc_dis_txqs *cmd; + struct ice_aq_desc desc; + u16 i, sz = 0; + + cmd = &desc.params.dis_txqs; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs); + + if (!qg_list) + return ICE_ERR_PARAM; + + if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS) + return ICE_ERR_PARAM; + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + cmd->num_entries = num_qgrps; + + for (i = 0; i < num_qgrps; ++i) { + /* Calculate the size taken up by the queue IDs in this group */ + sz += qg_list[i].num_qs * sizeof(qg_list[i].q_id); + + /* Add the size of the group header */ + sz += sizeof(qg_list[i]) - sizeof(qg_list[i].q_id); + + /* If the num of queues is even, add 2 bytes of padding */ + if ((qg_list[i].num_qs % 2) == 0) + sz += 2; + } + + if (buf_size != sz) + return ICE_ERR_PARAM; + + return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd); +} + +/* End of FW Admin Queue command wrappers */ + +/** + * ice_write_byte - write a byte to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_byte(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u8 src_byte, dest_byte, mask; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + mask = (u8)(BIT(ce_info->width) - 1); + + src_byte = *from; + src_byte &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_byte <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_byte, dest, sizeof(dest_byte)); + + dest_byte &= ~mask; /* get the bits not changing */ + dest_byte |= src_byte; /* add in the new bits */ + + /* put it all back */ + memcpy(dest, &dest_byte, sizeof(dest_byte)); +} + +/** + * ice_write_word - write a word to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_word(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u16 src_word, mask; + __le16 dest_word; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + mask = BIT(ce_info->width) - 1; + + /* don't swizzle the bits until after the mask because the mask bits + * will be in a different bit position on big endian machines + */ + src_word = *(u16 *)from; + src_word &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_word <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_word, dest, sizeof(dest_word)); + + dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */ + dest_word |= cpu_to_le16(src_word); /* add in the new bits */ + + /* put it all back */ + memcpy(dest, &dest_word, sizeof(dest_word)); +} + +/** + * ice_write_dword - write a dword to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_dword(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u32 src_dword, mask; + __le32 dest_dword; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + + /* if the field width is exactly 32 on an x86 machine, then the shift + * operation will not work because the SHL instructions count is masked + * to 5 bits so the shift will do nothing + */ + if (ce_info->width < 32) + mask = BIT(ce_info->width) - 1; + else + mask = (u32)~0; + + /* don't swizzle the bits until after the mask because the mask bits + * will be in a different bit position on big endian machines + */ + src_dword = *(u32 *)from; + src_dword &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_dword <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_dword, dest, sizeof(dest_dword)); + + dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */ + dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */ + + /* put it all back */ + memcpy(dest, &dest_dword, sizeof(dest_dword)); +} + +/** + * ice_write_qword - write a qword to a packed context structure + * @src_ctx: the context structure to read from + * @dest_ctx: the context to be written to + * @ce_info: a description of the struct to be filled + */ +static void ice_write_qword(u8 *src_ctx, u8 *dest_ctx, + const struct ice_ctx_ele *ce_info) +{ + u64 src_qword, mask; + __le64 dest_qword; + u8 *from, *dest; + u16 shift_width; + + /* copy from the next struct field */ + from = src_ctx + ce_info->offset; + + /* prepare the bits and mask */ + shift_width = ce_info->lsb % 8; + + /* if the field width is exactly 64 on an x86 machine, then the shift + * operation will not work because the SHL instructions count is masked + * to 6 bits so the shift will do nothing + */ + if (ce_info->width < 64) + mask = BIT_ULL(ce_info->width) - 1; + else + mask = (u64)~0; + + /* don't swizzle the bits until after the mask because the mask bits + * will be in a different bit position on big endian machines + */ + src_qword = *(u64 *)from; + src_qword &= mask; + + /* shift to correct alignment */ + mask <<= shift_width; + src_qword <<= shift_width; + + /* get the current bits from the target bit string */ + dest = dest_ctx + (ce_info->lsb / 8); + + memcpy(&dest_qword, dest, sizeof(dest_qword)); + + dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */ + dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */ + + /* put it all back */ + memcpy(dest, &dest_qword, sizeof(dest_qword)); +} + +/** + * ice_set_ctx - set context bits in packed structure + * @src_ctx: pointer to a generic non-packed context structure + * @dest_ctx: pointer to memory for the packed structure + * @ce_info: a description of the structure to be transformed + */ +enum ice_status +ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) +{ + int f; + + for (f = 0; ce_info[f].width; f++) { + /* We have to deal with each element of the FW response + * using the correct size so that we are correct regardless + * of the endianness of the machine. + */ + switch (ce_info[f].size_of) { + case sizeof(u8): + ice_write_byte(src_ctx, dest_ctx, &ce_info[f]); + break; + case sizeof(u16): + ice_write_word(src_ctx, dest_ctx, &ce_info[f]); + break; + case sizeof(u32): + ice_write_dword(src_ctx, dest_ctx, &ce_info[f]); + break; + case sizeof(u64): + ice_write_qword(src_ctx, dest_ctx, &ce_info[f]); + break; + default: + return ICE_ERR_INVAL_SIZE; + } + } + + return 0; +} + +/** + * ice_ena_vsi_txq + * @pi: port information structure + * @vsi_id: VSI id + * @tc: tc number + * @num_qgrps: Number of added queue groups + * @buf: list of queue groups to be added + * @buf_size: size of buffer for indirect command + * @cd: pointer to command details structure or NULL + * + * This function adds one lan q + */ +enum ice_status +ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps, + struct ice_aqc_add_tx_qgrp *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aqc_txsched_elem_data node = { 0 }; + struct ice_sched_node *parent; + enum ice_status status; + struct ice_hw *hw; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return ICE_ERR_CFG; + + if (num_qgrps > 1 || buf->num_txqs > 1) + return ICE_ERR_MAX_LIMIT; + + hw = pi->hw; + + mutex_lock(&pi->sched_lock); + + /* find a parent node */ + parent = ice_sched_get_free_qparent(pi, vsi_id, tc, + ICE_SCHED_NODE_OWNER_LAN); + if (!parent) { + status = ICE_ERR_PARAM; + goto ena_txq_exit; + } + buf->parent_teid = parent->info.node_teid; + node.parent_teid = parent->info.node_teid; + /* Mark that the values in the "generic" section as valid. The default + * value in the "generic" section is zero. This means that : + * - Scheduling mode is Bytes Per Second (BPS), indicated by Bit 0. + * - 0 priority among siblings, indicated by Bit 1-3. + * - WFQ, indicated by Bit 4. + * - 0 Adjustment value is used in PSM credit update flow, indicated by + * Bit 5-6. + * - Bit 7 is reserved. + * Without setting the generic section as valid in valid_sections, the + * Admin Q command will fail with error code ICE_AQ_RC_EINVAL. + */ + buf->txqs[0].info.valid_sections = ICE_AQC_ELEM_VALID_GENERIC; + + /* add the lan q */ + status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd); + if (status) + goto ena_txq_exit; + + node.node_teid = buf->txqs[0].q_teid; + node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF; + + /* add a leaf node into schduler tree q layer */ + status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node); + +ena_txq_exit: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_dis_vsi_txq + * @pi: port information structure + * @num_queues: number of queues + * @q_ids: pointer to the q_id array + * @q_teids: pointer to queue node teids + * @cd: pointer to command details structure or NULL + * + * This function removes queues and their corresponding nodes in SW DB + */ +enum ice_status +ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, + u32 *q_teids, struct ice_sq_cd *cd) +{ + enum ice_status status = ICE_ERR_DOES_NOT_EXIST; + struct ice_aqc_dis_txq_item qg_list; + u16 i; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return ICE_ERR_CFG; + + mutex_lock(&pi->sched_lock); + + for (i = 0; i < num_queues; i++) { + struct ice_sched_node *node; + + node = ice_sched_find_node_by_teid(pi->root, q_teids[i]); + if (!node) + continue; + qg_list.parent_teid = node->info.parent_teid; + qg_list.num_qs = 1; + qg_list.q_id[0] = cpu_to_le16(q_ids[i]); + status = ice_aq_dis_lan_txq(pi->hw, 1, &qg_list, + sizeof(qg_list), cd); + + if (status) + break; + ice_free_sched_node(pi, node); + } + mutex_unlock(&pi->sched_lock); + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index dd4473e84ebb..8ed1135bb189 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -30,9 +30,15 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_sq_cd *cd); void ice_clear_pxe_mode(struct ice_hw *hw); enum ice_status ice_get_caps(struct ice_hw *hw); +enum ice_status +ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, + u32 rxq_index); bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); +extern const struct ice_ctx_ele ice_tlan_ctx_info[]; +enum ice_status +ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info); enum ice_status ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); @@ -41,4 +47,11 @@ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); enum ice_status ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, struct ice_link_status *link, struct ice_sq_cd *cd); +enum ice_status +ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, + u32 *q_teids, struct ice_sq_cd *cmd_details); +enum ice_status +ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps, + struct ice_aqc_add_tx_qgrp *buf, u16 buf_size, + struct ice_sq_cd *cd); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 446a8bbef488..fc9b0b179e99 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -6,6 +6,7 @@ #ifndef _ICE_HW_AUTOGEN_H_ #define _ICE_HW_AUTOGEN_H_ +#define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4)) #define PF_FW_ARQBAH 0x00080180 #define PF_FW_ARQBAL 0x00080080 #define PF_FW_ARQH 0x00080380 @@ -40,6 +41,44 @@ #define PF_FW_ATQLEN_ATQENABLE_S 31 #define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S) #define PF_FW_ATQT 0x00080400 + +#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256)) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S 8 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S 16 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S 24 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) +#define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S) +#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S) +#define GLFLXP_RXDID_FLX_WRD_1(_i) (0x0045c900 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S) +#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S) +#define GLFLXP_RXDID_FLX_WRD_2(_i) (0x0045ca00 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S) +#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S) +#define GLFLXP_RXDID_FLX_WRD_3(_i) (0x0045cb00 + ((_i) * 4)) +#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S 0 +#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S) +#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S 30 +#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S) + +#define QRXFLXP_CNTXT(_QRX) (0x00480000 + ((_QRX) * 4)) +#define QRXFLXP_CNTXT_RXDID_IDX_S 0 +#define QRXFLXP_CNTXT_RXDID_IDX_M ICE_M(0x3F, QRXFLXP_CNTXT_RXDID_IDX_S) +#define QRXFLXP_CNTXT_RXDID_PRIO_S 8 +#define QRXFLXP_CNTXT_RXDID_PRIO_M ICE_M(0x7, QRXFLXP_CNTXT_RXDID_PRIO_S) +#define QRXFLXP_CNTXT_TS_S 11 +#define QRXFLXP_CNTXT_TS_M BIT(QRXFLXP_CNTXT_TS_S) #define GLGEN_RSTAT 0x000B8188 #define GLGEN_RSTAT_DEVSTATE_S 0 #define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S) @@ -62,6 +101,8 @@ #define GLINT_DYN_CTL_INTENA_M BIT(GLINT_DYN_CTL_INTENA_S) #define GLINT_DYN_CTL_CLEARPBA_S 1 #define GLINT_DYN_CTL_CLEARPBA_M BIT(GLINT_DYN_CTL_CLEARPBA_S) +#define GLINT_DYN_CTL_SWINT_TRIG_S 2 +#define GLINT_DYN_CTL_SWINT_TRIG_M BIT(GLINT_DYN_CTL_SWINT_TRIG_S) #define GLINT_DYN_CTL_ITR_INDX_S 3 #define GLINT_DYN_CTL_SW_ITR_INDX_S 25 #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S) @@ -106,7 +147,25 @@ #define PFINT_OICR_CTL_CAUSE_ENA_S 30 #define PFINT_OICR_CTL_CAUSE_ENA_M BIT(PFINT_OICR_CTL_CAUSE_ENA_S) #define PFINT_OICR_ENA 0x0016C900 +#define QINT_RQCTL(_QRX) (0x00150000 + ((_QRX) * 4)) +#define QINT_RQCTL_MSIX_INDX_S 0 +#define QINT_RQCTL_ITR_INDX_S 11 +#define QINT_RQCTL_CAUSE_ENA_S 30 +#define QINT_RQCTL_CAUSE_ENA_M BIT(QINT_RQCTL_CAUSE_ENA_S) +#define QINT_TQCTL(_DBQM) (0x00140000 + ((_DBQM) * 4)) +#define QINT_TQCTL_MSIX_INDX_S 0 +#define QINT_TQCTL_ITR_INDX_S 11 +#define QINT_TQCTL_CAUSE_ENA_S 30 +#define QINT_TQCTL_CAUSE_ENA_M BIT(QINT_TQCTL_CAUSE_ENA_S) #define GLLAN_RCTL_0 0x002941F8 +#define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4)) +#define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4)) +#define QRX_CTRL_MAX_INDEX 2047 +#define QRX_CTRL_QENA_REQ_S 0 +#define QRX_CTRL_QENA_REQ_M BIT(QRX_CTRL_QENA_REQ_S) +#define QRX_CTRL_QENA_STAT_S 2 +#define QRX_CTRL_QENA_STAT_M BIT(QRX_CTRL_QENA_STAT_S) +#define QRX_TAIL(_QRX) (0x00290000 + ((_QRX) * 4)) #define GLNVM_FLA 0x000B6108 #define GLNVM_FLA_LOCKED_S 6 #define GLNVM_FLA_LOCKED_M BIT(GLNVM_FLA_LOCKED_S) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h new file mode 100644 index 000000000000..3c1aeb74d950 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_LAN_TX_RX_H_ +#define _ICE_LAN_TX_RX_H_ + +union ice_32byte_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + /* bit 0 of hdr_addr is DD bit */ + __le64 rsvd1; + __le64 rsvd2; + } read; + struct { + struct { + struct { + __le16 mirroring_status; + __le16 l2tag1; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + __le32 fd_id; /* Flow Director filter id */ + } hi_dword; + } qword0; + struct { + /* status/error/PTYPE/length */ + __le64 status_error_len; + } qword1; + struct { + __le16 ext_status; /* extended status */ + __le16 rsvd; + __le16 l2tag2_1; + __le16 l2tag2_2; + } qword2; + struct { + __le32 reserved; + __le32 fd_id; + } qword3; + } wb; /* writeback */ +}; + +/* RX Flex Descriptor + * This descriptor is used instead of the legacy version descriptor when + * ice_rlan_ctx.adv_desc is set + */ +union ice_32b_rx_flex_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + /* bit 0 of hdr_addr is DD bit */ + __le64 rsvd1; + __le64 rsvd2; + } read; + struct { + /* Qword 0 */ + u8 rxdid; /* descriptor builder profile id */ + u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */ + __le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */ + __le16 pkt_len; /* [15:14] are reserved */ + __le16 hdr_len_sph_flex_flags1; /* header=[10:0] */ + /* sph=[11:11] */ + /* ff1/ext=[15:12] */ + + /* Qword 1 */ + __le16 status_error0; + __le16 l2tag1; + __le16 flex_meta0; + __le16 flex_meta1; + + /* Qword 2 */ + __le16 status_error1; + u8 flex_flags2; + u8 time_stamp_low; + __le16 l2tag2_1st; + __le16 l2tag2_2nd; + + /* Qword 3 */ + __le16 flex_meta2; + __le16 flex_meta3; + union { + struct { + __le16 flex_meta4; + __le16 flex_meta5; + } flex; + __le32 ts_high; + } flex_ts; + } wb; /* writeback */ +}; + +/* Receive Flex Descriptor profile IDs: There are a total + * of 64 profiles where profile IDs 0/1 are for legacy; and + * profiles 2-63 are flex profiles that can be programmed + * with a specific metadata (profile 7 reserved for HW) + */ +enum ice_rxdid { + ICE_RXDID_START = 0, + ICE_RXDID_LEGACY_0 = ICE_RXDID_START, + ICE_RXDID_LEGACY_1, + ICE_RXDID_FLX_START, + ICE_RXDID_FLEX_NIC = ICE_RXDID_FLX_START, + ICE_RXDID_FLX_LAST = 63, + ICE_RXDID_LAST = ICE_RXDID_FLX_LAST +}; + +/* Receive Flex Descriptor Rx opcode values */ +#define ICE_RX_OPC_MDID 0x01 + +/* Receive Descriptor MDID values */ +#define ICE_RX_MDID_FLOW_ID_LOWER 5 +#define ICE_RX_MDID_FLOW_ID_HIGH 6 +#define ICE_RX_MDID_HASH_LOW 56 +#define ICE_RX_MDID_HASH_HIGH 57 + +/* Rx Flag64 packet flag bits */ +enum ice_rx_flg64_bits { + ICE_RXFLG_PKT_DSI = 0, + ICE_RXFLG_EVLAN_x8100 = 15, + ICE_RXFLG_EVLAN_x9100, + ICE_RXFLG_VLAN_x8100, + ICE_RXFLG_TNL_MAC = 22, + ICE_RXFLG_TNL_VLAN, + ICE_RXFLG_PKT_FRG, + ICE_RXFLG_FIN = 32, + ICE_RXFLG_SYN, + ICE_RXFLG_RST, + ICE_RXFLG_TNL0 = 38, + ICE_RXFLG_TNL1, + ICE_RXFLG_TNL2, + ICE_RXFLG_UDP_GRE, + ICE_RXFLG_RSVD = 63 +}; + +#define ICE_RXQ_CTX_SIZE_DWORDS 8 +#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) + +/* RLAN Rx queue context data + * + * The sizes of the variables may be larger than needed due to crossing byte + * boundaries. If we do not have the width of the variable set to the correct + * size then we could end up shifting bits off the top of the variable when the + * variable is at the top of a byte and crosses over into the next byte. + */ +struct ice_rlan_ctx { + u16 head; + u16 cpuid; /* bigger than needed, see above for reason */ + u64 base; + u16 qlen; +#define ICE_RLAN_CTX_DBUF_S 7 + u16 dbuf; /* bigger than needed, see above for reason */ +#define ICE_RLAN_CTX_HBUF_S 6 + u16 hbuf; /* bigger than needed, see above for reason */ + u8 dtype; + u8 dsize; + u8 crcstrip; + u8 l2tsel; + u8 hsplit_0; + u8 hsplit_1; + u8 showiv; + u32 rxmax; /* bigger than needed, see above for reason */ + u8 tphrdesc_ena; + u8 tphwdesc_ena; + u8 tphdata_ena; + u8 tphhead_ena; + u16 lrxqthresh; /* bigger than needed, see above for reason */ +}; + +struct ice_ctx_ele { + u16 offset; + u16 size_of; + u16 width; + u16 lsb; +}; + +#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \ + .offset = offsetof(struct _struct, _ele), \ + .size_of = FIELD_SIZEOF(struct _struct, _ele), \ + .width = _width, \ + .lsb = _lsb, \ +} + +/* for hsplit_0 field of Rx RLAN context */ +enum ice_rlan_ctx_rx_hsplit_0 { + ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0, + ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 = 1, + ICE_RLAN_RX_HSPLIT_0_SPLIT_IP = 2, + ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP = 4, + ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP = 8, +}; + +/* for hsplit_1 field of Rx RLAN context */ +enum ice_rlan_ctx_rx_hsplit_1 { + ICE_RLAN_RX_HSPLIT_1_NO_SPLIT = 0, + ICE_RLAN_RX_HSPLIT_1_SPLIT_L2 = 1, + ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS = 2, +}; + +/* TX Descriptor */ +struct ice_tx_desc { + __le64 buf_addr; /* Address of descriptor's data buf */ + __le64 cmd_type_offset_bsz; +}; + +#define ICE_LAN_TXQ_MAX_QGRPS 127 +#define ICE_LAN_TXQ_MAX_QDIS 1023 + +/* Tx queue context data + * + * The sizes of the variables may be larger than needed due to crossing byte + * boundaries. If we do not have the width of the variable set to the correct + * size then we could end up shifting bits off the top of the variable when the + * variable is at the top of a byte and crosses over into the next byte. + */ +struct ice_tlan_ctx { +#define ICE_TLAN_CTX_BASE_S 7 + u64 base; /* base is defined in 128-byte units */ + u8 port_num; + u16 cgd_num; /* bigger than needed, see above for reason */ + u8 pf_num; + u16 vmvf_num; + u8 vmvf_type; +#define ICE_TLAN_CTX_VMVF_TYPE_VMQ 1 +#define ICE_TLAN_CTX_VMVF_TYPE_PF 2 + u16 src_vsi; + u8 tsyn_ena; + u8 alt_vlan; + u16 cpuid; /* bigger than needed, see above for reason */ + u8 wb_mode; + u8 tphrd_desc; + u8 tphrd; + u8 tphwr_desc; + u16 cmpq_id; + u16 qnum_in_func; + u8 itr_notification_mode; + u8 adjust_prof_id; + u32 qlen; /* bigger than needed, see above for reason */ + u8 quanta_prof_idx; + u8 tso_ena; + u16 tso_qnum; + u8 legacy_int; + u8 drop_ena; + u8 cache_prof_idx; + u8 pkt_shaper_prof_idx; + u8 int_q_state; /* width not needed - internal do not write */ +}; +#endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3b4a2691ddac..58e9eb40f64a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -27,6 +27,7 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */ static struct workqueue_struct *ice_wq; +static const struct net_device_ops ice_netdev_ops; static int ice_vsi_release(struct ice_vsi *vsi); @@ -213,6 +214,75 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h) } } +/** + * ice_print_link_msg - print link up or down message + * @vsi: the VSI whose link status is being queried + * @isup: boolean for if the link is now up or down + */ +static void ice_print_link_msg(struct ice_vsi *vsi, bool isup) +{ + const char *speed; + const char *fc; + + if (vsi->current_isup == isup) + return; + + vsi->current_isup = isup; + + if (!isup) { + netdev_info(vsi->netdev, "NIC Link is Down\n"); + return; + } + + switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_40GB: + speed = "40 G"; + break; + case ICE_AQ_LINK_SPEED_25GB: + speed = "25 G"; + break; + case ICE_AQ_LINK_SPEED_20GB: + speed = "20 G"; + break; + case ICE_AQ_LINK_SPEED_10GB: + speed = "10 G"; + break; + case ICE_AQ_LINK_SPEED_5GB: + speed = "5 G"; + break; + case ICE_AQ_LINK_SPEED_2500MB: + speed = "2.5 G"; + break; + case ICE_AQ_LINK_SPEED_1000MB: + speed = "1 G"; + break; + case ICE_AQ_LINK_SPEED_100MB: + speed = "100 M"; + break; + default: + speed = "Unknown"; + break; + } + + switch (vsi->port_info->fc.current_mode) { + case ICE_FC_FULL: + fc = "RX/TX"; + break; + case ICE_FC_TX_PAUSE: + fc = "TX"; + break; + case ICE_FC_RX_PAUSE: + fc = "RX"; + break; + default: + fc = "Unknown"; + break; + } + + netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n", + speed, fc); +} + /** * __ice_clean_ctrlq - helper function to clean controlq rings * @pf: ptr to struct ice_pf @@ -408,6 +478,104 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; } +/** + * ice_irq_affinity_notify - Callback for affinity changes + * @notify: context as to what irq was changed + * @mask: the new affinity mask + * + * This is a callback function used by the irq_set_affinity_notifier function + * so that we may register to receive changes to the irq affinity masks. + */ +static void ice_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct ice_q_vector *q_vector = + container_of(notify, struct ice_q_vector, affinity_notify); + + cpumask_copy(&q_vector->affinity_mask, mask); +} + +/** + * ice_irq_affinity_release - Callback for affinity notifier release + * @ref: internal core kernel usage + * + * This is a callback function used by the irq_set_affinity_notifier function + * to inform the current notification subscriber that they will no longer + * receive notifications. + */ +static void ice_irq_affinity_release(struct kref __always_unused *ref) {} + +/** + * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI + * @vsi: the VSI being un-configured + */ +static void ice_vsi_dis_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int base = vsi->base_vector; + u32 val; + int i; + + /* disable interrupt causation from each queue */ + if (vsi->tx_rings) { + ice_for_each_txq(vsi, i) { + if (vsi->tx_rings[i]) { + u16 reg; + + reg = vsi->tx_rings[i]->reg_idx; + val = rd32(hw, QINT_TQCTL(reg)); + val &= ~QINT_TQCTL_CAUSE_ENA_M; + wr32(hw, QINT_TQCTL(reg), val); + } + } + } + + if (vsi->rx_rings) { + ice_for_each_rxq(vsi, i) { + if (vsi->rx_rings[i]) { + u16 reg; + + reg = vsi->rx_rings[i]->reg_idx; + val = rd32(hw, QINT_RQCTL(reg)); + val &= ~QINT_RQCTL_CAUSE_ENA_M; + wr32(hw, QINT_RQCTL(reg), val); + } + } + } + + /* disable each interrupt */ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + for (i = vsi->base_vector; + i < (vsi->num_q_vectors + vsi->base_vector); i++) + wr32(hw, GLINT_DYN_CTL(i), 0); + + ice_flush(hw); + for (i = 0; i < vsi->num_q_vectors; i++) + synchronize_irq(pf->msix_entries[i + base].vector); + } +} + +/** + * ice_vsi_ena_irq - Enable IRQ for the given VSI + * @vsi: the VSI being configured + */ +static int ice_vsi_ena_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + int i; + + for (i = 0; i < vsi->num_q_vectors; i++) + ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]); + } + + ice_flush(hw); + return 0; +} + /** * ice_vsi_delete - delete a VSI from the switch * @vsi: pointer to VSI being removed @@ -428,6 +596,73 @@ static void ice_vsi_delete(struct ice_vsi *vsi) vsi->vsi_num); } +/** + * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI + * @vsi: the VSI being configured + * @basename: name for the vector + */ +static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) +{ + int q_vectors = vsi->num_q_vectors; + struct ice_pf *pf = vsi->back; + int base = vsi->base_vector; + int rx_int_idx = 0; + int tx_int_idx = 0; + int vector, err; + int irq_num; + + for (vector = 0; vector < q_vectors; vector++) { + struct ice_q_vector *q_vector = vsi->q_vectors[vector]; + + irq_num = pf->msix_entries[base + vector].vector; + + if (q_vector->tx.ring && q_vector->rx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name) - 1, + "%s-%s-%d", basename, "TxRx", rx_int_idx++); + tx_int_idx++; + } else if (q_vector->rx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name) - 1, + "%s-%s-%d", basename, "rx", rx_int_idx++); + } else if (q_vector->tx.ring) { + snprintf(q_vector->name, sizeof(q_vector->name) - 1, + "%s-%s-%d", basename, "tx", tx_int_idx++); + } else { + /* skip this unused q_vector */ + continue; + } + err = devm_request_irq(&pf->pdev->dev, + pf->msix_entries[base + vector].vector, + vsi->irq_handler, 0, q_vector->name, + q_vector); + if (err) { + netdev_err(vsi->netdev, + "MSIX request_irq failed, error: %d\n", err); + goto free_q_irqs; + } + + /* register for affinity change notifications */ + q_vector->affinity_notify.notify = ice_irq_affinity_notify; + q_vector->affinity_notify.release = ice_irq_affinity_release; + irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); + + /* assign the mask for this irq */ + irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); + } + + vsi->irqs_ready = true; + return 0; + +free_q_irqs: + while (vector) { + vector--; + irq_num = pf->msix_entries[base + vector].vector, + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + devm_free_irq(&pf->pdev->dev, irq_num, &vsi->q_vectors[vector]); + } + return err; +} + /** * ice_vsi_setup_q_map - Setup a VSI queue map * @vsi: the VSI being configured @@ -590,6 +825,38 @@ static int ice_vsi_add(struct ice_vsi *vsi) return ret; } +/** + * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW + * @vsi: the VSI being cleaned up + */ +static void ice_vsi_release_msix(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + u16 vector = vsi->base_vector; + struct ice_hw *hw = &pf->hw; + u32 txq = 0; + u32 rxq = 0; + int i, q; + + for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), 0); + wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), 0); + for (q = 0; q < q_vector->num_ring_tx; q++) { + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); + txq++; + } + + for (q = 0; q < q_vector->num_ring_rx; q++) { + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0); + rxq++; + } + } + + ice_flush(hw); +} + /** * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI * @vsi: the VSI having rings deallocated @@ -672,6 +939,118 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) return -ENOMEM; } +/** + * ice_vsi_free_irq - Free the irq association with the OS + * @vsi: the VSI being configured + */ +static void ice_vsi_free_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int base = vsi->base_vector; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + int i; + + if (!vsi->q_vectors || !vsi->irqs_ready) + return; + + vsi->irqs_ready = false; + for (i = 0; i < vsi->num_q_vectors; i++) { + u16 vector = i + base; + int irq_num; + + irq_num = pf->msix_entries[vector].vector; + + /* free only the irqs that were actually requested */ + if (!vsi->q_vectors[i] || + !(vsi->q_vectors[i]->num_ring_tx || + vsi->q_vectors[i]->num_ring_rx)) + continue; + + /* clear the affinity notifier in the IRQ descriptor */ + irq_set_affinity_notifier(irq_num, NULL); + + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(irq_num, NULL); + synchronize_irq(irq_num); + devm_free_irq(&pf->pdev->dev, irq_num, + vsi->q_vectors[i]); + } + ice_vsi_release_msix(vsi); + } +} + +/** + * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW + * @vsi: the VSI being configured + */ +static void ice_vsi_cfg_msix(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + u16 vector = vsi->base_vector; + struct ice_hw *hw = &pf->hw; + u32 txq = 0, rxq = 0; + int i, q, itr; + u8 itr_gran; + + for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + itr_gran = hw->itr_gran_200; + + if (q_vector->num_ring_rx) { + q_vector->rx.itr = + ITR_TO_REG(vsi->rx_rings[rxq]->rx_itr_setting, + itr_gran); + q_vector->rx.latency_range = ICE_LOW_LATENCY; + } + + if (q_vector->num_ring_tx) { + q_vector->tx.itr = + ITR_TO_REG(vsi->tx_rings[txq]->tx_itr_setting, + itr_gran); + q_vector->tx.latency_range = ICE_LOW_LATENCY; + } + wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), q_vector->rx.itr); + wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), q_vector->tx.itr); + + /* Both Transmit Queue Interrupt Cause Control register + * and Receive Queue Interrupt Cause control register + * expects MSIX_INDX field to be the vector index + * within the function space and not the absolute + * vector index across PF or across device. + * For SR-IOV VF VSIs queue vector index always starts + * with 1 since first vector index(0) is used for OICR + * in VF space. Since VMDq and other PF VSIs are withtin + * the PF function space, use the vector index thats + * tracked for this PF. + */ + for (q = 0; q < q_vector->num_ring_tx; q++) { + u32 val; + + itr = ICE_TX_ITR; + val = QINT_TQCTL_CAUSE_ENA_M | + (itr << QINT_TQCTL_ITR_INDX_S) | + (vector << QINT_TQCTL_MSIX_INDX_S); + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + txq++; + } + + for (q = 0; q < q_vector->num_ring_rx; q++) { + u32 val; + + itr = ICE_RX_ITR; + val = QINT_RQCTL_CAUSE_ENA_M | + (itr << QINT_RQCTL_ITR_INDX_S) | + (vector << QINT_RQCTL_MSIX_INDX_S); + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + rxq++; + } + } + + ice_flush(hw); +} + /** * ice_ena_misc_vector - enable the non-queue interrupts * @pf: board private structure @@ -752,7 +1131,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) wr32(hw, PFINT_OICR_ENA, ena_mask); if (!test_bit(__ICE_DOWN, pf->state)) { ice_service_task_schedule(pf); - ice_irq_dynamic_ena(hw); + ice_irq_dynamic_ena(hw, NULL, NULL); } return ret; @@ -1017,7 +1396,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) ITR_TO_REG(ICE_ITR_8K, itr_gran)); ice_flush(hw); - ice_irq_dynamic_ena(hw); + ice_irq_dynamic_ena(hw, NULL, NULL); return 0; } @@ -1262,6 +1641,9 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) netdev->priv_flags |= IFF_UNICAST_FLT; + /* assign netdev_ops */ + netdev->netdev_ops = &ice_netdev_ops; + /* setup watchdog timeout value to be 5 second */ netdev->watchdog_timeo = 5 * HZ; @@ -2079,6 +2461,704 @@ static void __exit ice_module_exit(void) } module_exit(ice_module_exit); +/** + * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance + * @ring: The Tx ring to configure + * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized + * @pf_q: queue index in the PF space + * + * Configure the Tx descriptor ring in TLAN context. + */ +static void +ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + + tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; + + tlan_ctx->port_num = vsi->port_info->lport; + + /* Transmit Queue Length */ + tlan_ctx->qlen = ring->count; + + /* PF number */ + tlan_ctx->pf_num = hw->pf_id; + + /* queue belongs to a specific VSI type + * VF / VM index should be programmed per vmvf_type setting: + * for vmvf_type = VF, it is VF number between 0-256 + * for vmvf_type = VM, it is VM number between 0-767 + * for PF or EMP this field should be set to zero + */ + switch (vsi->type) { + case ICE_VSI_PF: + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + break; + default: + return; + } + + /* make sure the context is associated with the right VSI */ + tlan_ctx->src_vsi = vsi->vsi_num; + + tlan_ctx->tso_ena = ICE_TX_LEGACY; + tlan_ctx->tso_qnum = pf_q; + + /* Legacy or Advanced Host Interface: + * 0: Advanced Host Interface + * 1: Legacy Host Interface + */ + tlan_ctx->legacy_int = ICE_TX_LEGACY; +} + +/** + * ice_vsi_cfg_txqs - Configure the VSI for Tx + * @vsi: the VSI being configured + * + * Return 0 on success and a negative value on error + * Configure the Tx VSI for operation. + */ +static int ice_vsi_cfg_txqs(struct ice_vsi *vsi) +{ + struct ice_aqc_add_tx_qgrp *qg_buf; + struct ice_aqc_add_txqs_perq *txq; + struct ice_pf *pf = vsi->back; + enum ice_status status; + u16 buf_len, i, pf_q; + int err = 0, tc = 0; + u8 num_q_grps; + + buf_len = sizeof(struct ice_aqc_add_tx_qgrp); + qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL); + if (!qg_buf) + return -ENOMEM; + + if (vsi->num_txq > ICE_MAX_TXQ_PER_TXQG) { + err = -EINVAL; + goto err_cfg_txqs; + } + qg_buf->num_txqs = 1; + num_q_grps = 1; + + /* set up and configure the tx queues */ + ice_for_each_txq(vsi, i) { + struct ice_tlan_ctx tlan_ctx = { 0 }; + + pf_q = vsi->txq_map[i]; + ice_setup_tx_ctx(vsi->tx_rings[i], &tlan_ctx, pf_q); + /* copy context contents into the qg_buf */ + qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); + ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, + ice_tlan_ctx_info); + + /* init queue specific tail reg. It is referred as transmit + * comm scheduler queue doorbell. + */ + vsi->tx_rings[i]->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); + status = ice_ena_vsi_txq(vsi->port_info, vsi->vsi_num, tc, + num_q_grps, qg_buf, buf_len, NULL); + if (status) { + dev_err(&vsi->back->pdev->dev, + "Failed to set LAN Tx queue context, error: %d\n", + status); + err = -ENODEV; + goto err_cfg_txqs; + } + + /* Add Tx Queue TEID into the VSI tx ring from the response + * This will complete configuring and enabling the queue. + */ + txq = &qg_buf->txqs[0]; + if (pf_q == le16_to_cpu(txq->txq_id)) + vsi->tx_rings[i]->txq_teid = + le32_to_cpu(txq->q_teid); + } +err_cfg_txqs: + devm_kfree(&pf->pdev->dev, qg_buf); + return err; +} + +/** + * ice_setup_rx_ctx - Configure a receive ring context + * @ring: The Rx ring to configure + * + * Configure the Rx descriptor ring in RLAN context. + */ +static int ice_setup_rx_ctx(struct ice_ring *ring) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + u32 rxdid = ICE_RXDID_FLEX_NIC; + struct ice_rlan_ctx rlan_ctx; + u32 regval; + u16 pf_q; + int err; + + /* what is RX queue number in global space of 2K rx queues */ + pf_q = vsi->rxq_map[ring->q_index]; + + /* clear the context structure first */ + memset(&rlan_ctx, 0, sizeof(rlan_ctx)); + + rlan_ctx.base = ring->dma >> 7; + + rlan_ctx.qlen = ring->count; + + /* Receive Packet Data Buffer Size. + * The Packet Data Buffer Size is defined in 128 byte units. + */ + rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; + + /* use 32 byte descriptors */ + rlan_ctx.dsize = 1; + + /* Strip the Ethernet CRC bytes before the packet is posted to host + * memory. + */ + rlan_ctx.crcstrip = 1; + + /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ + rlan_ctx.l2tsel = 1; + + rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; + rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; + rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; + + /* This controls whether VLAN is stripped from inner headers + * The VLAN in the inner L2 header is stripped to the receive + * descriptor if enabled by this flag. + */ + rlan_ctx.showiv = 0; + + /* Max packet size for this queue - must not be set to a larger value + * than 5 x DBUF + */ + rlan_ctx.rxmax = min_t(u16, vsi->max_frame, + ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len); + + /* Rx queue threshold in units of 64 */ + rlan_ctx.lrxqthresh = 1; + + /* Enable Flexible Descriptors in the queue context which + * allows this driver to select a specific receive descriptor format + */ + regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); + regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & + QRXFLXP_CNTXT_RXDID_IDX_M; + + /* increasing context priority to pick up profile id; + * default is 0x01; setting to 0x03 to ensure profile + * is programming if prev context is of same priority + */ + regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & + QRXFLXP_CNTXT_RXDID_PRIO_M; + + wr32(hw, QRXFLXP_CNTXT(pf_q), regval); + + /* Absolute queue number out of 2K needs to be passed */ + err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); + if (err) { + dev_err(&vsi->back->pdev->dev, + "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", + pf_q, err); + return -EIO; + } + + /* init queue specific tail register */ + ring->tail = hw->hw_addr + QRX_TAIL(pf_q); + writel(0, ring->tail); + ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); + + return 0; +} + +/** + * ice_vsi_cfg_rxqs - Configure the VSI for Rx + * @vsi: the VSI being configured + * + * Return 0 on success and a negative value on error + * Configure the Rx VSI for operation. + */ +static int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) +{ + int err = 0; + u16 i; + + if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN) + vsi->max_frame = vsi->netdev->mtu + + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + else + vsi->max_frame = ICE_RXBUF_2048; + + vsi->rx_buf_len = ICE_RXBUF_2048; + /* set up individual rings */ + for (i = 0; i < vsi->num_rxq && !err; i++) + err = ice_setup_rx_ctx(vsi->rx_rings[i]); + + if (err) { + dev_err(&vsi->back->pdev->dev, "ice_setup_rx_ctx failed\n"); + return -EIO; + } + return err; +} + +/** + * ice_vsi_cfg - Setup the VSI + * @vsi: the VSI being configured + * + * Return 0 on success and negative value on error + */ +static int ice_vsi_cfg(struct ice_vsi *vsi) +{ + int err; + + err = ice_vsi_cfg_txqs(vsi); + if (!err) + err = ice_vsi_cfg_rxqs(vsi); + + return err; +} + +/** + * ice_vsi_stop_tx_rings - Disable Tx rings + * @vsi: the VSI being configured + */ +static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + u32 *q_teids, val; + u16 *q_ids, i; + int err = 0; + + if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) + return -EINVAL; + + q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids), + GFP_KERNEL); + if (!q_teids) + return -ENOMEM; + + q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids), + GFP_KERNEL); + if (!q_ids) { + err = -ENOMEM; + goto err_alloc_q_ids; + } + + /* set up the tx queue list to be disabled */ + ice_for_each_txq(vsi, i) { + u16 v_idx; + + if (!vsi->tx_rings || !vsi->tx_rings[i]) { + err = -EINVAL; + goto err_out; + } + + q_ids[i] = vsi->txq_map[i]; + q_teids[i] = vsi->tx_rings[i]->txq_teid; + + /* clear cause_ena bit for disabled queues */ + val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx)); + val &= ~QINT_TQCTL_CAUSE_ENA_M; + wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val); + + /* software is expected to wait for 100 ns */ + ndelay(100); + + /* trigger a software interrupt for the vector associated to + * the queue to schedule napi handler + */ + v_idx = vsi->tx_rings[i]->q_vector->v_idx; + wr32(hw, GLINT_DYN_CTL(vsi->base_vector + v_idx), + GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); + } + status = ice_dis_vsi_txq(vsi->port_info, vsi->num_txq, q_ids, q_teids, + NULL); + if (status) { + dev_err(&pf->pdev->dev, + "Failed to disable LAN Tx queues, error: %d\n", + status); + err = -ENODEV; + } + +err_out: + devm_kfree(&pf->pdev->dev, q_ids); + +err_alloc_q_ids: + devm_kfree(&pf->pdev->dev, q_teids); + + return err; +} + +/** + * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled + * @pf: the PF being configured + * @pf_q: the PF queue + * @ena: enable or disable state of the queue + * + * This routine will wait for the given Rx queue of the PF to reach the + * enabled or disabled state. + * Returns -ETIMEDOUT in case of failing to reach the requested state after + * multiple retries; else will return 0 in case of success. + */ +static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) +{ + int i; + + for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) { + u32 rx_reg = rd32(&pf->hw, QRX_CTRL(pf_q)); + + if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) + break; + + usleep_range(10, 20); + } + if (i >= ICE_Q_WAIT_RETRY_LIMIT) + return -ETIMEDOUT; + + return 0; +} + +/** + * ice_vsi_ctrl_rx_rings - Start or stop a VSI's rx rings + * @vsi: the VSI being configured + * @ena: start or stop the rx rings + */ +static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int i, j, ret = 0; + + for (i = 0; i < vsi->num_rxq; i++) { + int pf_q = vsi->rxq_map[i]; + u32 rx_reg; + + for (j = 0; j < ICE_Q_WAIT_MAX_RETRY; j++) { + rx_reg = rd32(hw, QRX_CTRL(pf_q)); + if (((rx_reg >> QRX_CTRL_QENA_REQ_S) & 1) == + ((rx_reg >> QRX_CTRL_QENA_STAT_S) & 1)) + break; + usleep_range(1000, 2000); + } + + /* Skip if the queue is already in the requested state */ + if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) + continue; + + /* turn on/off the queue */ + if (ena) + rx_reg |= QRX_CTRL_QENA_REQ_M; + else + rx_reg &= ~QRX_CTRL_QENA_REQ_M; + wr32(hw, QRX_CTRL(pf_q), rx_reg); + + /* wait for the change to finish */ + ret = ice_pf_rxq_wait(pf, pf_q, ena); + if (ret) { + dev_err(&pf->pdev->dev, + "VSI idx %d Rx ring %d %sable timeout\n", + vsi->idx, pf_q, (ena ? "en" : "dis")); + break; + } + } + + return ret; +} + +/** + * ice_vsi_start_rx_rings - start VSI's rx rings + * @vsi: the VSI whose rings are to be started + * + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_start_rx_rings(struct ice_vsi *vsi) +{ + return ice_vsi_ctrl_rx_rings(vsi, true); +} + +/** + * ice_vsi_stop_rx_rings - stop VSI's rx rings + * @vsi: the VSI + * + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) +{ + return ice_vsi_ctrl_rx_rings(vsi, false); +} + +/** + * ice_vsi_stop_tx_rx_rings - stop VSI's tx and rx rings + * @vsi: the VSI + * Returns 0 on success and a negative value on error + */ +static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi) +{ + int err_tx, err_rx; + + err_tx = ice_vsi_stop_tx_rings(vsi); + if (err_tx) + dev_dbg(&vsi->back->pdev->dev, "Failed to disable Tx rings\n"); + + err_rx = ice_vsi_stop_rx_rings(vsi); + if (err_rx) + dev_dbg(&vsi->back->pdev->dev, "Failed to disable Rx rings\n"); + + if (err_tx || err_rx) + return -EIO; + + return 0; +} + +/** + * ice_up_complete - Finish the last steps of bringing up a connection + * @vsi: The VSI being configured + * + * Return 0 on success and negative value on error + */ +static int ice_up_complete(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + int err; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + ice_vsi_cfg_msix(vsi); + else + return -ENOTSUPP; + + /* Enable only Rx rings, Tx rings were enabled by the FW when the + * Tx queue group list was configured and the context bits were + * programmed using ice_vsi_cfg_txqs + */ + err = ice_vsi_start_rx_rings(vsi); + if (err) + return err; + + clear_bit(__ICE_DOWN, vsi->state); + ice_vsi_ena_irq(vsi); + + if (vsi->port_info && + (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) && + vsi->netdev) { + ice_print_link_msg(vsi, true); + netif_tx_start_all_queues(vsi->netdev); + netif_carrier_on(vsi->netdev); + } + + ice_service_task_schedule(pf); + + return err; +} + +/** + * ice_down - Shutdown the connection + * @vsi: The VSI being stopped + */ +static int ice_down(struct ice_vsi *vsi) +{ + int i, err; + + /* Caller of this function is expected to set the + * vsi->state __ICE_DOWN bit + */ + if (vsi->netdev) { + netif_carrier_off(vsi->netdev); + netif_tx_disable(vsi->netdev); + } + + ice_vsi_dis_irq(vsi); + err = ice_vsi_stop_tx_rx_rings(vsi); + + ice_for_each_txq(vsi, i) + ice_clean_tx_ring(vsi->tx_rings[i]); + + ice_for_each_rxq(vsi, i) + ice_clean_rx_ring(vsi->rx_rings[i]); + + if (err) + netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", + vsi->vsi_num, vsi->vsw->sw_id); + return err; +} + +/** + * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources + * @vsi: VSI having resources allocated + * + * Return 0 on success, negative on failure + */ +static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) +{ + int i, err; + + if (!vsi->num_txq) { + dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", + vsi->vsi_num); + return -EINVAL; + } + + ice_for_each_txq(vsi, i) { + err = ice_setup_tx_ring(vsi->tx_rings[i]); + if (err) + break; + } + + return err; +} + +/** + * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources + * @vsi: VSI having resources allocated + * + * Return 0 on success, negative on failure + */ +static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) +{ + int i, err; + + if (!vsi->num_rxq) { + dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", + vsi->vsi_num); + return -EINVAL; + } + + ice_for_each_rxq(vsi, i) { + err = ice_setup_rx_ring(vsi->rx_rings[i]); + if (err) + break; + } + + return err; +} + +/** + * ice_vsi_req_irq - Request IRQ from the OS + * @vsi: The VSI IRQ is being requested for + * @basename: name for the vector + * + * Return 0 on success and a negative value on error + */ +static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename) +{ + struct ice_pf *pf = vsi->back; + int err = -EINVAL; + + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + err = ice_vsi_req_irq_msix(vsi, basename); + + return err; +} + +/** + * ice_vsi_free_tx_rings - Free Tx resources for VSI queues + * @vsi: the VSI having resources freed + */ +static void ice_vsi_free_tx_rings(struct ice_vsi *vsi) +{ + int i; + + if (!vsi->tx_rings) + return; + + ice_for_each_txq(vsi, i) + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) + ice_free_tx_ring(vsi->tx_rings[i]); +} + +/** + * ice_vsi_free_rx_rings - Free Rx resources for VSI queues + * @vsi: the VSI having resources freed + */ +static void ice_vsi_free_rx_rings(struct ice_vsi *vsi) +{ + int i; + + if (!vsi->rx_rings) + return; + + ice_for_each_rxq(vsi, i) + if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc) + ice_free_rx_ring(vsi->rx_rings[i]); +} + +/** + * ice_vsi_open - Called when a network interface is made active + * @vsi: the VSI to open + * + * Initialization of the VSI + * + * Returns 0 on success, negative value on error + */ +static int ice_vsi_open(struct ice_vsi *vsi) +{ + char int_name[ICE_INT_NAME_STR_LEN]; + struct ice_pf *pf = vsi->back; + int err; + + /* allocate descriptors */ + err = ice_vsi_setup_tx_rings(vsi); + if (err) + goto err_setup_tx; + + err = ice_vsi_setup_rx_rings(vsi); + if (err) + goto err_setup_rx; + + err = ice_vsi_cfg(vsi); + if (err) + goto err_setup_rx; + + snprintf(int_name, sizeof(int_name) - 1, "%s-%s", + dev_driver_string(&pf->pdev->dev), vsi->netdev->name); + err = ice_vsi_req_irq(vsi, int_name); + if (err) + goto err_setup_rx; + + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); + if (err) + goto err_set_qs; + + err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); + if (err) + goto err_set_qs; + + err = ice_up_complete(vsi); + if (err) + goto err_up_complete; + + return 0; + +err_up_complete: + ice_down(vsi); +err_set_qs: + ice_vsi_free_irq(vsi); +err_setup_rx: + ice_vsi_free_rx_rings(vsi); +err_setup_tx: + ice_vsi_free_tx_rings(vsi); + + return err; +} + +/** + * ice_vsi_close - Shut down a VSI + * @vsi: the VSI being shut down + */ +static void ice_vsi_close(struct ice_vsi *vsi) +{ + if (!test_and_set_bit(__ICE_DOWN, vsi->state)) + ice_down(vsi); + + ice_vsi_free_irq(vsi); + ice_vsi_free_tx_rings(vsi); + ice_vsi_free_rx_rings(vsi); +} + /** * ice_vsi_release - Delete a VSI and free its resources * @vsi: the VSI being removed @@ -2099,6 +3179,9 @@ static int ice_vsi_release(struct ice_vsi *vsi) vsi->netdev = NULL; } + ice_vsi_dis_irq(vsi); + ice_vsi_close(vsi); + /* reclaim interrupt vectors back to PF */ ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_msix += vsi->num_q_vectors; @@ -2116,3 +3199,56 @@ static int ice_vsi_release(struct ice_vsi *vsi) return 0; } + +/** + * ice_open - Called when a network interface becomes active + * @netdev: network interface device structure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the netdev watchdog is enabled, + * and the stack is notified that the interface is ready. + * + * Returns 0 on success, negative value on failure + */ +static int ice_open(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int err; + + netif_carrier_off(netdev); + + err = ice_vsi_open(vsi); + + if (err) + netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", + vsi->vsi_num, vsi->vsw->sw_id); + return err; +} + +/** + * ice_stop - Disables a network interface + * @netdev: network interface device structure + * + * The stop entry point is called when an interface is de-activated by the OS, + * and the netdevice enters the DOWN state. The hardware is still under the + * driver's control, but the netdev interface is disabled. + * + * Returns success only - not allowed to fail + */ +static int ice_stop(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + ice_vsi_close(vsi); + + return 0; +} + +static const struct net_device_ops ice_netdev_ops = { + .ndo_open = ice_open, + .ndo_stop = ice_stop, +}; diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 22039f9eb591..e50eebbf78e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -462,6 +462,18 @@ void ice_sched_cleanup_all(struct ice_hw *hw) hw->max_cgds = 0; } +/** + * ice_sched_get_qgrp_layer - get the current queue group layer number + * @hw: pointer to the hw struct + * + * This function returns the current queue group layer number + */ +static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw) +{ + /* It's always total layers - 1, the array is 0 relative so -2 */ + return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET; +} + /** * ice_rm_dflt_leaf_node - remove the default leaf node in the tree * @pi: port information structure @@ -666,3 +678,96 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), buf); return status; } + +/** + * ice_sched_get_vsi_info_entry - Get the vsi entry list for given vsi_id + * @pi: port information structure + * @vsi_id: vsi id + * + * This function retrieves the vsi list for the given vsi id + */ +static struct ice_sched_vsi_info * +ice_sched_get_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id) +{ + struct ice_sched_vsi_info *list_elem; + + if (!pi) + return NULL; + + list_for_each_entry(list_elem, &pi->vsi_info_list, list_entry) + if (list_elem->vsi_id == vsi_id) + return list_elem; + return NULL; +} + +/** + * ice_sched_find_node_in_subtree - Find node in part of base node subtree + * @hw: pointer to the hw struct + * @base: pointer to the base node + * @node: pointer to the node to search + * + * This function checks whether a given node is part of the base node + * subtree or not + */ +static bool +ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base, + struct ice_sched_node *node) +{ + u8 i; + + for (i = 0; i < base->num_children; i++) { + struct ice_sched_node *child = base->children[i]; + + if (node == child) + return true; + if (child->tx_sched_layer > node->tx_sched_layer) + return false; + /* this recursion is intentional, and wouldn't + * go more than 8 calls + */ + if (ice_sched_find_node_in_subtree(hw, child, node)) + return true; + } + return false; +} + +/** + * ice_sched_get_free_qparent - Get a free lan or rdma q group node + * @pi: port information structure + * @vsi_id: vsi id + * @tc: branch number + * @owner: lan or rdma + * + * This function retrieves a free lan or rdma q group node + */ +struct ice_sched_node * +ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc, + u8 owner) +{ + struct ice_sched_node *vsi_node, *qgrp_node = NULL; + struct ice_sched_vsi_info *list_elem; + u16 max_children; + u8 qgrp_layer; + + qgrp_layer = ice_sched_get_qgrp_layer(pi->hw); + max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children); + list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id); + if (!list_elem) + goto lan_q_exit; + vsi_node = list_elem->vsi_node[tc]; + /* validate invalid VSI id */ + if (!vsi_node) + goto lan_q_exit; + /* get the first q group node from VSI sub-tree */ + qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer); + while (qgrp_node) { + /* make sure the qgroup node is part of the VSI subtree */ + if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node)) + if (qgrp_node->num_children < max_children && + qgrp_node->owner == owner) + break; + qgrp_node = qgrp_node->sibling; + } +lan_q_exit: + return qgrp_node; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 2926ee9c373e..639859a3d0f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -6,6 +6,8 @@ #include "ice_common.h" +#define ICE_QGRP_LAYER_OFFSET 2 + struct ice_sched_agg_vsi_info { struct list_head list_entry; DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); @@ -31,4 +33,7 @@ ice_sched_add_node(struct ice_port_info *pi, u8 layer, struct ice_aqc_txsched_elem_data *info); void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node); struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc); +struct ice_sched_node * +ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc, + u8 owner); #endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index 365dfb86dcb9..9a95c4ffd7d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -9,6 +9,7 @@ enum ice_status { ICE_ERR_PARAM = -1, ICE_ERR_NOT_IMPL = -2, ICE_ERR_NOT_READY = -3, + ICE_ERR_BAD_PTR = -5, ICE_ERR_INVAL_SIZE = -6, ICE_ERR_DEVICE_NOT_SUPPORTED = -8, ICE_ERR_RESET_FAILED = -9, @@ -18,6 +19,7 @@ enum ice_status { ICE_ERR_OUT_OF_RANGE = -13, ICE_ERR_ALREADY_EXISTS = -14, ICE_ERR_DOES_NOT_EXIST = -15, + ICE_ERR_MAX_LIMIT = -17, ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, ICE_ERR_AQ_ERROR = -100, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c new file mode 100644 index 000000000000..6190ea30ee01 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +/* The driver transmit and receive code */ + +#include +#include +#include "ice.h" + +/** + * ice_unmap_and_free_tx_buf - Release a Tx buffer + * @ring: the ring that owns the buffer + * @tx_buf: the buffer to free + */ +static void +ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) +{ + if (tx_buf->skb) { + dev_kfree_skb_any(tx_buf->skb); + if (dma_unmap_len(tx_buf, len)) + dma_unmap_single(ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + } + + tx_buf->next_to_watch = NULL; + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + /* tx_buf must be completely set up in the transmit path */ +} + +static struct netdev_queue *txring_txq(const struct ice_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->q_index); +} + +/** + * ice_clean_tx_ring - Free any empty Tx buffers + * @tx_ring: ring to be cleaned + */ +void ice_clean_tx_ring(struct ice_ring *tx_ring) +{ + unsigned long size; + u16 i; + + /* ring already cleared, nothing to do */ + if (!tx_ring->tx_buf) + return; + + /* Free all the Tx ring sk_bufss */ + for (i = 0; i < tx_ring->count; i++) + ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]); + + size = sizeof(struct ice_tx_buf) * tx_ring->count; + memset(tx_ring->tx_buf, 0, size); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + + if (!tx_ring->netdev) + return; + + /* cleanup Tx queue statistics */ + netdev_tx_reset_queue(txring_txq(tx_ring)); +} + +/** + * ice_free_tx_ring - Free Tx resources per queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + */ +void ice_free_tx_ring(struct ice_ring *tx_ring) +{ + ice_clean_tx_ring(tx_ring); + devm_kfree(tx_ring->dev, tx_ring->tx_buf); + tx_ring->tx_buf = NULL; + + if (tx_ring->desc) { + dmam_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + tx_ring->desc = NULL; + } +} + +/** + * ice_setup_tx_ring - Allocate the Tx descriptors + * @tx_ring: the tx ring to set up + * + * Return 0 on success, negative on error + */ +int ice_setup_tx_ring(struct ice_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int bi_size; + + if (!dev) + return -ENOMEM; + + /* warn if we are about to overwrite the pointer */ + WARN_ON(tx_ring->tx_buf); + bi_size = sizeof(struct ice_tx_buf) * tx_ring->count; + tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL); + if (!tx_ring->tx_buf) + return -ENOMEM; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, + GFP_KERNEL); + if (!tx_ring->desc) { + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", + tx_ring->size); + goto err; + } + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + return 0; + +err: + devm_kfree(dev, tx_ring->tx_buf); + tx_ring->tx_buf = NULL; + return -ENOMEM; +} + +/** + * ice_clean_rx_ring - Free Rx buffers + * @rx_ring: ring to be cleaned + */ +void ice_clean_rx_ring(struct ice_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + unsigned long size; + u16 i; + + /* ring already cleared, nothing to do */ + if (!rx_ring->rx_buf) + return; + + /* Free all the Rx ring sk_buffs */ + for (i = 0; i < rx_ring->count; i++) { + struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + + if (rx_buf->skb) { + dev_kfree_skb(rx_buf->skb); + rx_buf->skb = NULL; + } + if (!rx_buf->page) + continue; + + dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE); + __free_pages(rx_buf->page, 0); + + rx_buf->page = NULL; + rx_buf->page_offset = 0; + } + + size = sizeof(struct ice_rx_buf) * rx_ring->count; + memset(rx_ring->rx_buf, 0, size); + + /* Zero out the descriptor ring */ + memset(rx_ring->desc, 0, rx_ring->size); + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +/** + * ice_free_rx_ring - Free Rx resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + */ +void ice_free_rx_ring(struct ice_ring *rx_ring) +{ + ice_clean_rx_ring(rx_ring); + devm_kfree(rx_ring->dev, rx_ring->rx_buf); + rx_ring->rx_buf = NULL; + + if (rx_ring->desc) { + dmam_free_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + rx_ring->desc = NULL; + } +} + +/** + * ice_setup_rx_ring - Allocate the Rx descriptors + * @rx_ring: the rx ring to set up + * + * Return 0 on success, negative on error + */ +int ice_setup_rx_ring(struct ice_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + int bi_size; + + if (!dev) + return -ENOMEM; + + /* warn if we are about to overwrite the pointer */ + WARN_ON(rx_ring->rx_buf); + bi_size = sizeof(struct ice_rx_buf) * rx_ring->count; + rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL); + if (!rx_ring->rx_buf) + return -ENOMEM; + + /* round up to nearest 4K */ + rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc); + rx_ring->size = ALIGN(rx_ring->size, 4096); + rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, + GFP_KERNEL); + if (!rx_ring->desc) { + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", + rx_ring->size); + goto err; + } + + rx_ring->next_to_use = 0; + rx_ring->next_to_clean = 0; + return 0; + +err: + devm_kfree(dev, rx_ring->rx_buf); + rx_ring->rx_buf = NULL; + return -ENOMEM; +} + +/** + * ice_release_rx_desc - Store the new tail and head values + * @rx_ring: ring to bump + * @val: new head index + */ +static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) +{ + rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(val, rx_ring->tail); +} + +/** + * ice_alloc_mapped_page - recycle or make a new page + * @rx_ring: ring to use + * @bi: rx_buf struct to modify + * + * Returns true if the page was successfully allocated or + * reused. + */ +static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, + struct ice_rx_buf *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) + return true; + + /* alloc new page for storage */ + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!page)) + return false; + + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_pages(page, 0); + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = 0; + + return true; +} + +/** + * ice_alloc_rx_bufs - Replace used receive buffers + * @rx_ring: ring to place buffers on + * @cleaned_count: number of buffers to replace + * + * Returns false if all allocations were successful, true if any fail + */ +bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) +{ + union ice_32b_rx_flex_desc *rx_desc; + u16 ntu = rx_ring->next_to_use; + struct ice_rx_buf *bi; + + /* do nothing if no valid netdev defined */ + if (!rx_ring->netdev || !cleaned_count) + return false; + + /* get the RX descriptor and buffer based on next_to_use */ + rx_desc = ICE_RX_DESC(rx_ring, ntu); + bi = &rx_ring->rx_buf[ntu]; + + do { + if (!ice_alloc_mapped_page(rx_ring, bi)) + goto no_bufs; + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + + rx_desc++; + bi++; + ntu++; + if (unlikely(ntu == rx_ring->count)) { + rx_desc = ICE_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_buf; + ntu = 0; + } + + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.status_error0 = 0; + + cleaned_count--; + } while (cleaned_count); + + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); + + return false; + +no_bufs: + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; +} diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 2dd2232127a7..b7015cfad2d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -5,6 +5,30 @@ #define _ICE_TXRX_H_ #define ICE_DFLT_IRQ_WORK 256 +#define ICE_RXBUF_2048 2048 +#define ICE_MAX_CHAINED_RX_BUFS 5 +#define ICE_MAX_TXQ_PER_TXQG 128 + +#define ICE_DESC_UNUSED(R) \ + ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1) + +struct ice_tx_buf { + struct ice_tx_desc *next_to_watch; + struct sk_buff *skb; + unsigned int bytecount; + unsigned short gso_segs; + u32 tx_flags; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); +}; + +struct ice_rx_buf { + struct sk_buff *skb; + dma_addr_t dma; + struct page *page; + unsigned int page_offset; +}; /* this enum matches hardware bits and is meant to be used by DYN_CTLN * registers and QINT registers or more generally anywhere in the manual @@ -18,33 +42,77 @@ enum ice_dyn_idx_t { ICE_ITR_NONE = 3 /* ITR_NONE must not be used as an index */ }; +/* Header split modes defined by DTYPE field of Rx RLAN context */ +enum ice_rx_dtype { + ICE_RX_DTYPE_NO_SPLIT = 0, + ICE_RX_DTYPE_HEADER_SPLIT = 1, + ICE_RX_DTYPE_SPLIT_ALWAYS = 2, +}; + /* indices into GLINT_ITR registers */ #define ICE_RX_ITR ICE_IDX_ITR0 +#define ICE_TX_ITR ICE_IDX_ITR1 #define ICE_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define ICE_ITR_8K 0x003E /* apply ITR HW granularity translation to program the HW registers */ #define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran)) +/* Legacy or Advanced Mode Queue */ +#define ICE_TX_ADVANCED 0 +#define ICE_TX_LEGACY 1 + /* descriptor ring, associated with a VSI */ struct ice_ring { struct ice_ring *next; /* pointer to next ring in q_vector */ + void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ struct net_device *netdev; /* netdev ring maps to */ struct ice_vsi *vsi; /* Backreference to associated VSI */ struct ice_q_vector *q_vector; /* Backreference to associated vector */ + u8 __iomem *tail; + union { + struct ice_tx_buf *tx_buf; + struct ice_rx_buf *rx_buf; + }; u16 q_index; /* Queue number of ring */ + u32 txq_teid; /* Added Tx queue TEID */ + + /* high bit set means dynamic, use accessor routines to read/write. + * hardware supports 2us/1us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ + + /* used in interrupt processing */ + u16 next_to_use; + u16 next_to_clean; + bool ring_active; /* is ring online or not */ + unsigned int size; /* length of descriptor ring in bytes */ + dma_addr_t dma; /* physical address of ring */ struct rcu_head rcu; /* to avoid race on free */ + u16 next_to_alloc; } ____cacheline_internodealigned_in_smp; +enum ice_latency_range { + ICE_LOWEST_LATENCY = 0, + ICE_LOW_LATENCY = 1, + ICE_BULK_LATENCY = 2, + ICE_ULTRA_LATENCY = 3, +}; + struct ice_ring_container { /* array of pointers to rings */ struct ice_ring *ring; unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_pkts; /* total packets processed this int */ + enum ice_latency_range latency_range; u16 itr; }; @@ -52,4 +120,11 @@ struct ice_ring_container { #define ice_for_each_ring(pos, head) \ for (pos = (head).ring; pos; pos = pos->next) +bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count); +void ice_clean_tx_ring(struct ice_ring *tx_ring); +void ice_clean_rx_ring(struct ice_ring *rx_ring); +int ice_setup_tx_ring(struct ice_ring *tx_ring); +int ice_setup_rx_ring(struct ice_ring *rx_ring); +void ice_free_tx_ring(struct ice_ring *tx_ring); +void ice_free_rx_ring(struct ice_ring *rx_ring); #endif /* _ICE_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 8926715b76ee..991ac56ca7b3 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -8,9 +8,11 @@ #include "ice_hw_autogen.h" #include "ice_osdep.h" #include "ice_controlq.h" +#include "ice_lan_tx_rx.h" /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) +#define ICE_DBG_QCTX BIT_ULL(6) #define ICE_DBG_NVM BIT_ULL(7) #define ICE_DBG_LAN BIT_ULL(8) #define ICE_DBG_SW BIT_ULL(13) From 2b245cb29421abbad508e93cdfedf81adc12edf1 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:14 -0700 Subject: [PATCH 10/15] ice: Implement transmit and NAPI support This patch implements ice_start_xmit (the handler for ndo_start_xmit) and related functions. ice_start_xmit ultimately calls ice_tx_map, where the Tx descriptor is built and posted to the hardware by bumping the ring tail. This patch also implements ice_napi_poll, which is invoked when there's an interrupt on the VSI's queues. The interrupt can be due to either a completed Tx or an Rx event. In case of a completed Tx/Rx event, resources are reclaimed. Additionally, in case of an Rx event, the skb is fetched and passed up to the network stack. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 1 + .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 46 + drivers/net/ethernet/intel/ice/ice_main.c | 55 + drivers/net/ethernet/intel/ice/ice_txrx.c | 1026 ++++++++++++++++- drivers/net/ethernet/intel/ice/ice_txrx.h | 45 + 5 files changed, 1171 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 46cf82212ed8..6f01395a7f34 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -60,6 +60,7 @@ (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \ ICE_AQ_VSI_UP_TABLE_UP##i##_M) +#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i])) #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i])) #define ice_for_each_txq(vsi, i) \ diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 3c1aeb74d950..e1221b9839d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -131,6 +131,33 @@ enum ice_rx_flg64_bits { ICE_RXFLG_RSVD = 63 }; +/* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */ +#define ICE_RX_FLEX_DESC_PTYPE_M (0x3FF) /* 10-bits */ + +/* for ice_32byte_rx_flex_desc.pkt_length member */ +#define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */ + +enum ice_rx_flex_desc_status_error_0_bits { + /* Note: These are predefined bit offsets */ + ICE_RX_FLEX_DESC_STATUS0_DD_S = 0, + ICE_RX_FLEX_DESC_STATUS0_EOF_S, + ICE_RX_FLEX_DESC_STATUS0_HBO_S, + ICE_RX_FLEX_DESC_STATUS0_L3L4P_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S, + ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S, + ICE_RX_FLEX_DESC_STATUS0_LPBK_S, + ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S, + ICE_RX_FLEX_DESC_STATUS0_RXE_S, + ICE_RX_FLEX_DESC_STATUS0_CRCP_S, + ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S, + ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S, + ICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S, + ICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S, + ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */ +}; + #define ICE_RXQ_CTX_SIZE_DWORDS 8 #define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) @@ -201,6 +228,25 @@ struct ice_tx_desc { __le64 cmd_type_offset_bsz; }; +enum ice_tx_desc_dtype_value { + ICE_TX_DESC_DTYPE_DATA = 0x0, + ICE_TX_DESC_DTYPE_CTX = 0x1, + /* DESC_DONE - HW has completed write-back of descriptor */ + ICE_TX_DESC_DTYPE_DESC_DONE = 0xF, +}; + +#define ICE_TXD_QW1_CMD_S 4 +#define ICE_TXD_QW1_CMD_M (0xFFFUL << ICE_TXD_QW1_CMD_S) + +enum ice_tx_desc_cmd_bits { + ICE_TX_DESC_CMD_EOP = 0x0001, + ICE_TX_DESC_CMD_RS = 0x0002, +}; + +#define ICE_TXD_QW1_OFFSET_S 16 +#define ICE_TXD_QW1_TX_BUF_SZ_S 34 +#define ICE_TXD_QW1_L2TAG1_S 48 + #define ICE_LAN_TXQ_MAX_QGRPS 127 #define ICE_LAN_TXQ_MAX_QDIS 1023 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 58e9eb40f64a..dd07bf862afe 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1258,6 +1258,23 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors) return -ENOMEM; } +/** + * ice_msix_clean_rings - MSIX mode Interrupt Handler + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) +{ + struct ice_q_vector *q_vector = (struct ice_q_vector *)data; + + if (!q_vector->tx.ring && !q_vector->rx.ring) + return IRQ_HANDLED; + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + /** * ice_vsi_alloc - Allocates the next available struct vsi in the PF * @pf: board private structure @@ -1298,6 +1315,8 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type) if (ice_vsi_alloc_arrays(vsi, true)) goto err_rings; + /* Setup default MSIX irq handler for VSI */ + vsi->irq_handler = ice_msix_clean_rings; break; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); @@ -1741,6 +1760,9 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) if (cpu_online(v_idx)) cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + if (vsi->netdev) + netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, + NAPI_POLL_WEIGHT); /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; @@ -2914,6 +2936,21 @@ static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi) return 0; } +/** + * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI + * @vsi: the VSI being configured + */ +static void ice_napi_enable_all(struct ice_vsi *vsi) +{ + int q_idx; + + if (!vsi->netdev) + return; + + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) + napi_enable(&vsi->q_vectors[q_idx]->napi); +} + /** * ice_up_complete - Finish the last steps of bringing up a connection * @vsi: The VSI being configured @@ -2939,6 +2976,7 @@ static int ice_up_complete(struct ice_vsi *vsi) return err; clear_bit(__ICE_DOWN, vsi->state); + ice_napi_enable_all(vsi); ice_vsi_ena_irq(vsi); if (vsi->port_info && @@ -2954,6 +2992,21 @@ static int ice_up_complete(struct ice_vsi *vsi) return err; } +/** + * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI + * @vsi: VSI having NAPI disabled + */ +static void ice_napi_disable_all(struct ice_vsi *vsi) +{ + int q_idx; + + if (!vsi->netdev) + return; + + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) + napi_disable(&vsi->q_vectors[q_idx]->napi); +} + /** * ice_down - Shutdown the connection * @vsi: The VSI being stopped @@ -2972,6 +3025,7 @@ static int ice_down(struct ice_vsi *vsi) ice_vsi_dis_irq(vsi); err = ice_vsi_stop_tx_rx_rings(vsi); + ice_napi_disable_all(vsi); ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); @@ -3251,4 +3305,5 @@ static int ice_stop(struct net_device *netdev) static const struct net_device_ops ice_netdev_ops = { .ndo_open = ice_open, .ndo_stop = ice_stop, + .ndo_start_xmit = ice_start_xmit, }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 6190ea30ee01..1ccf8e69b85a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -7,6 +7,8 @@ #include #include "ice.h" +#define ICE_RX_HDR_SIZE 256 + /** * ice_unmap_and_free_tx_buf - Release a Tx buffer * @ring: the ring that owns the buffer @@ -92,6 +94,129 @@ void ice_free_tx_ring(struct ice_ring *tx_ring) } } +/** + * ice_clean_tx_irq - Reclaim resources after transmit completes + * @vsi: the VSI we care about + * @tx_ring: Tx ring to clean + * @napi_budget: Used to determine if we are in netpoll + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, + int napi_budget) +{ + unsigned int total_bytes = 0, total_pkts = 0; + unsigned int budget = vsi->work_lmt; + s16 i = tx_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + + tx_buf = &tx_ring->tx_buf[i]; + tx_desc = ICE_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + do { + struct ice_tx_desc *eop_desc = tx_buf->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + smp_rmb(); /* prevent any other reads prior to eop_desc */ + + /* if the descriptor isn't done, no work yet to do */ + if (!(eop_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_pkts += tx_buf->gso_segs; + + /* free the skb */ + napi_consume_skb(tx_buf->skb, napi_budget); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + /* clear tx_buf data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_buf; + tx_desc = ICE_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_buf; + tx_desc = ICE_TX_DESC(tx_ring, 0); + } + + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->stats.bytes += total_bytes; + tx_ring->stats.pkts += total_pkts; + u64_stats_update_end(&tx_ring->syncp); + tx_ring->q_vector->tx.total_bytes += total_bytes; + tx_ring->q_vector->tx.total_pkts += total_pkts; + + netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, + total_bytes); + +#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) + if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) && + (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. + */ + smp_mb(); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->q_index) && + !test_bit(__ICE_DOWN, vsi->state)) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->q_index); + ++tx_ring->tx_stats.restart_q; + } + } + + return !!budget; +} + /** * ice_setup_tx_ring - Allocate the Tx descriptors * @tx_ring: the tx ring to set up @@ -274,13 +399,17 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, dma_addr_t dma; /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) + if (likely(page)) { + rx_ring->rx_stats.page_reuse_count++; return true; + } /* alloc new page for storage */ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!page)) + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_page_failed++; return false; + } /* map page for use */ dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); @@ -290,6 +419,7 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, */ if (dma_mapping_error(rx_ring->dev, dma)) { __free_pages(page, 0); + rx_ring->rx_stats.alloc_page_failed++; return false; } @@ -359,3 +489,895 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) */ return true; } + +/** + * ice_page_is_reserved - check if reuse is possible + * @page: page struct to check + */ +static bool ice_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); +} + +/** + * ice_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_buf: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buf to place the data into + * + * This function will add the data contained in rx_buf->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. + */ +static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = ICE_RXBUF_2048; +#else + unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; + unsigned int truesize; +#endif /* PAGE_SIZE < 8192) */ + + struct page *page; + unsigned int size; + + size = le16_to_cpu(rx_desc->wb.pkt_len) & + ICE_RX_FLX_DESC_PKT_LEN_M; + + page = rx_buf->page; + +#if (PAGE_SIZE >= 8192) + truesize = ALIGN(size, L1_CACHE_BYTES); +#endif /* PAGE_SIZE >= 8192) */ + + /* will the data fit in the skb we allocated? if so, just + * copy it as it is pretty small anyway + */ + if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buf->page_offset; + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* page is not reserved, we can reuse buffer as-is */ + if (likely(!ice_page_is_reserved(page))) + return true; + + /* this page cannot be reused so discard it */ + __free_pages(page, 0); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buf->page_offset, size, truesize); + + /* avoid re-using remote pages */ + if (unlikely(ice_page_is_reserved(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buf->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buf->page_offset += truesize; + + if (rx_buf->page_offset > last_offset) + return false; +#endif /* PAGE_SIZE < 8192) */ + + /* Even if we own the page, we are not allowed to use atomic_set() + * This would break get_page_unless_zero() users. + */ + get_page(rx_buf->page); + + return true; +} + +/** + * ice_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buf: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + */ +static void ice_reuse_rx_page(struct ice_ring *rx_ring, + struct ice_rx_buf *old_buf) +{ + u16 nta = rx_ring->next_to_alloc; + struct ice_rx_buf *new_buf; + + new_buf = &rx_ring->rx_buf[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + *new_buf = *old_buf; +} + +/** + * ice_fetch_rx_buf - Allocate skb and populate it + * @rx_ring: rx descriptor ring to transact packets on + * @rx_desc: descriptor containing info written by hardware + * + * This function allocates an skb on the fly, and populates it with the page + * data from the current receive descriptor, taking care to set up the skb + * correctly, as well as handling calling the page recycle function if + * necessary. + */ +static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) +{ + struct ice_rx_buf *rx_buf; + struct sk_buff *skb; + struct page *page; + + rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; + page = rx_buf->page; + prefetchw(page); + + skb = rx_buf->skb; + + if (likely(!skb)) { + u8 *page_addr = page_address(page) + rx_buf->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch((void *)(page_addr + L1_CACHE_BYTES)); +#endif /* L1_CACHE_BYTES */ + + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + ICE_RX_HDR_SIZE, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_buf_failed++; + return NULL; + } + + /* we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + + skb_record_rx_queue(skb, rx_ring->q_index); + } else { + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, + rx_buf->page_offset, + ICE_RXBUF_2048, + DMA_FROM_DEVICE); + + rx_buf->skb = NULL; + } + + /* pull page into skb */ + if (ice_add_rx_frag(rx_buf, rx_desc, skb)) { + /* hand second half of page back to the ring */ + ice_reuse_rx_page(rx_ring, rx_buf); + rx_ring->rx_stats.page_reuse_count++; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buf->page = NULL; + + return skb; +} + +/** + * ice_pull_tail - ice specific version of skb_pull_tail + * @skb: pointer to current skb being adjusted + * + * This function is an ice specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void ice_pull_tail(struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int pull_len; + unsigned char *va; + + /* it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * ice_cleanup_headers - Correct empty headers + * @skb: pointer to current skb being fixed + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + */ +static bool ice_cleanup_headers(struct sk_buff *skb) +{ + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + ice_pull_tail(skb); + + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; + + return false; +} + +/** + * ice_test_staterr - tests bits in Rx descriptor status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_len doesn't need to be shifted because it begins + * at offset zero. + */ +static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, + const u16 stat_err_bits) +{ + return !!(rx_desc->wb.status_error0 & + cpu_to_le16(stat_err_bits)); +} + +/** + * ice_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + */ +static bool ice_is_non_eop(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(ICE_RX_DESC(rx_ring, ntc)); + + /* if we are the last buffer then there is nothing else to do */ +#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) + if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) + return false; + + /* place skb in next buffer to be received */ + rx_ring->rx_buf[ntc].skb = skb; + rx_ring->rx_stats.non_eop_descs++; + + return true; +} + +/** + * ice_receive_skb - Send a completed packet up the stack + * @rx_ring: rx ring in play + * @skb: packet to send up + * @vlan_tag: vlan tag for packet + * + * This function sends the completed packet (via. skb) up the stack using + * gro receive functions (with/without vlan tag) + */ +static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, + u16 vlan_tag) +{ + if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + (vlan_tag & VLAN_VID_MASK)) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + } + napi_gro_receive(&rx_ring->q_vector->napi, skb); +} + +/** + * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf + * @rx_ring: rx descriptor ring to transact packets on + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the system. + * + * Returns amount of work completed + */ +static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + bool failure = false; + + /* start the loop to process RX packets bounded by 'budget' */ + while (likely(total_rx_pkts < (unsigned int)budget)) { + union ice_32b_rx_flex_desc *rx_desc; + struct sk_buff *skb; + u16 stat_err_bits; + u16 vlan_tag = 0; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= ICE_RX_BUF_WRITE) { + failure = failure || + ice_alloc_rx_bufs(rx_ring, cleaned_count); + cleaned_count = 0; + } + + /* get the RX desc from RX ring based on 'next_to_clean' */ + rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); + + /* status_error_len will always be zero for unused descriptors + * because it's cleared in cleanup, and overlaps with hdr_addr + * which is always zero because packet split isn't used, if the + * hardware wrote DD then it will be non-zero + */ + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S); + if (!ice_test_staterr(rx_desc, stat_err_bits)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * DD bit is set. + */ + dma_rmb(); + + /* allocate (if needed) and populate skb */ + skb = ice_fetch_rx_buf(rx_ring, rx_desc); + if (!skb) + break; + + cleaned_count++; + + /* skip if it is NOP desc */ + if (ice_is_non_eop(rx_ring, rx_desc, skb)) + continue; + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); + if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) { + dev_kfree_skb_any(skb); + continue; + } + + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); + if (ice_test_staterr(rx_desc, stat_err_bits)) + vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); + + /* correct empty headers and pad skb if needed (to make valid + * ethernet frame + */ + if (ice_cleanup_headers(skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + + /* send completed skb up the stack */ + ice_receive_skb(rx_ring, skb, vlan_tag); + + /* update budget accounting */ + total_rx_pkts++; + } + + /* update queue and vector specific stats */ + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->stats.pkts += total_rx_pkts; + rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); + rx_ring->q_vector->rx.total_pkts += total_rx_pkts; + rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + + /* guarantee a trip back through this routine if there was a failure */ + return failure ? budget : (int)total_rx_pkts; +} + +/** + * ice_napi_poll - NAPI polling Rx/Tx cleanup routine + * @napi: napi struct with our devices info in it + * @budget: amount of work driver is allowed to do this pass, in packets + * + * This function will clean all queues associated with a q_vector. + * + * Returns the amount of work done + */ +int ice_napi_poll(struct napi_struct *napi, int budget) +{ + struct ice_q_vector *q_vector = + container_of(napi, struct ice_q_vector, napi); + struct ice_vsi *vsi = q_vector->vsi; + struct ice_pf *pf = vsi->back; + bool clean_complete = true; + int budget_per_ring = 0; + struct ice_ring *ring; + int work_done = 0; + + /* Since the actual Tx work is minimal, we can give the Tx a larger + * budget and be more aggressive about cleaning up the Tx descriptors. + */ + ice_for_each_ring(ring, q_vector->tx) + if (!ice_clean_tx_irq(vsi, ring, budget)) + clean_complete = false; + + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) + return budget; + + /* We attempt to distribute budget to each Rx queue fairly, but don't + * allow the budget to go below 1 because that would exit polling early. + */ + if (q_vector->num_ring_rx) + budget_per_ring = max(budget / q_vector->num_ring_rx, 1); + + ice_for_each_ring(ring, q_vector->rx) { + int cleaned; + + cleaned = ice_clean_rx_irq(ring, budget_per_ring); + work_done += cleaned; + /* if we clean as many as budgeted, we must not be done */ + if (cleaned >= budget_per_ring) + clean_complete = false; + } + + /* If work not completed, return budget and polling will return */ + if (!clean_complete) + return budget; + + /* Work is done so exit the polling mode and re-enable the interrupt */ + napi_complete_done(napi, work_done); + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector); + return 0; +} + +/* helper function for building cmd/type/offset */ +static __le64 +build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) +{ + return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | + (td_cmd << ICE_TXD_QW1_CMD_S) | + (td_offset << ICE_TXD_QW1_OFFSET_S) | + ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) | + (td_tag << ICE_TXD_QW1_L2TAG1_S)); +} + +/** + * __ice_maybe_stop_tx - 2nd level check for tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns -EBUSY if a stop is needed, else 0 + */ +static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +{ + netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index); + /* Memory barrier before checking head and tail */ + smp_mb(); + + /* Check again in a case another CPU has just made room available. */ + if (likely(ICE_DESC_UNUSED(tx_ring) < size)) + return -EBUSY; + + /* A reprieve! - use start_subqueue because it doesn't call schedule */ + netif_start_subqueue(tx_ring->netdev, tx_ring->q_index); + ++tx_ring->tx_stats.restart_q; + return 0; +} + +/** + * ice_maybe_stop_tx - 1st level check for tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + */ +static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +{ + if (likely(ICE_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __ice_maybe_stop_tx(tx_ring, size); +} + +/** + * ice_tx_map - Build the Tx descriptor + * @tx_ring: ring to send buffer on + * @first: first buffer info buffer to use + * + * This function loops over the skb data pointed to by *first + * and gets a physical address for each memory location and programs + * it and the length into the transmit descriptor. + */ +static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first) +{ + u64 td_offset = 0, td_tag = 0, td_cmd = 0; + u16 i = tx_ring->next_to_use; + struct skb_frag_struct *frag; + unsigned int data_len, size; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + struct sk_buff *skb; + dma_addr_t dma; + + skb = first->skb; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = ICE_TX_DESC(tx_ring, i); + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buf = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED; + + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + /* align size to end of page */ + max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1); + tx_desc->buf_addr = cpu_to_le64(dma); + + /* account for data chunks larger than the hardware + * can handle + */ + while (unlikely(size > ICE_MAX_DATA_PER_TXD)) { + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, max_data, td_tag); + + tx_desc++; + i++; + + if (i == tx_ring->count) { + tx_desc = ICE_TX_DESC(tx_ring, 0); + i = 0; + } + + dma += max_data; + size -= max_data; + + max_data = ICE_MAX_DATA_PER_TXD_ALIGNED; + tx_desc->buf_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, + size, td_tag); + + tx_desc++; + i++; + + if (i == tx_ring->count) { + tx_desc = ICE_TX_DESC(tx_ring, 0); + i = 0; + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_buf = &tx_ring->tx_buf[i]; + } + + /* record bytecount for BQL */ + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); + + /* record SW timestamp if HW timestamp is not available */ + skb_tx_timestamp(first->skb); + + i++; + if (i == tx_ring->count) + i = 0; + + /* write last descriptor with RS and EOP bits */ + td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS); + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag); + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. + * + * We also use this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. + */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + tx_ring->next_to_use = i; + + ice_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* notify HW of packet */ + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } + + return; + +dma_error: + /* clear dma mappings for failed tx_buf map */ + for (;;) { + tx_buf = &tx_ring->tx_buf[i]; + ice_unmap_and_free_tx_buf(tx_ring, tx_buf); + if (tx_buf == first) + break; + if (i == 0) + i = tx_ring->count; + i--; + } + + tx_ring->next_to_use = i; +} + +/** + * ice_txd_use_count - estimate the number of descriptors needed for Tx + * @size: transmit request size in bytes + * + * Due to hardware alignment restrictions (4K alignment), we need to + * assume that we can have no more than 12K of data per descriptor, even + * though each descriptor can take up to 16K - 1 bytes of aligned memory. + * Thus, we need to divide by 12K. But division is slow! Instead, + * we decompose the operation into shifts and one relatively cheap + * multiply operation. + * + * To divide by 12K, we first divide by 4K, then divide by 3: + * To divide by 4K, shift right by 12 bits + * To divide by 3, multiply by 85, then divide by 256 + * (Divide by 256 is done by shifting right by 8 bits) + * Finally, we add one to round up. Because 256 isn't an exact multiple of + * 3, we'll underestimate near each multiple of 12K. This is actually more + * accurate as we have 4K - 1 of wiggle room that we can fit into the last + * segment. For our purposes this is accurate out to 1M which is orders of + * magnitude greater than our largest possible GSO size. + * + * This would then be implemented as: + * return (((size >> 12) * 85) >> 8) + 1; + * + * Since multiplication and division are commutative, we can reorder + * operations into: + * return ((size * 85) >> 20) + 1; + */ +static unsigned int ice_txd_use_count(unsigned int size) +{ + return ((size * 85) >> 20) + 1; +} + +/** + * ice_xmit_desc_count - calculate number of tx descriptors needed + * @skb: send buffer + * + * Returns number of data descriptors needed for this skb. + */ +static unsigned int ice_xmit_desc_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + unsigned int count = 0, size = skb_headlen(skb); + + for (;;) { + count += ice_txd_use_count(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * __ice_chk_linearize - Check if there are more than 8 buffers per packet + * @skb: send buffer + * + * Note: This HW can't DMA more than 8 buffers to build a packet on the wire + * and so we need to figure out the cases where we need to linearize the skb. + * + * For TSO we need to count the TSO header and segment payload separately. + * As such we need to check cases where we have 7 fragments or more as we + * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for + * the segment payload in the first descriptor, and another 7 for the + * fragments. + */ +static bool __ice_chk_linearize(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag, *stale; + int nr_frags, sum; + + /* no need to check if number of frags is less than 7 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < (ICE_MAX_BUF_TXD - 1)) + return false; + + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. + */ + nr_frags -= ICE_MAX_BUF_TXD - 2; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - skb_shinfo(skb)->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(frag++); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= skb_frag_size(stale++); + } + + return false; +} + +/** + * ice_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + */ +static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) +{ + /* Both TSO and single send will work if count is less than 8 */ + if (likely(count < ICE_MAX_BUF_TXD)) + return false; + + if (skb_is_gso(skb)) + return __ice_chk_linearize(skb); + + /* we can support up to 8 data buffers for a single send */ + return count != ICE_MAX_BUF_TXD; +} + +/** + * ice_xmit_frame_ring - Sends buffer on Tx ring + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +static netdev_tx_t +ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) +{ + struct ice_tx_buf *first; + unsigned int count; + + count = ice_xmit_desc_count(skb); + if (ice_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = ice_txd_use_count(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; + return NETDEV_TX_BUSY; + } + + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buf[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + first->gso_segs = 1; + + ice_tx_map(tx_ring, first); + return NETDEV_TX_OK; + +out_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_ring *tx_ring; + + tx_ring = vsi->tx_rings[skb->queue_mapping]; + + /* hardware can't handle really short frames, hardware padding works + * beyond this point + */ + if (skb_put_padto(skb, ICE_MIN_TX_LEN)) + return NETDEV_TX_OK; + + return ice_xmit_frame_ring(skb, tx_ring); +} diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b7015cfad2d7..8fa4450514b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -7,8 +7,23 @@ #define ICE_DFLT_IRQ_WORK 256 #define ICE_RXBUF_2048 2048 #define ICE_MAX_CHAINED_RX_BUFS 5 +#define ICE_MAX_BUF_TXD 8 +#define ICE_MIN_TX_LEN 17 + +/* The size limit for a transmit buffer in a descriptor is (16K - 1). + * In order to align with the read requests we will align the value to + * the nearest 4K which represents our maximum read request size. + */ +#define ICE_MAX_READ_REQ_SIZE 4096 +#define ICE_MAX_DATA_PER_TXD (16 * 1024 - 1) +#define ICE_MAX_DATA_PER_TXD_ALIGNED \ + (~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD) + +#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ #define ICE_MAX_TXQ_PER_TXQG 128 +/* Tx Descriptors needed, worst case */ +#define DESC_NEEDED (MAX_SKB_FRAGS + 4) #define ICE_DESC_UNUSED(R) \ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) @@ -30,6 +45,24 @@ struct ice_rx_buf { unsigned int page_offset; }; +struct ice_q_stats { + u64 pkts; + u64 bytes; +}; + +struct ice_txq_stats { + u64 restart_q; + u64 tx_busy; + u64 tx_linearize; +}; + +struct ice_rxq_stats { + u64 non_eop_descs; + u64 alloc_page_failed; + u64 alloc_buf_failed; + u64 page_reuse_count; +}; + /* this enum matches hardware bits and is meant to be used by DYN_CTLN * registers and QINT registers or more generally anywhere in the manual * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any @@ -94,6 +127,15 @@ struct ice_ring { u16 next_to_clean; bool ring_active; /* is ring online or not */ + + /* stats structs */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; + union { + struct ice_txq_stats tx_stats; + struct ice_rxq_stats rx_stats; + }; + unsigned int size; /* length of descriptor ring in bytes */ dma_addr_t dma; /* physical address of ring */ struct rcu_head rcu; /* to avoid race on free */ @@ -121,10 +163,13 @@ struct ice_ring_container { for (pos = (head).ring; pos; pos = pos->next) bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count); +netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); void ice_clean_tx_ring(struct ice_ring *tx_ring); void ice_clean_rx_ring(struct ice_ring *rx_ring); int ice_setup_tx_ring(struct ice_ring *tx_ring); int ice_setup_rx_ring(struct ice_ring *rx_ring); void ice_free_tx_ring(struct ice_ring *tx_ring); void ice_free_rx_ring(struct ice_ring *rx_ring); +int ice_napi_poll(struct napi_struct *napi, int budget); + #endif /* _ICE_TXRX_H_ */ From d76a60ba7afb89523c88cf2ed3a044ce4180289e Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:15 -0700 Subject: [PATCH 11/15] ice: Add support for VLANs and offloads This patch adds support for VLANs. When a VLAN is created a switch filter is added to direct the VLAN traffic to the corresponding VSI. When a VLAN is deleted, the filter is deleted as well. This patch also adds support for the following hardware offloads. 1) VLAN tag insertion/stripping 2) Receive Side Scaling (RSS) 3) Tx checksum and TCP segmentation 4) Rx checksum Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 19 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 62 ++ drivers/net/ethernet/intel/ice/ice_common.c | 188 ++++++ drivers/net/ethernet/intel/ice/ice_common.h | 13 + .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 169 +++++ drivers/net/ethernet/intel/ice/ice_main.c | 601 +++++++++++++++++- drivers/net/ethernet/intel/ice/ice_switch.c | 169 +++++ drivers/net/ethernet/intel/ice/ice_switch.h | 4 + drivers/net/ethernet/intel/ice/ice_txrx.c | 405 +++++++++++- drivers/net/ethernet/intel/ice/ice_txrx.h | 17 + 10 files changed, 1631 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 6f01395a7f34..51cc955c7220 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -23,7 +23,10 @@ #include #include #include +#include +#include #include +#include #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" @@ -47,6 +50,8 @@ #define ICE_MAX_SCATTER_RXQS 16 #define ICE_Q_WAIT_RETRY_LIMIT 10 #define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT) +#define ICE_MAX_LG_RSS_QS 256 +#define ICE_MAX_SMALL_RSS_QS 8 #define ICE_RES_VALID_BIT 0x8000 #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) #define ICE_INVAL_Q_INDEX 0xffff @@ -62,6 +67,7 @@ #define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i])) #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i])) +#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i])) #define ice_for_each_txq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_txq; (i)++) @@ -113,6 +119,7 @@ struct ice_vsi { irqreturn_t (*irq_handler)(int irq, void *data); DECLARE_BITMAP(state, __ICE_STATE_NBITS); + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; int num_q_vectors; int base_vector; enum ice_vsi_type type; @@ -122,6 +129,13 @@ struct ice_vsi { /* Interrupt thresholds */ u16 work_lmt; + /* RSS config */ + u16 rss_table_size; /* HW RSS table size */ + u16 rss_size; /* Allocated RSS queues */ + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ + u8 rss_lut_type; /* used to configure Get/Set RSS LUT AQ call */ + u16 max_frame; u16 rx_buf_len; @@ -181,6 +195,7 @@ struct ice_pf { struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ u32 msg_enable; + u32 hw_csum_rx_error; u32 oicr_idx; /* Other interrupt cause vector index */ u32 num_lan_msix; /* Total MSIX vectors for base driver */ u32 num_avail_msix; /* remaining MSIX vectors left unclaimed */ @@ -224,4 +239,8 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, wr32(hw, GLINT_DYN_CTL(vector), val); } +int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); + #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index c7abcc1dbbcc..051427dcfb3c 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -968,6 +968,60 @@ struct ice_aqc_nvm { __le32 addr_low; }; +/* Get/Set RSS key (indirect 0x0B04/0x0B02) */ +struct ice_aqc_get_set_rss_key { +#define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15) +#define ICE_AQC_GSET_RSS_KEY_VSI_ID_S 0 +#define ICE_AQC_GSET_RSS_KEY_VSI_ID_M (0x3FF << ICE_AQC_GSET_RSS_KEY_VSI_ID_S) + __le16 vsi_id; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +#define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE 0x28 +#define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE 0xC + +struct ice_aqc_get_set_rss_keys { + u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE]; + u8 extended_hash_key[ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE]; +}; + +/* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */ +struct ice_aqc_get_set_rss_lut { +#define ICE_AQC_GSET_RSS_LUT_VSI_VALID BIT(15) +#define ICE_AQC_GSET_RSS_LUT_VSI_ID_S 0 +#define ICE_AQC_GSET_RSS_LUT_VSI_ID_M (0x1FF << ICE_AQC_GSET_RSS_LUT_VSI_ID_S) + __le16 vsi_id; +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S 0 +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M \ + (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) + +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI 0 +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF 1 +#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL 2 + +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S 2 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M \ + (0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) + +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128 128 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG 0 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512 512 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG 1 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K 2048 +#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG 2 + +#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S 4 +#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M \ + (0xF << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) + + __le16 flags; + __le32 reserved; + __le32 addr_high; + __le32 addr_low; +}; + /* Add TX LAN Queues (indirect 0x0C30) */ struct ice_aqc_add_txqs { u8 num_qgrps; @@ -1089,6 +1143,8 @@ struct ice_aq_desc { struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; + struct ice_aqc_get_set_rss_lut get_set_rss_lut; + struct ice_aqc_get_set_rss_key get_set_rss_key; struct ice_aqc_add_txqs add_txqs; struct ice_aqc_dis_txqs dis_txqs; struct ice_aqc_add_get_update_free_vsi vsi_cmd; @@ -1171,6 +1227,12 @@ enum ice_adminq_opc { /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, + /* RSS commands */ + ice_aqc_opc_set_rss_key = 0x0B02, + ice_aqc_opc_set_rss_lut = 0x0B03, + ice_aqc_opc_get_rss_key = 0x0B04, + ice_aqc_opc_get_rss_lut = 0x0B05, + /* TX queue handling commands/events */ ice_aqc_opc_add_txqs = 0x0C30, ice_aqc_opc_dis_txqs = 0x0C31, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 3d5686636656..b4e834359a95 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1261,6 +1261,194 @@ void ice_clear_pxe_mode(struct ice_hw *hw) ice_aq_clear_pxe_mode(hw); } +/** + * __ice_aq_get_set_rss_lut + * @hw: pointer to the hardware structure + * @vsi_id: VSI FW index + * @lut_type: LUT table type + * @lut: pointer to the LUT buffer provided by the caller + * @lut_size: size of the LUT buffer + * @glob_lut_idx: global LUT index + * @set: set true to set the table, false to get the table + * + * Internal function to get (0x0B05) or set (0x0B03) RSS look up table + */ +static enum ice_status +__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size, u8 glob_lut_idx, bool set) +{ + struct ice_aqc_get_set_rss_lut *cmd_resp; + struct ice_aq_desc desc; + enum ice_status status; + u16 flags = 0; + + cmd_resp = &desc.params.get_set_rss_lut; + + if (set) { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_lut); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } else { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_lut); + } + + cmd_resp->vsi_id = cpu_to_le16(((vsi_id << + ICE_AQC_GSET_RSS_LUT_VSI_ID_S) & + ICE_AQC_GSET_RSS_LUT_VSI_ID_M) | + ICE_AQC_GSET_RSS_LUT_VSI_VALID); + + switch (lut_type) { + case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI: + case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF: + case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL: + flags |= ((lut_type << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M); + break; + default: + status = ICE_ERR_PARAM; + goto ice_aq_get_set_rss_lut_exit; + } + + if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL) { + flags |= ((glob_lut_idx << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) & + ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M); + + if (!set) + goto ice_aq_get_set_rss_lut_send; + } else if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) { + if (!set) + goto ice_aq_get_set_rss_lut_send; + } else { + goto ice_aq_get_set_rss_lut_send; + } + + /* LUT size is only valid for Global and PF table types */ + if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128) { + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + } else if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512) { + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + } else if ((lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K) && + (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF)) { + flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG << + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) & + ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M; + } else { + status = ICE_ERR_PARAM; + goto ice_aq_get_set_rss_lut_exit; + } + +ice_aq_get_set_rss_lut_send: + cmd_resp->flags = cpu_to_le16(flags); + status = ice_aq_send_cmd(hw, &desc, lut, lut_size, NULL); + +ice_aq_get_set_rss_lut_exit: + return status; +} + +/** + * ice_aq_get_rss_lut + * @hw: pointer to the hardware structure + * @vsi_id: VSI FW index + * @lut_type: LUT table type + * @lut: pointer to the LUT buffer provided by the caller + * @lut_size: size of the LUT buffer + * + * get the RSS lookup table, PF or VSI type + */ +enum ice_status +ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size) +{ + return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0, + false); +} + +/** + * ice_aq_set_rss_lut + * @hw: pointer to the hardware structure + * @vsi_id: VSI FW index + * @lut_type: LUT table type + * @lut: pointer to the LUT buffer provided by the caller + * @lut_size: size of the LUT buffer + * + * set the RSS lookup table, PF or VSI type + */ +enum ice_status +ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size) +{ + return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0, + true); +} + +/** + * __ice_aq_get_set_rss_key + * @hw: pointer to the hw struct + * @vsi_id: VSI FW index + * @key: pointer to key info struct + * @set: set true to set the key, false to get the key + * + * get (0x0B04) or set (0x0B02) the RSS key per VSI + */ +static enum +ice_status __ice_aq_get_set_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *key, + bool set) +{ + struct ice_aqc_get_set_rss_key *cmd_resp; + u16 key_size = sizeof(*key); + struct ice_aq_desc desc; + + cmd_resp = &desc.params.get_set_rss_key; + + if (set) { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_key); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } else { + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_key); + } + + cmd_resp->vsi_id = cpu_to_le16(((vsi_id << + ICE_AQC_GSET_RSS_KEY_VSI_ID_S) & + ICE_AQC_GSET_RSS_KEY_VSI_ID_M) | + ICE_AQC_GSET_RSS_KEY_VSI_VALID); + + return ice_aq_send_cmd(hw, &desc, key, key_size, NULL); +} + +/** + * ice_aq_get_rss_key + * @hw: pointer to the hw struct + * @vsi_id: VSI FW index + * @key: pointer to key info struct + * + * get the RSS key per VSI + */ +enum ice_status +ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *key) +{ + return __ice_aq_get_set_rss_key(hw, vsi_id, key, false); +} + +/** + * ice_aq_set_rss_key + * @hw: pointer to the hw struct + * @vsi_id: VSI FW index + * @keys: pointer to key info struct + * + * set the RSS key per VSI + */ +enum ice_status +ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *keys) +{ + return __ice_aq_get_set_rss_key(hw, vsi_id, keys, true); +} + /** * ice_aq_add_lan_txq * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 8ed1135bb189..38719ba01d9b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -33,6 +33,19 @@ enum ice_status ice_get_caps(struct ice_hw *hw); enum ice_status ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index); + +enum ice_status +ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size); +enum ice_status +ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, + u16 lut_size); +enum ice_status +ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *keys); +enum ice_status +ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id, + struct ice_aqc_get_set_rss_keys *keys); bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index e1221b9839d5..57acbcff740b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -40,6 +40,65 @@ union ice_32byte_rx_desc { } wb; /* writeback */ }; +struct ice_rx_ptype_decoded { + u32 ptype:10; + u32 known:1; + u32 outer_ip:1; + u32 outer_ip_ver:2; + u32 outer_frag:1; + u32 tunnel_type:3; + u32 tunnel_end_prot:2; + u32 tunnel_end_frag:1; + u32 inner_prot:4; + u32 payload_layer:3; +}; + +enum ice_rx_ptype_outer_ip { + ICE_RX_PTYPE_OUTER_L2 = 0, + ICE_RX_PTYPE_OUTER_IP = 1, +}; + +enum ice_rx_ptype_outer_ip_ver { + ICE_RX_PTYPE_OUTER_NONE = 0, + ICE_RX_PTYPE_OUTER_IPV4 = 1, + ICE_RX_PTYPE_OUTER_IPV6 = 2, +}; + +enum ice_rx_ptype_outer_fragmented { + ICE_RX_PTYPE_NOT_FRAG = 0, + ICE_RX_PTYPE_FRAG = 1, +}; + +enum ice_rx_ptype_tunnel_type { + ICE_RX_PTYPE_TUNNEL_NONE = 0, + ICE_RX_PTYPE_TUNNEL_IP_IP = 1, + ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2, + ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, + ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, +}; + +enum ice_rx_ptype_tunnel_end_prot { + ICE_RX_PTYPE_TUNNEL_END_NONE = 0, + ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1, + ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2, +}; + +enum ice_rx_ptype_inner_prot { + ICE_RX_PTYPE_INNER_PROT_NONE = 0, + ICE_RX_PTYPE_INNER_PROT_UDP = 1, + ICE_RX_PTYPE_INNER_PROT_TCP = 2, + ICE_RX_PTYPE_INNER_PROT_SCTP = 3, + ICE_RX_PTYPE_INNER_PROT_ICMP = 4, + ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5, +}; + +enum ice_rx_ptype_payload_layer { + ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, + ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, + ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, + ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, +}; + /* RX Flex Descriptor * This descriptor is used instead of the legacy version descriptor when * ice_rlan_ctx.adv_desc is set @@ -88,6 +147,41 @@ union ice_32b_rx_flex_desc { } wb; /* writeback */ }; +/* Rx Flex Descriptor NIC Profile + * This descriptor corresponds to RxDID 2 which contains + * metadata fields for RSS, flow id and timestamp info + */ +struct ice_32b_rx_flex_desc_nic { + /* Qword 0 */ + u8 rxdid; + u8 mir_id_umb_cast; + __le16 ptype_flexi_flags0; + __le16 pkt_len; + __le16 hdr_len_sph_flex_flags1; + + /* Qword 1 */ + __le16 status_error0; + __le16 l2tag1; + __le32 rss_hash; + + /* Qword 2 */ + __le16 status_error1; + u8 flexi_flags2; + u8 ts_low; + __le16 l2tag2_1st; + __le16 l2tag2_2nd; + + /* Qword 3 */ + __le32 flow_id; + union { + struct { + __le16 vlan_id; + __le16 flow_id_ipv6; + } flex; + __le32 ts_high; + } flex_ts; +}; + /* Receive Flex Descriptor profile IDs: There are a total * of 64 profiles where profile IDs 0/1 are for legacy; and * profiles 2-63 are flex profiles that can be programmed @@ -241,12 +335,56 @@ enum ice_tx_desc_dtype_value { enum ice_tx_desc_cmd_bits { ICE_TX_DESC_CMD_EOP = 0x0001, ICE_TX_DESC_CMD_RS = 0x0002, + ICE_TX_DESC_CMD_IL2TAG1 = 0x0008, + ICE_TX_DESC_CMD_IIPT_IPV6 = 0x0020, /* 2 BITS */ + ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */ + ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */ + ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */ + ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */ }; #define ICE_TXD_QW1_OFFSET_S 16 +#define ICE_TXD_QW1_OFFSET_M (0x3FFFFULL << ICE_TXD_QW1_OFFSET_S) + +enum ice_tx_desc_len_fields { + /* Note: These are predefined bit offsets */ + ICE_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */ + ICE_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */ + ICE_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */ +}; + #define ICE_TXD_QW1_TX_BUF_SZ_S 34 #define ICE_TXD_QW1_L2TAG1_S 48 +/* Context descriptors */ +struct ice_tx_ctx_desc { + __le32 tunneling_params; + __le16 l2tag2; + __le16 rsvd; + __le64 qw1; +}; + +#define ICE_TXD_CTX_QW1_CMD_S 4 +#define ICE_TXD_CTX_QW1_CMD_M (0x7FUL << ICE_TXD_CTX_QW1_CMD_S) + +#define ICE_TXD_CTX_QW1_TSO_LEN_S 30 +#define ICE_TXD_CTX_QW1_TSO_LEN_M \ + (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S) + +#define ICE_TXD_CTX_QW1_MSS_S 50 + +enum ice_tx_ctx_desc_cmd_bits { + ICE_TX_CTX_DESC_TSO = 0x01, + ICE_TX_CTX_DESC_TSYN = 0x02, + ICE_TX_CTX_DESC_IL2TAG2 = 0x04, + ICE_TX_CTX_DESC_IL2TAG2_IL2H = 0x08, + ICE_TX_CTX_DESC_SWTCH_NOTAG = 0x00, + ICE_TX_CTX_DESC_SWTCH_UPLINK = 0x10, + ICE_TX_CTX_DESC_SWTCH_LOCAL = 0x20, + ICE_TX_CTX_DESC_SWTCH_VSI = 0x30, + ICE_TX_CTX_DESC_RESERVED = 0x40 +}; + #define ICE_LAN_TXQ_MAX_QGRPS 127 #define ICE_LAN_TXQ_MAX_QDIS 1023 @@ -289,4 +427,35 @@ struct ice_tlan_ctx { u8 pkt_shaper_prof_idx; u8 int_q_state; /* width not needed - internal do not write */ }; + +/* macro to make the table lines short */ +#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ + { PTYPE, \ + 1, \ + ICE_RX_PTYPE_OUTER_##OUTER_IP, \ + ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \ + ICE_RX_PTYPE_##OUTER_FRAG, \ + ICE_RX_PTYPE_TUNNEL_##T, \ + ICE_RX_PTYPE_TUNNEL_END_##TE, \ + ICE_RX_PTYPE_##TEF, \ + ICE_RX_PTYPE_INNER_PROT_##I, \ + ICE_RX_PTYPE_PAYLOAD_LAYER_##PL } + +#define ICE_PTT_UNUSED_ENTRY(PTYPE) { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + +/* shorter macros makes the table fit but are terse */ +#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG + +/* Lookup table mapping the HW PTYPE to the bit field for decoding */ +static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = { + /* L2 Packet types */ + ICE_PTT_UNUSED_ENTRY(0), + ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), + ICE_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), +}; + +static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) +{ + return ice_ptype_lkup[ptype]; +} #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index dd07bf862afe..2124e7ad640c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -663,6 +663,35 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) return err; } +/** + * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type + * @vsi: the VSI being configured + */ +static void ice_vsi_set_rss_params(struct ice_vsi *vsi) +{ + struct ice_hw_common_caps *cap; + struct ice_pf *pf = vsi->back; + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + vsi->rss_size = 1; + return; + } + + cap = &pf->hw.func_caps.common_cap; + switch (vsi->type) { + case ICE_VSI_PF: + /* PF VSI will inherit RSS instance of PF */ + vsi->rss_table_size = cap->rss_table_size; + vsi->rss_size = min_t(int, num_online_cpus(), + BIT(cap->rss_table_entry_width)); + vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; + break; + default: + dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); + break; + } +} + /** * ice_vsi_setup_q_map - Setup a VSI queue map * @vsi: the VSI being configured @@ -670,7 +699,8 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) */ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - u16 offset = 0, qmap = 0, pow = 0, qcount; + u16 offset = 0, qmap = 0, numq_tc; + u16 pow = 0, max_rss = 0, qcount; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; bool ena_tc0 = false; @@ -689,13 +719,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) vsi->tc_cfg.ena_tc |= 1; } - qcount = qcount_rx / vsi->tc_cfg.numtc; - - /* find higher power-of-2 of qcount */ - pow = ilog2(qcount); - - if (!is_power_of_2(qcount)) - pow++; + numq_tc = qcount_rx / vsi->tc_cfg.numtc; /* TC mapping is a function of the number of Rx queues assigned to the * VSI for each traffic class and the offset of these queues. @@ -708,6 +732,26 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) * * Setup number and offset of Rx queues for all TCs for the VSI */ + + /* qcount will change if RSS is enabled */ + if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) { + if (vsi->type == ICE_VSI_PF) + max_rss = ICE_MAX_LG_RSS_QS; + else + max_rss = ICE_MAX_SMALL_RSS_QS; + + qcount = min_t(int, numq_tc, max_rss); + qcount = min_t(int, qcount, vsi->rss_size); + } else { + qcount = numq_tc; + } + + /* find higher power-of-2 of qcount */ + pow = ilog2(qcount); + + if (!is_power_of_2(qcount)) + pow++; + for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { /* TC is not enabled */ @@ -783,6 +827,33 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) /* No Outer tag support outer_tag_flags remains to zero */ } +/** + * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI + * @ctxt: the VSI context being set + * @vsi: the VSI being configured + */ +static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) +{ + u8 lut_type, hash_type; + + switch (vsi->type) { + case ICE_VSI_PF: + /* PF VSI will inherit RSS instance of PF */ + lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; + hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", + vsi->type); + return; + } + + ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & + ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | + ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & + ICE_AQ_VSI_Q_OPT_RSS_HASH_M); +} + /** * ice_vsi_add - Create a new VSI or fetch preallocated VSI * @vsi: the VSI being configured @@ -810,6 +881,10 @@ static int ice_vsi_add(struct ice_vsi *vsi) if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB) ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB; + /* Set LUT type and HASH type if RSS is enabled */ + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_set_rss_vsi_ctx(&ctxt, vsi); + ctxt.info.sw_id = vsi->port_info->sw_id; ice_vsi_setup_q_map(vsi, &ctxt); @@ -1629,6 +1704,10 @@ static void ice_vsi_free_q_vectors(struct ice_vsi *vsi) */ static int ice_cfg_netdev(struct ice_vsi *vsi) { + netdev_features_t csumo_features; + netdev_features_t vlano_features; + netdev_features_t dflt_features; + netdev_features_t tso_features; struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; @@ -1642,13 +1721,31 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) np = netdev_priv(netdev); np->vsi = vsi; + dflt_features = NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_RXHASH; + + csumo_features = NETIF_F_RXCSUM | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM; + + vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + + tso_features = NETIF_F_TSO; + /* set features that user can change */ - netdev->hw_features = NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_RXHASH; + netdev->hw_features = dflt_features | csumo_features | + vlano_features | tso_features; /* enable features */ netdev->features |= netdev->hw_features; + /* encap and VLAN devices inherit default, csumo and tso features */ + netdev->hw_enc_features |= dflt_features | csumo_features | + tso_features; + netdev->vlan_features |= dflt_features | csumo_features | + tso_features; if (vsi->type == ICE_VSI_PF) { SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev); @@ -1862,6 +1959,83 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) return 0; } +/** + * ice_fill_rss_lut - Fill the RSS lookup table with default values + * @lut: Lookup table + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + */ +void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; +} + +/** + * ice_vsi_cfg_rss - Configure RSS params for a VSI + * @vsi: VSI to be configured + */ +static int ice_vsi_cfg_rss(struct ice_vsi *vsi) +{ + u8 seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE]; + struct ice_aqc_get_set_rss_keys *key; + struct ice_pf *pf = vsi->back; + enum ice_status status; + int err = 0; + u8 *lut; + + vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq); + + lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); + + status = ice_aq_set_rss_lut(&pf->hw, vsi->vsi_num, vsi->rss_lut_type, + lut, vsi->rss_table_size); + + if (status) { + dev_err(&vsi->back->pdev->dev, + "set_rss_lut failed, error %d\n", status); + err = -EIO; + goto ice_vsi_cfg_rss_exit; + } + + key = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*key), GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto ice_vsi_cfg_rss_exit; + } + + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, + ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); + else + netdev_rss_key_fill((void *)seed, + ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); + memcpy(&key->standard_rss_key, seed, + ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); + + status = ice_aq_set_rss_key(&pf->hw, vsi->vsi_num, key); + + if (status) { + dev_err(&vsi->back->pdev->dev, "set_rss_key failed, error %d\n", + status); + err = -EIO; + } + + devm_kfree(&pf->pdev->dev, key); +ice_vsi_cfg_rss_exit: + devm_kfree(&pf->pdev->dev, lut); + return err; +} + /** * ice_vsi_setup - Set up a VSI by a given type * @pf: board private structure @@ -1897,6 +2071,9 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, goto err_get_qs; } + /* set RSS capabilities */ + ice_vsi_set_rss_params(vsi); + /* create the VSI */ ret = ice_vsi_add(vsi); if (ret) @@ -1932,6 +2109,12 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, ice_vsi_map_rings_to_vectors(vsi); + /* Do not exit if configuring RSS had an issue, at least + * receive traffic on first queue. Hence no need to capture + * return value + */ + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_vsi_cfg_rss(vsi); break; default: /* if vsi type is not recognized, clean up the resources and @@ -1966,6 +2149,140 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, return NULL; } +/** + * ice_vsi_add_vlan - Add vsi membership for given vlan + * @vsi: the vsi being configured + * @vid: vlan id to be added + */ +static int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) +{ + struct ice_fltr_list_entry *tmp; + struct ice_pf *pf = vsi->back; + LIST_HEAD(tmp_add_list); + enum ice_status status; + int err = 0; + + tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; + tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; + tmp->fltr_info.flag = ICE_FLTR_TX; + tmp->fltr_info.src = vsi->vsi_num; + tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num; + tmp->fltr_info.l_data.vlan.vlan_id = vid; + + INIT_LIST_HEAD(&tmp->list_entry); + list_add(&tmp->list_entry, &tmp_add_list); + + status = ice_add_vlan(&pf->hw, &tmp_add_list); + if (status) { + err = -ENODEV; + dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n", + vid, vsi->vsi_num); + } + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + return err; +} + +/** + * ice_vlan_rx_add_vid - Add a vlan id filter to HW offload + * @netdev: network interface to be adjusted + * @proto: unused protocol + * @vid: vlan id to be added + * + * net_device_ops implementation for adding vlan ids + */ +static int ice_vlan_rx_add_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int ret = 0; + + if (vid >= VLAN_N_VID) { + netdev_err(netdev, "VLAN id requested %d is out of range %d\n", + vid, VLAN_N_VID); + return -EINVAL; + } + + if (vsi->info.pvid) + return -EINVAL; + + /* Add all VLAN ids including 0 to the switch filter. VLAN id 0 is + * needed to continue allowing all untagged packets since VLAN prune + * list is applied to all packets by the switch + */ + ret = ice_vsi_add_vlan(vsi, vid); + + if (!ret) + set_bit(vid, vsi->active_vlans); + + return ret; +} + +/** + * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN + * @vsi: the VSI being configured + * @vid: VLAN id to be removed + */ +static void ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) +{ + struct ice_fltr_list_entry *list; + struct ice_pf *pf = vsi->back; + LIST_HEAD(tmp_add_list); + + list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + if (!list) + return; + + list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; + list->fltr_info.fwd_id.vsi_id = vsi->vsi_num; + list->fltr_info.fltr_act = ICE_FWD_TO_VSI; + list->fltr_info.l_data.vlan.vlan_id = vid; + list->fltr_info.flag = ICE_FLTR_TX; + list->fltr_info.src = vsi->vsi_num; + + INIT_LIST_HEAD(&list->list_entry); + list_add(&list->list_entry, &tmp_add_list); + + if (ice_remove_vlan(&pf->hw, &tmp_add_list)) + dev_err(&pf->pdev->dev, "Error removing VLAN %d on vsi %i\n", + vid, vsi->vsi_num); + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); +} + +/** + * ice_vlan_rx_kill_vid - Remove a vlan id filter from HW offload + * @netdev: network interface to be adjusted + * @proto: unused protocol + * @vid: vlan id to be removed + * + * net_device_ops implementation for removing vlan ids + */ +static int ice_vlan_rx_kill_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (vsi->info.pvid) + return -EINVAL; + + /* return code is ignored as there is nothing a user + * can do about failure to remove and a log message was + * already printed from the other function + */ + ice_vsi_kill_vlan(vsi, vid); + + clear_bit(vid, vsi->active_vlans); + + return 0; +} + /** * ice_setup_pf_sw - Setup the HW switch on startup or after reset * @pf: board private structure @@ -2047,9 +2364,14 @@ static void ice_determine_q_usage(struct ice_pf *pf) q_left_tx = pf->hw.func_caps.common_cap.num_txq; q_left_rx = pf->hw.func_caps.common_cap.num_rxq; - /* initial support for only 1 tx and 1 rx queue */ + /* initial support for only 1 tx queue */ pf->num_lan_tx = 1; - pf->num_lan_rx = 1; + + /* only 1 rx queue unless RSS is enabled */ + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + pf->num_lan_rx = 1; + else + pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus()); pf->q_left_tx = q_left_tx - pf->num_lan_tx; pf->q_left_rx = q_left_rx - pf->num_lan_rx; @@ -2087,6 +2409,9 @@ static void ice_init_pf(struct ice_pf *pf) bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS); mutex_unlock(&pf->avail_q_mutex); + if (pf->hw.func_caps.common_cap.rss_table_size) + set_bit(ICE_FLAG_RSS_ENA, pf->flags); + /* setup service timer and periodic service task */ timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; @@ -2483,6 +2808,144 @@ static void __exit ice_module_exit(void) } module_exit(ice_module_exit); +/** + * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx + * @vsi: the vsi being changed + */ +static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) +{ + struct device *dev = &vsi->back->pdev->dev; + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx ctxt = { 0 }; + enum ice_status status; + + /* Here we are configuring the VSI to let the driver add VLAN tags by + * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN + * tag insertion happens in the Tx hot path, in ice_tx_map. + */ + ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL; + + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); + ctxt.vsi_num = vsi->vsi_num; + + status = ice_aq_update_vsi(hw, &ctxt, NULL); + if (status) { + dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + return -EIO; + } + + vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + return 0; +} + +/** + * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx + * @vsi: the vsi being changed + * @ena: boolean value indicating if this is a enable or disable request + */ +static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) +{ + struct device *dev = &vsi->back->pdev->dev; + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx ctxt = { 0 }; + enum ice_status status; + + /* Here we are configuring what the VSI should do with the VLAN tag in + * the Rx packet. We can either leave the tag in the packet or put it in + * the Rx descriptor. + */ + if (ena) { + /* Strip VLAN tag from Rx packet and put it in the desc */ + ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH; + } else { + /* Disable stripping. Leave tag in packet */ + ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING; + } + + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); + ctxt.vsi_num = vsi->vsi_num; + + status = ice_aq_update_vsi(hw, &ctxt, NULL); + if (status) { + dev_err(dev, "update VSI for VALN strip failed, ena = %d err %d aq_err %d\n", + ena, status, hw->adminq.sq_last_status); + return -EIO; + } + + vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; + return 0; +} + +/** + * ice_set_features - set the netdev feature flags + * @netdev: ptr to the netdev being adjusted + * @features: the feature set that the stack is suggesting + */ +static int ice_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int ret = 0; + + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && + !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) + ret = ice_vsi_manage_vlan_stripping(vsi, true); + else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) && + (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) + ret = ice_vsi_manage_vlan_stripping(vsi, false); + else if ((features & NETIF_F_HW_VLAN_CTAG_TX) && + !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) + ret = ice_vsi_manage_vlan_insertion(vsi); + else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) && + (netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) + ret = ice_vsi_manage_vlan_insertion(vsi); + + return ret; +} + +/** + * ice_vsi_vlan_setup - Setup vlan offload properties on a VSI + * @vsi: VSI to setup vlan properties for + */ +static int ice_vsi_vlan_setup(struct ice_vsi *vsi) +{ + int ret = 0; + + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + ret = ice_vsi_manage_vlan_stripping(vsi, true); + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX) + ret = ice_vsi_manage_vlan_insertion(vsi); + + return ret; +} + +/** + * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up + * @vsi: the VSI being brought back up + */ +static int ice_restore_vlan(struct ice_vsi *vsi) +{ + int err; + u16 vid; + + if (!vsi->netdev) + return -EINVAL; + + err = ice_vsi_vlan_setup(vsi); + if (err) + return err; + + for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) { + err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid); + if (err) + break; + } + + return err; +} + /** * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance * @ring: The Tx ring to configure @@ -2735,6 +3198,10 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) { int err; + err = ice_restore_vlan(vsi); + if (err) + return err; + err = ice_vsi_cfg_txqs(vsi); if (!err) err = ice_vsi_cfg_rxqs(vsi); @@ -3213,6 +3680,22 @@ static void ice_vsi_close(struct ice_vsi *vsi) ice_vsi_free_rx_rings(vsi); } +/** + * ice_rss_clean - Delete RSS related VSI structures that hold user inputs + * @vsi: the VSI being removed + */ +static void ice_rss_clean(struct ice_vsi *vsi) +{ + struct ice_pf *pf; + + pf = vsi->back; + + if (vsi->rss_hkey_user) + devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user); + if (vsi->rss_lut_user) + devm_kfree(&pf->pdev->dev, vsi->rss_lut_user); +} + /** * ice_vsi_release - Delete a VSI and free its resources * @vsi: the VSI being removed @@ -3233,6 +3716,10 @@ static int ice_vsi_release(struct ice_vsi *vsi) vsi->netdev = NULL; } + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_rss_clean(vsi); + + /* Disable VSI and free resources */ ice_vsi_dis_irq(vsi); ice_vsi_close(vsi); @@ -3254,6 +3741,91 @@ static int ice_vsi_release(struct ice_vsi *vsi) return 0; } +/** + * ice_set_rss - Set RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + */ +int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + + if (seed) { + struct ice_aqc_get_set_rss_keys *buf = + (struct ice_aqc_get_set_rss_keys *)seed; + + status = ice_aq_set_rss_key(hw, vsi->vsi_num, buf); + + if (status) { + dev_err(&pf->pdev->dev, + "Cannot set RSS key, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + if (lut) { + status = ice_aq_set_rss_lut(hw, vsi->vsi_num, + vsi->rss_lut_type, lut, lut_size); + if (status) { + dev_err(&pf->pdev->dev, + "Cannot set RSS lut, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + return 0; +} + +/** + * ice_get_rss - Get RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + + if (seed) { + struct ice_aqc_get_set_rss_keys *buf = + (struct ice_aqc_get_set_rss_keys *)seed; + + status = ice_aq_get_rss_key(hw, vsi->vsi_num, buf); + if (status) { + dev_err(&pf->pdev->dev, + "Cannot get RSS key, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + if (lut) { + status = ice_aq_get_rss_lut(hw, vsi->vsi_num, + vsi->rss_lut_type, lut, lut_size); + if (status) { + dev_err(&pf->pdev->dev, + "Cannot get RSS lut, err %d aq_err %d\n", + status, hw->adminq.rq_last_status); + return -EIO; + } + } + + return 0; +} + /** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure @@ -3306,4 +3878,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_open = ice_open, .ndo_stop = ice_stop, .ndo_start_xmit = ice_start_xmit, + .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, + .ndo_set_features = ice_set_features, }; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 3944dac6cfc6..5624df9eb6fc 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1217,6 +1217,117 @@ ice_add_mac(struct ice_hw *hw, struct list_head *m_list) return status; } +/** + * ice_find_vlan_entry + * @hw: pointer to the hardware structure + * @vlan_id: VLAN id to search for + * + * Helper function to search for a VLAN entry using a given VLAN id + * Returns pointer to the entry if found. + */ +static struct ice_fltr_mgmt_list_entry * +ice_find_vlan_entry(struct ice_hw *hw, u16 vlan_id) +{ + struct ice_fltr_mgmt_list_entry *vlan_list_itr, *vlan_ret = NULL; + struct ice_switch_info *sw = hw->switch_info; + + mutex_lock(&sw->vlan_list_lock); + list_for_each_entry(vlan_list_itr, &sw->vlan_list_head, list_entry) + if (vlan_list_itr->fltr_info.l_data.vlan.vlan_id == vlan_id) { + vlan_ret = vlan_list_itr; + break; + } + + mutex_unlock(&sw->vlan_list_lock); + return vlan_ret; +} + +/** + * ice_add_vlan_internal - Add one VLAN based filter rule + * @hw: pointer to the hardware structure + * @f_entry: filter entry containing one VLAN information + */ +static enum ice_status +ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_info *new_fltr, *cur_fltr; + struct ice_fltr_mgmt_list_entry *v_list_itr; + u16 vlan_id; + + new_fltr = &f_entry->fltr_info; + /* VLAN id should only be 12 bits */ + if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID) + return ICE_ERR_PARAM; + + vlan_id = new_fltr->l_data.vlan.vlan_id; + v_list_itr = ice_find_vlan_entry(hw, vlan_id); + if (!v_list_itr) { + u16 vsi_id = ICE_VSI_INVAL_ID; + enum ice_status status; + u16 vsi_list_id = 0; + + if (new_fltr->fltr_act == ICE_FWD_TO_VSI) { + enum ice_sw_lkup_type lkup_type = new_fltr->lkup_type; + + /* All VLAN pruning rules use a VSI list. + * Convert the action to forwarding to a VSI list. + */ + vsi_id = new_fltr->fwd_id.vsi_id; + status = ice_create_vsi_list_rule(hw, &vsi_id, 1, + &vsi_list_id, + lkup_type); + if (status) + return status; + new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST; + new_fltr->fwd_id.vsi_list_id = vsi_list_id; + } + + status = ice_create_pkt_fwd_rule(hw, f_entry); + if (!status && vsi_id != ICE_VSI_INVAL_ID) { + v_list_itr = ice_find_vlan_entry(hw, vlan_id); + if (!v_list_itr) + return ICE_ERR_DOES_NOT_EXIST; + v_list_itr->vsi_list_info = + ice_create_vsi_list_map(hw, &vsi_id, 1, + vsi_list_id); + } + + return status; + } + + cur_fltr = &v_list_itr->fltr_info; + return ice_handle_vsi_list_mgmt(hw, v_list_itr, cur_fltr, new_fltr); +} + +/** + * ice_add_vlan - Add VLAN based filter rule + * @hw: pointer to the hardware structure + * @v_list: list of VLAN entries and forwarding information + */ +enum ice_status +ice_add_vlan(struct ice_hw *hw, struct list_head *v_list) +{ + struct ice_fltr_list_entry *v_list_itr; + + if (!v_list || !hw) + return ICE_ERR_PARAM; + + list_for_each_entry(v_list_itr, v_list, list_entry) { + enum ice_status status; + + if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN) + return ICE_ERR_PARAM; + + status = ice_add_vlan_internal(hw, v_list_itr); + if (status) { + v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL; + return status; + } + v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS; + } + return 0; +} + /** * ice_remove_vsi_list_rule * @hw: pointer to the hardware structure @@ -1515,6 +1626,54 @@ ice_remove_mac(struct ice_hw *hw, struct list_head *m_list) return status; } +/** + * ice_remove_vlan_internal - Remove one VLAN based filter rule + * @hw: pointer to the hardware structure + * @f_entry: filter entry containing one VLAN information + */ +static enum ice_status +ice_remove_vlan_internal(struct ice_hw *hw, + struct ice_fltr_list_entry *f_entry) +{ + struct ice_fltr_info *new_fltr; + struct ice_fltr_mgmt_list_entry *v_list_elem; + u16 vsi_id; + + new_fltr = &f_entry->fltr_info; + + v_list_elem = ice_find_vlan_entry(hw, new_fltr->l_data.vlan.vlan_id); + if (!v_list_elem) + return ICE_ERR_PARAM; + + vsi_id = f_entry->fltr_info.fwd_id.vsi_id; + return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, v_list_elem); +} + +/** + * ice_remove_vlan - Remove VLAN based filter rule + * @hw: pointer to the hardware structure + * @v_list: list of VLAN entries and forwarding information + */ +enum ice_status +ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list) +{ + struct ice_fltr_list_entry *v_list_itr; + enum ice_status status = 0; + + if (!v_list || !hw) + return ICE_ERR_PARAM; + + list_for_each_entry(v_list_itr, v_list, list_entry) { + status = ice_remove_vlan_internal(hw, v_list_itr); + if (status) { + v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL; + return status; + } + v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS; + } + return status; +} + /** * ice_add_to_vsi_fltr_list - Add VSI filters to the list * @hw: pointer to the hardware structure @@ -1600,6 +1759,16 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_id, } break; case ICE_SW_LKUP_VLAN: + mutex_lock(&sw->vlan_list_lock); + status = ice_add_to_vsi_fltr_list(hw, vsi_id, + &sw->vlan_list_head, + &remove_list_head); + mutex_unlock(&sw->vlan_list_lock); + if (!status) { + ice_remove_vlan(hw, &remove_list_head); + goto free_fltr_list; + } + break; case ICE_SW_LKUP_MAC_VLAN: case ICE_SW_LKUP_ETHERTYPE: case ICE_SW_LKUP_ETHERTYPE_MAC: diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 1b312e7ab2b5..80b4e1951b32 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -8,6 +8,7 @@ #define ICE_SW_CFG_MAX_BUF_LEN 2048 #define ICE_DFLT_VSI_INVAL 0xff +#define ICE_VSI_INVAL_ID 0xffff /* VSI context structure for add/get/update/free operations */ struct ice_vsi_ctx { @@ -152,4 +153,7 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw); enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst); enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst); void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id); +enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); +enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list); + #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 1ccf8e69b85a..6481e3d86374 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -796,6 +796,134 @@ static bool ice_is_non_eop(struct ice_ring *rx_ring, return true; } +/** + * ice_ptype_to_htype - get a hash type + * @ptype: the ptype value from the descriptor + * + * Returns a hash type to be used by skb_set_hash + */ +static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype) +{ + return PKT_HASH_TYPE_NONE; +} + +/** + * ice_rx_hash - set the hash value in the skb + * @rx_ring: descriptor ring + * @rx_desc: specific descriptor + * @skb: pointer to current skb + * @rx_ptype: the ptype value from the descriptor + */ +static void +ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 rx_ptype) +{ + struct ice_32b_rx_flex_desc_nic *nic_mdid; + u32 hash; + + if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + return; + + if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) + return; + + nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; + hash = le32_to_cpu(nic_mdid->rss_hash); + skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); +} + +/** + * ice_rx_csum - Indicate in skb if checksum is good + * @vsi: the VSI we care about + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: the packet type decoded by hardware + * + * skb->protocol must be set before this function is called + */ +static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb, + union ice_32b_rx_flex_desc *rx_desc, u8 ptype) +{ + struct ice_rx_ptype_decoded decoded; + u32 rx_error, rx_status; + bool ipv4, ipv6; + + rx_status = le16_to_cpu(rx_desc->wb.status_error0); + rx_error = rx_status; + + decoded = ice_decode_rx_desc_ptype(ptype); + + /* Start with CHECKSUM_NONE and by default csum_level = 0 */ + skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + + /* check if Rx checksum is enabled */ + if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + return; + + /* check if HW has decoded the packet and checksum */ + if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) + return; + + if (!(decoded.known && decoded.outer_ip)) + return; + + ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + + if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) + goto checksum_fail; + else if (ipv6 && (rx_status & + (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) + goto checksum_fail; + + /* check for L4 errors and handle packets that were not able to be + * checksummed due to arrival speed + */ + if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) + goto checksum_fail; + + /* Only report checksum unnecessary for TCP, UDP, or SCTP */ + switch (decoded.inner_prot) { + case ICE_RX_PTYPE_INNER_PROT_TCP: + case ICE_RX_PTYPE_INNER_PROT_UDP: + case ICE_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + default: + break; + } + return; + +checksum_fail: + vsi->back->hw_csum_rx_error++; +} + +/** + * ice_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * @ptype: the packet type decoded by hardware + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. + */ +static void ice_process_skb_fields(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 ptype) +{ + ice_rx_hash(rx_ring, rx_desc, skb, ptype); + + /* modifies the skb - consumes the enet header */ + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + + ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype); +} + /** * ice_receive_skb - Send a completed packet up the stack * @rx_ring: rx ring in play @@ -839,6 +967,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) struct sk_buff *skb; u16 stat_err_bits; u16 vlan_tag = 0; + u8 rx_ptype; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= ICE_RX_BUF_WRITE) { @@ -882,6 +1011,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) continue; } + rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & + ICE_RX_FLEX_DESC_PTYPE_M; + stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S); if (ice_test_staterr(rx_desc, stat_err_bits)) vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1); @@ -897,6 +1029,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; + /* populate checksum, VLAN, and protocol */ + ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + /* send completed skb up the stack */ ice_receive_skb(rx_ring, skb, vlan_tag); @@ -1026,14 +1161,17 @@ static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) * ice_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on * @first: first buffer info buffer to use + * @off: pointer to struct that holds offload parameters * * This function loops over the skb data pointed to by *first * and gets a physical address for each memory location and programs * it and the length into the transmit descriptor. */ -static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first) +static void +ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, + struct ice_tx_offload_params *off) { - u64 td_offset = 0, td_tag = 0, td_cmd = 0; + u64 td_offset, td_tag, td_cmd; u16 i = tx_ring->next_to_use; struct skb_frag_struct *frag; unsigned int data_len, size; @@ -1042,6 +1180,9 @@ static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first) struct sk_buff *skb; dma_addr_t dma; + td_tag = off->td_l2tag1; + td_cmd = off->td_cmd; + td_offset = off->td_offset; skb = first->skb; data_len = skb->data_len; @@ -1049,6 +1190,12 @@ static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first) tx_desc = ICE_TX_DESC(tx_ring, i); + if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) { + td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1; + td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >> + ICE_TX_FLAGS_VLAN_S; + } + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); tx_buf = first; @@ -1169,6 +1316,223 @@ static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first) tx_ring->next_to_use = i; } +/** + * ice_tx_csum - Enable Tx checksum offloads + * @first: pointer to the first descriptor + * @off: pointer to struct that holds offload parameters + * + * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise. + */ +static +int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) +{ + u32 l4_len = 0, l3_len = 0, l2_len = 0; + struct sk_buff *skb = first->skb; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + __be16 frag_off, protocol; + unsigned char *exthdr; + u32 offset, cmd = 0; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + l2_len = ip.hdr - skb->data; + offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S; + + if (skb->encapsulation) + return -1; + + /* Enable IP checksum offloads */ + protocol = vlan_get_protocol(skb); + if (protocol == htons(ETH_P_IP)) { + l4_proto = ip.v4->protocol; + /* the stack computes the IP header already, the only time we + * need the hardware to recompute it is in the case of TSO. + */ + if (first->tx_flags & ICE_TX_FLAGS_TSO) + cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM; + else + cmd |= ICE_TX_DESC_CMD_IIPT_IPV4; + + } else if (protocol == htons(ETH_P_IPV6)) { + cmd |= ICE_TX_DESC_CMD_IIPT_IPV6; + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto, + &frag_off); + } else { + return -1; + } + + /* compute inner L3 header size */ + l3_len = l4.hdr - ip.hdr; + offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S; + + /* Enable L4 checksum offloads */ + switch (l4_proto) { + case IPPROTO_TCP: + /* enable checksum offloads */ + cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP; + l4_len = l4.tcp->doff; + offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; + break; + case IPPROTO_UDP: + /* enable UDP checksum offload */ + cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP; + l4_len = (sizeof(struct udphdr) >> 2); + offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S; + break; + case IPPROTO_SCTP: + default: + if (first->tx_flags & ICE_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; + } + + off->td_cmd |= cmd; + off->td_offset |= offset; + return 1; +} + +/** + * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW + * @tx_ring: ring to send buffer on + * @first: pointer to struct ice_tx_buf + * + * Checks the skb and set up correspondingly several generic transmit flags + * related to VLAN tagging for the HW, such as VLAN, DCB, etc. + * + * Returns error code indicate the frame should be dropped upon error and the + * otherwise returns 0 to indicate the flags has been set properly. + */ +static int +ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first) +{ + struct sk_buff *skb = first->skb; + __be16 protocol = skb->protocol; + + if (protocol == htons(ETH_P_8021Q) && + !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) { + /* when HW VLAN acceleration is turned off by the user the + * stack sets the protocol to 8021q so that the driver + * can take any steps required to support the SW only + * VLAN handling. In our case the driver doesn't need + * to take any further steps so just set the protocol + * to the encapsulated ethertype. + */ + skb->protocol = vlan_get_protocol(skb); + goto out; + } + + /* if we have a HW VLAN tag being added, default to the HW one */ + if (skb_vlan_tag_present(skb)) { + first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S; + first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; + } else if (protocol == htons(ETH_P_8021Q)) { + struct vlan_hdr *vhdr, _vhdr; + + /* for SW VLAN, check the next protocol and store the tag */ + vhdr = (struct vlan_hdr *)skb_header_pointer(skb, ETH_HLEN, + sizeof(_vhdr), + &_vhdr); + if (!vhdr) + return -EINVAL; + + first->tx_flags |= ntohs(vhdr->h_vlan_TCI) << + ICE_TX_FLAGS_VLAN_S; + first->tx_flags |= ICE_TX_FLAGS_SW_VLAN; + } + +out: + return 0; +} + +/** + * ice_tso - computes mss and TSO length to prepare for TSO + * @first: pointer to struct ice_tx_buf + * @off: pointer to struct that holds offload parameters + * + * Returns 0 or error (negative) if TSO can't happen, 1 otherwise. + */ +static +int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) +{ + struct sk_buff *skb = first->skb; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + u64 cd_mss, cd_tso_len; + u32 paylen, l4_start; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + + /* determine offset of transport header */ + l4_start = l4.hdr - skb->data; + + /* remove payload length from checksum */ + paylen = skb->len - l4_start; + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); + + /* compute length of segmentation header */ + off->header_len = (l4.tcp->doff * 4) + l4_start; + + /* update gso_segs and bytecount */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount = (first->gso_segs - 1) * off->header_len; + + cd_tso_len = skb->len - off->header_len; + cd_mss = skb_shinfo(skb)->gso_size; + + /* record cdesc_qw1 with TSO parameters */ + off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX | + (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | + (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | + (cd_mss << ICE_TXD_CTX_QW1_MSS_S); + first->tx_flags |= ICE_TX_FLAGS_TSO; + return 1; +} + /** * ice_txd_use_count - estimate the number of descriptors needed for Tx * @size: transmit request size in bytes @@ -1322,8 +1686,10 @@ static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) static netdev_tx_t ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) { + struct ice_tx_offload_params offload = { 0 }; struct ice_tx_buf *first; unsigned int count; + int tso, csum; count = ice_xmit_desc_count(skb); if (ice_chk_linearize(skb, count)) { @@ -1344,13 +1710,46 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) return NETDEV_TX_BUSY; } + offload.tx_ring = tx_ring; + /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buf[tx_ring->next_to_use]; first->skb = skb; first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); first->gso_segs = 1; + first->tx_flags = 0; - ice_tx_map(tx_ring, first); + /* prepare the VLAN tagging flags for Tx */ + if (ice_tx_prepare_vlan_flags(tx_ring, first)) + goto out_drop; + + /* set up TSO offload */ + tso = ice_tso(first, &offload); + if (tso < 0) + goto out_drop; + + /* always set up Tx checksum offload */ + csum = ice_tx_csum(first, &offload); + if (csum < 0) + goto out_drop; + + if (tso || offload.cd_tunnel_params) { + struct ice_tx_ctx_desc *cdesc; + int i = tx_ring->next_to_use; + + /* grab the next descriptor */ + cdesc = ICE_TX_CTX_DESC(tx_ring, i); + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + /* setup context descriptor */ + cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params); + cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2); + cdesc->rsvd = cpu_to_le16(0); + cdesc->qw1 = cpu_to_le64(offload.cd_qw1); + } + + ice_tx_map(tx_ring, first, &offload); return NETDEV_TX_OK; out_drop: diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 8fa4450514b4..567067b650c4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -28,6 +28,12 @@ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) +#define ICE_TX_FLAGS_TSO BIT(0) +#define ICE_TX_FLAGS_HW_VLAN BIT(1) +#define ICE_TX_FLAGS_SW_VLAN BIT(2) +#define ICE_TX_FLAGS_VLAN_M 0xffff0000 +#define ICE_TX_FLAGS_VLAN_S 16 + struct ice_tx_buf { struct ice_tx_desc *next_to_watch; struct sk_buff *skb; @@ -38,6 +44,17 @@ struct ice_tx_buf { DEFINE_DMA_UNMAP_LEN(len); }; +struct ice_tx_offload_params { + u8 header_len; + u32 td_cmd; + u32 td_offset; + u32 td_l2tag1; + u16 cd_l2tag2; + u32 cd_tunnel_params; + u64 cd_qw1; + struct ice_ring *tx_ring; +}; + struct ice_rx_buf { struct sk_buff *skb; dma_addr_t dma; From fcea6f3da546b93050f3534aadea7bd96c1d7349 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:16 -0700 Subject: [PATCH 12/15] ice: Add stats and ethtool support This patch implements a watchdog task to get packet statistics from the device. This patch also adds support for the following ethtool operations: ethtool devname ethtool -s devname [msglvl N] [msglevel type on|off] ethtool -g|--show-ring devname ethtool -G|--set-ring devname [rx N] [tx N] ethtool -i|--driver devname ethtool -d|--register-dump devname [raw on|off] [hex on|off] [file name] ethtool -k|--show-features|--show-offload devname ethtool -K|--features|--offload devname feature on|off ethtool -P|--show-permaddr devname ethtool -S|--statistics devname ethtool -a|--show-pause devname ethtool -A|--pause devname [autoneg on|off] [rx on|off] [tx on|off] ethtool -r|--negotiate devname CC: Andrew Lunn CC: Jakub Kicinski CC: Stephen Hemminger Signed-off-by: Anirudh Venkataramanan Acked-by: Stephen Hemminger Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/Makefile | 3 +- drivers/net/ethernet/intel/ice/ice.h | 28 +- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 43 + drivers/net/ethernet/intel/ice/ice_common.c | 195 ++++ drivers/net/ethernet/intel/ice/ice_common.h | 5 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 940 ++++++++++++++++++ .../net/ethernet/intel/ice/ice_hw_autogen.h | 80 ++ drivers/net/ethernet/intel/ice/ice_main.c | 469 ++++++++- drivers/net/ethernet/intel/ice/ice_type.h | 70 ++ 9 files changed, 1828 insertions(+), 5 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_ethtool.c diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index de82fc875775..4058673fd853 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -13,4 +13,5 @@ ice-y := ice_main.o \ ice_nvm.o \ ice_switch.o \ ice_sched.o \ - ice_txrx.o + ice_txrx.o \ + ice_ethtool.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 51cc955c7220..66d0976afa46 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -13,12 +13,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -34,10 +36,14 @@ #include "ice_common.h" #include "ice_sched.h" +extern const char ice_drv_ver[]; #define ICE_BAR0 0 #define ICE_DFLT_NUM_DESC 128 +#define ICE_MIN_NUM_DESC 8 +#define ICE_MAX_NUM_DESC 8160 #define ICE_REQ_DESC_MULTIPLE 32 #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) +#define ICE_ETHTOOL_FWVER_LEN 32 #define ICE_AQ_LEN 64 #define ICE_MIN_MSIX 2 #define ICE_NO_VSI 0xffff @@ -56,6 +62,8 @@ #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) #define ICE_INVAL_Q_INDEX 0xffff +#define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) + #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) #define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \ @@ -102,6 +110,7 @@ enum ice_state { __ICE_DOWN, __ICE_PFR_REQ, /* set by driver and peers */ __ICE_ADMINQ_EVENT_PENDING, + __ICE_CFG_BUSY, __ICE_SERVICE_SCHED, __ICE_STATE_NBITS /* must be last */ }; @@ -118,8 +127,13 @@ struct ice_vsi { irqreturn_t (*irq_handler)(int irq, void *data); + u64 tx_linearize; DECLARE_BITMAP(state, __ICE_STATE_NBITS); unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + u32 tx_restart; + u32 tx_busy; + u32 rx_buf_failed; + u32 rx_page_failed; int num_q_vectors; int base_vector; enum ice_vsi_type type; @@ -141,8 +155,14 @@ struct ice_vsi { struct ice_aqc_vsi_props info; /* VSI properties */ + /* VSI stats */ + struct rtnl_link_stats64 net_stats; + struct ice_eth_stats eth_stats; + struct ice_eth_stats eth_stats_prev; + bool irqs_ready; bool current_isup; /* Sync 'link up' logging */ + bool stat_offsets_loaded; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -205,8 +225,10 @@ struct ice_pf { u16 q_left_rx; /* remaining num rx queues left unclaimed */ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */ u16 num_alloc_vsi; - + struct ice_hw_port_stats stats; + struct ice_hw_port_stats stats_prev; struct ice_hw hw; + bool stat_prev_loaded; /* has previous stats been loaded */ char int_name[ICE_INT_NAME_STR_LEN]; }; @@ -239,8 +261,12 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, wr32(hw, GLINT_DYN_CTL(vector), val); } +void ice_set_ethtool_ops(struct net_device *netdev); +int ice_up(struct ice_vsi *vsi); +int ice_down(struct ice_vsi *vsi); int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); +void ice_print_link_msg(struct ice_vsi *vsi, bool isup); #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 051427dcfb3c..ae7376f2135b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -859,6 +859,45 @@ struct ice_aqc_get_phy_caps_data { } qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX]; }; +/* Set PHY capabilities (direct 0x0601) + * NOTE: This command must be followed by setup link and restart auto-neg + */ +struct ice_aqc_set_phy_cfg { + u8 lport_num; + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Set PHY config command data structure */ +struct ice_aqc_set_phy_cfg_data { + __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ + __le64 rsvd0; + u8 caps; +#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0) +#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1) +#define ICE_AQ_PHY_ENA_LOW_POWER BIT(2) +#define ICE_AQ_PHY_ENA_LINK BIT(3) +#define ICE_AQ_PHY_ENA_ATOMIC_LINK BIT(5) + u8 low_power_ctrl; + __le16 eee_cap; /* Value from ice_aqc_get_phy_caps */ + __le16 eeer_value; + u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */ + u8 rsvd1; +}; + +/* Restart AN command data structure (direct 0x0605) + * Also used for response, with only the lport_num field present. + */ +struct ice_aqc_restart_an { + u8 lport_num; + u8 reserved; + u8 cmd_flags; +#define ICE_AQC_RESTART_AN_LINK_RESTART BIT(1) +#define ICE_AQC_RESTART_AN_LINK_ENABLE BIT(2) + u8 reserved2[13]; +}; + /* Get link status (indirect 0x0607), also used for Link Status Event */ struct ice_aqc_get_link_status { u8 lport_num; @@ -1137,6 +1176,8 @@ struct ice_aq_desc { struct ice_aqc_clear_pxe clear_pxe; struct ice_aqc_list_caps get_cap; struct ice_aqc_get_phy_caps get_phy; + struct ice_aqc_set_phy_cfg set_phy; + struct ice_aqc_restart_an restart_an; struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_sw_rules sw_rules; struct ice_aqc_get_topo get_topo; @@ -1222,6 +1263,8 @@ enum ice_adminq_opc { /* PHY commands */ ice_aqc_opc_get_phy_caps = 0x0600, + ice_aqc_opc_set_phy_cfg = 0x0601, + ice_aqc_opc_restart_an = 0x0605, ice_aqc_opc_get_link_status = 0x0607, /* NVM commands */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index b4e834359a95..02d589275db0 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1261,6 +1261,201 @@ void ice_clear_pxe_mode(struct ice_hw *hw) ice_aq_clear_pxe_mode(hw); } +/** + * ice_aq_set_phy_cfg + * @hw: pointer to the hw struct + * @lport: logical port number + * @cfg: structure with PHY configuration data to be set + * @cd: pointer to command details structure or NULL + * + * Set the various PHY configuration parameters supported on the Port. + * One or more of the Set PHY config parameters may be ignored in an MFP + * mode as the PF may not have the privilege to set some of the PHY Config + * parameters. This status will be indicated by the command response (0x0601). + */ +static enum ice_status +ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport, + struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd) +{ + struct ice_aqc_set_phy_cfg *cmd; + struct ice_aq_desc desc; + + if (!cfg) + return ICE_ERR_PARAM; + + cmd = &desc.params.set_phy; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg); + cmd->lport_num = lport; + + return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd); +} + +/** + * ice_update_link_info - update status of the HW network link + * @pi: port info structure of the interested logical port + */ +static enum ice_status +ice_update_link_info(struct ice_port_info *pi) +{ + struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_phy_info *phy_info; + enum ice_status status; + struct ice_hw *hw; + + if (!pi) + return ICE_ERR_PARAM; + + hw = pi->hw; + + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return ICE_ERR_NO_MEMORY; + + phy_info = &pi->phy; + status = ice_aq_get_link_info(pi, true, NULL, NULL); + if (status) + goto out; + + if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, + pcaps, NULL); + if (status) + goto out; + + memcpy(phy_info->link_info.module_type, &pcaps->module_type, + sizeof(phy_info->link_info.module_type)); + } +out: + devm_kfree(ice_hw_to_dev(hw), pcaps); + return status; +} + +/** + * ice_set_fc + * @pi: port information structure + * @aq_failures: pointer to status code, specific to ice_set_fc routine + * @atomic_restart: enable automatic link update + * + * Set the requested flow control mode. + */ +enum ice_status +ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart) +{ + struct ice_aqc_set_phy_cfg_data cfg = { 0 }; + struct ice_aqc_get_phy_caps_data *pcaps; + enum ice_status status; + u8 pause_mask = 0x0; + struct ice_hw *hw; + + if (!pi) + return ICE_ERR_PARAM; + hw = pi->hw; + *aq_failures = ICE_SET_FC_AQ_FAIL_NONE; + + switch (pi->fc.req_mode) { + case ICE_FC_FULL: + pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE; + pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE; + break; + case ICE_FC_RX_PAUSE: + pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE; + break; + case ICE_FC_TX_PAUSE: + pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE; + break; + default: + break; + } + + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return ICE_ERR_NO_MEMORY; + + /* Get the current phy config */ + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + NULL); + if (status) { + *aq_failures = ICE_SET_FC_AQ_FAIL_GET; + goto out; + } + + /* clear the old pause settings */ + cfg.caps = pcaps->caps & ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE | + ICE_AQC_PHY_EN_RX_LINK_PAUSE); + /* set the new capabilities */ + cfg.caps |= pause_mask; + /* If the capabilities have changed, then set the new config */ + if (cfg.caps != pcaps->caps) { + int retry_count, retry_max = 10; + + /* Auto restart link so settings take effect */ + if (atomic_restart) + cfg.caps |= ICE_AQ_PHY_ENA_ATOMIC_LINK; + /* Copy over all the old settings */ + cfg.phy_type_low = pcaps->phy_type_low; + cfg.low_power_ctrl = pcaps->low_power_ctrl; + cfg.eee_cap = pcaps->eee_cap; + cfg.eeer_value = pcaps->eeer_value; + cfg.link_fec_opt = pcaps->link_fec_options; + + status = ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL); + if (status) { + *aq_failures = ICE_SET_FC_AQ_FAIL_SET; + goto out; + } + + /* Update the link info + * It sometimes takes a really long time for link to + * come back from the atomic reset. Thus, we wait a + * little bit. + */ + for (retry_count = 0; retry_count < retry_max; retry_count++) { + status = ice_update_link_info(pi); + + if (!status) + break; + + mdelay(100); + } + + if (status) + *aq_failures = ICE_SET_FC_AQ_FAIL_UPDATE; + } + +out: + devm_kfree(ice_hw_to_dev(hw), pcaps); + return status; +} + +/** + * ice_aq_set_link_restart_an + * @pi: pointer to the port information structure + * @ena_link: if true: enable link, if false: disable link + * @cd: pointer to command details structure or NULL + * + * Sets up the link and restarts the Auto-Negotiation over the link. + */ +enum ice_status +ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, + struct ice_sq_cd *cd) +{ + struct ice_aqc_restart_an *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.restart_an; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_restart_an); + + cmd->cmd_flags = ICE_AQC_RESTART_AN_LINK_RESTART; + cmd->lport_num = pi->lport; + if (ena_link) + cmd->cmd_flags |= ICE_AQC_RESTART_AN_LINK_ENABLE; + else + cmd->cmd_flags &= ~ICE_AQC_RESTART_AN_LINK_ENABLE; + + return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd); +} + /** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 38719ba01d9b..c4213fbf9579 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -58,6 +58,11 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd); enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); enum ice_status +ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart); +enum ice_status +ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, + struct ice_sq_cd *cd); +enum ice_status ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, struct ice_link_status *link, struct ice_sq_cd *cd); enum ice_status diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c new file mode 100644 index 000000000000..186764a5c263 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -0,0 +1,940 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +/* ethtool support for ice */ + +#include "ice.h" + +struct ice_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define ICE_STAT(_type, _name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ + .stat_offset = offsetof(_type, _stat) \ +} + +#define ICE_VSI_STAT(_name, _stat) \ + ICE_STAT(struct ice_vsi, _name, _stat) +#define ICE_PF_STAT(_name, _stat) \ + ICE_STAT(struct ice_pf, _name, _stat) + +static int ice_q_stats_len(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + return ((np->vsi->num_txq + np->vsi->num_rxq) * + (sizeof(struct ice_q_stats) / sizeof(u64))); +} + +#define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats) +#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats) + +#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_VSI_STATS_LEN + \ + ice_q_stats_len(n)) + +static const struct ice_stats ice_gstrings_vsi_stats[] = { + ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast), + ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast), + ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast), + ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast), + ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), + ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast), + ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes), + ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes), + ICE_VSI_STAT("rx_discards", eth_stats.rx_discards), + ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), + ICE_VSI_STAT("tx_linearize", tx_linearize), + ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), + ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed), + ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), +}; + +/* These PF_STATs might look like duplicates of some NETDEV_STATs, + * but they aren't. This device is capable of supporting multiple + * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual + * netdevs whereas the PF_STATs are for the physical function that's + * hosting these netdevs. + * + * The PF_STATs are appended to the netdev stats only when ethtool -S + * is queried on the base PF netdev. + */ +static struct ice_stats ice_gstrings_pf_stats[] = { + ICE_PF_STAT("tx_bytes", stats.eth.tx_bytes), + ICE_PF_STAT("rx_bytes", stats.eth.rx_bytes), + ICE_PF_STAT("tx_unicast", stats.eth.tx_unicast), + ICE_PF_STAT("rx_unicast", stats.eth.rx_unicast), + ICE_PF_STAT("tx_multicast", stats.eth.tx_multicast), + ICE_PF_STAT("rx_multicast", stats.eth.rx_multicast), + ICE_PF_STAT("tx_broadcast", stats.eth.tx_broadcast), + ICE_PF_STAT("rx_broadcast", stats.eth.rx_broadcast), + ICE_PF_STAT("tx_errors", stats.eth.tx_errors), + ICE_PF_STAT("tx_size_64", stats.tx_size_64), + ICE_PF_STAT("rx_size_64", stats.rx_size_64), + ICE_PF_STAT("tx_size_127", stats.tx_size_127), + ICE_PF_STAT("rx_size_127", stats.rx_size_127), + ICE_PF_STAT("tx_size_255", stats.tx_size_255), + ICE_PF_STAT("rx_size_255", stats.rx_size_255), + ICE_PF_STAT("tx_size_511", stats.tx_size_511), + ICE_PF_STAT("rx_size_511", stats.rx_size_511), + ICE_PF_STAT("tx_size_1023", stats.tx_size_1023), + ICE_PF_STAT("rx_size_1023", stats.rx_size_1023), + ICE_PF_STAT("tx_size_1522", stats.tx_size_1522), + ICE_PF_STAT("rx_size_1522", stats.rx_size_1522), + ICE_PF_STAT("tx_size_big", stats.tx_size_big), + ICE_PF_STAT("rx_size_big", stats.rx_size_big), + ICE_PF_STAT("link_xon_tx", stats.link_xon_tx), + ICE_PF_STAT("link_xon_rx", stats.link_xon_rx), + ICE_PF_STAT("link_xoff_tx", stats.link_xoff_tx), + ICE_PF_STAT("link_xoff_rx", stats.link_xoff_rx), + ICE_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down), + ICE_PF_STAT("rx_undersize", stats.rx_undersize), + ICE_PF_STAT("rx_fragments", stats.rx_fragments), + ICE_PF_STAT("rx_oversize", stats.rx_oversize), + ICE_PF_STAT("rx_jabber", stats.rx_jabber), + ICE_PF_STAT("rx_csum_bad", hw_csum_rx_error), + ICE_PF_STAT("rx_length_errors", stats.rx_len_errors), + ICE_PF_STAT("rx_dropped", stats.eth.rx_discards), + ICE_PF_STAT("rx_crc_errors", stats.crc_errors), + ICE_PF_STAT("illegal_bytes", stats.illegal_bytes), + ICE_PF_STAT("mac_local_faults", stats.mac_local_faults), + ICE_PF_STAT("mac_remote_faults", stats.mac_remote_faults), +}; + +static u32 ice_regs_dump_list[] = { + PFGEN_STATE, + PRTGEN_STATUS, + QRX_CTRL(0), + QINT_TQCTL(0), + QINT_RQCTL(0), + PFINT_OICR_ENA, + QRX_ITR(0), +}; + +/** + * ice_nvm_version_str - format the NVM version strings + * @hw: ptr to the hardware info + */ +static char *ice_nvm_version_str(struct ice_hw *hw) +{ + static char buf[ICE_ETHTOOL_FWVER_LEN]; + u8 ver, patch; + u32 full_ver; + u16 build; + + full_ver = hw->nvm.oem_ver; + ver = (u8)((full_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT); + build = (u16)((full_ver & ICE_OEM_VER_BUILD_MASK) >> + ICE_OEM_VER_BUILD_SHIFT); + patch = (u8)(full_ver & ICE_OEM_VER_PATCH_MASK); + + snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d", + (hw->nvm.ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT, + (hw->nvm.ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT, + hw->nvm.eetrack, ver, build, patch); + + return buf; +} + +static void +ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version)); + strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw), + sizeof(drvinfo->fw_version)); + strlcpy(drvinfo->bus_info, pci_name(pf->pdev), + sizeof(drvinfo->bus_info)); +} + +static int ice_get_regs_len(struct net_device __always_unused *netdev) +{ + return ARRAY_SIZE(ice_regs_dump_list); +} + +static void +ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct ice_hw *hw = &pf->hw; + u32 *regs_buf = (u32 *)p; + int i; + + regs->version = 1; + + for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list) / sizeof(u32); ++i) + regs_buf[i] = rd32(hw, ice_regs_dump_list[i]); +} + +static u32 ice_get_msglevel(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + +#ifndef CONFIG_DYNAMIC_DEBUG + if (pf->hw.debug_mask) + netdev_info(netdev, "hw debug_mask: 0x%llX\n", + pf->hw.debug_mask); +#endif /* !CONFIG_DYNAMIC_DEBUG */ + + return pf->msg_enable; +} + +static void ice_set_msglevel(struct net_device *netdev, u32 data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + +#ifndef CONFIG_DYNAMIC_DEBUG + if (ICE_DBG_USER & data) + pf->hw.debug_mask = data; + else + pf->msg_enable = data; +#else + pf->msg_enable = data; +#endif /* !CONFIG_DYNAMIC_DEBUG */ +} + +static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + char *p = (char *)data; + unsigned int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ICE_VSI_STATS_LEN; i++) { + snprintf(p, ETH_GSTRING_LEN, "%s", + ice_gstrings_vsi_stats[i].stat_string); + p += ETH_GSTRING_LEN; + } + + ice_for_each_txq(vsi, i) { + snprintf(p, ETH_GSTRING_LEN, + "tx-queue-%u.tx_packets", i); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i); + p += ETH_GSTRING_LEN; + } + + ice_for_each_rxq(vsi, i) { + snprintf(p, ETH_GSTRING_LEN, + "rx-queue-%u.rx_packets", i); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i); + p += ETH_GSTRING_LEN; + } + + if (vsi->type != ICE_VSI_PF) + return; + + for (i = 0; i < ICE_PF_STATS_LEN; i++) { + snprintf(p, ETH_GSTRING_LEN, "port.%s", + ice_gstrings_pf_stats[i].stat_string); + p += ETH_GSTRING_LEN; + } + + break; + default: + break; + } +} + +static int ice_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ICE_ALL_STATS_LEN(netdev); + default: + return -EOPNOTSUPP; + } +} + +static void +ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_ring *ring; + unsigned int j = 0; + int i = 0; + char *p; + + for (j = 0; j < ICE_VSI_STATS_LEN; j++) { + p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset; + data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + + /* populate per queue stats */ + rcu_read_lock(); + + ice_for_each_txq(vsi, j) { + ring = READ_ONCE(vsi->tx_rings[j]); + if (!ring) + continue; + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } + + ice_for_each_rxq(vsi, j) { + ring = READ_ONCE(vsi->rx_rings[j]); + data[i++] = ring->stats.pkts; + data[i++] = ring->stats.bytes; + } + + rcu_read_unlock(); + + if (vsi->type != ICE_VSI_PF) + return; + + for (j = 0; j < ICE_PF_STATS_LEN; j++) { + p = (char *)pf + ice_gstrings_pf_stats[j].stat_offset; + data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } +} + +static int +ice_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ks) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_link_status *hw_link_info; + struct ice_vsi *vsi = np->vsi; + bool link_up; + + hw_link_info = &vsi->port_info->phy.link_info; + link_up = hw_link_info->link_info & ICE_AQ_LINK_UP; + + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); + + /* set speed and duplex */ + if (link_up) { + switch (hw_link_info->link_speed) { + case ICE_AQ_LINK_SPEED_100MB: + ks->base.speed = SPEED_100; + break; + case ICE_AQ_LINK_SPEED_2500MB: + ks->base.speed = SPEED_2500; + break; + case ICE_AQ_LINK_SPEED_5GB: + ks->base.speed = SPEED_5000; + break; + case ICE_AQ_LINK_SPEED_10GB: + ks->base.speed = SPEED_10000; + break; + case ICE_AQ_LINK_SPEED_25GB: + ks->base.speed = SPEED_25000; + break; + case ICE_AQ_LINK_SPEED_40GB: + ks->base.speed = SPEED_40000; + break; + default: + ks->base.speed = SPEED_UNKNOWN; + break; + } + + ks->base.duplex = DUPLEX_FULL; + } else { + ks->base.speed = SPEED_UNKNOWN; + ks->base.duplex = DUPLEX_UNKNOWN; + } + + /* set autoneg settings */ + ks->base.autoneg = ((hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ? + AUTONEG_ENABLE : AUTONEG_DISABLE); + + /* set media type settings */ + switch (vsi->port_info->phy.media_type) { + case ICE_MEDIA_FIBER: + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ks->base.port = PORT_FIBRE; + break; + case ICE_MEDIA_BASET: + ethtool_link_ksettings_add_link_mode(ks, supported, TP); + ethtool_link_ksettings_add_link_mode(ks, advertising, TP); + ks->base.port = PORT_TP; + break; + case ICE_MEDIA_BACKPLANE: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, Backplane); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Backplane); + ks->base.port = PORT_NONE; + break; + case ICE_MEDIA_DA: + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE); + ks->base.port = PORT_DA; + break; + default: + ks->base.port = PORT_OTHER; + break; + } + + /* flow control is symmetric and always supported */ + ethtool_link_ksettings_add_link_mode(ks, supported, Pause); + + switch (vsi->port_info->fc.req_mode) { + case ICE_FC_FULL: + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); + break; + case ICE_FC_TX_PAUSE: + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); + break; + case ICE_FC_RX_PAUSE: + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); + break; + case ICE_FC_PFC: + default: + ethtool_link_ksettings_del_link_mode(ks, advertising, Pause); + ethtool_link_ksettings_del_link_mode(ks, advertising, + Asym_Pause); + break; + } + + return 0; +} + +/** + * ice_get_rxnfc - command to get RX flow classification rules + * @netdev: network interface device structure + * @cmd: ethtool rxnfc command + * @rule_locs: buffer to rturn Rx flow classification rules + * + * Returns Success if the command is supported. + */ +static int ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 __always_unused *rule_locs) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = vsi->rss_size; + ret = 0; + break; + default: + break; + } + + return ret; +} + +static void +ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + ring->rx_max_pending = ICE_MAX_NUM_DESC; + ring->tx_max_pending = ICE_MAX_NUM_DESC; + ring->rx_pending = vsi->rx_rings[0]->count; + ring->tx_pending = vsi->tx_rings[0]->count; + ring->rx_mini_pending = ICE_MIN_NUM_DESC; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int +ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) +{ + struct ice_ring *tx_rings = NULL, *rx_rings = NULL; + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int i, timeout = 50, err = 0; + u32 new_rx_cnt, new_tx_cnt; + + if (ring->tx_pending > ICE_MAX_NUM_DESC || + ring->tx_pending < ICE_MIN_NUM_DESC || + ring->rx_pending > ICE_MAX_NUM_DESC || + ring->rx_pending < ICE_MIN_NUM_DESC) { + netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d]\n", + ring->tx_pending, ring->rx_pending, + ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC); + return -EINVAL; + } + + new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE); + new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE); + + /* if nothing to do return success */ + if (new_tx_cnt == vsi->tx_rings[0]->count && + new_rx_cnt == vsi->rx_rings[0]->count) { + netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); + return 0; + } + + while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + /* set for the next time the netdev is started */ + if (!netif_running(vsi->netdev)) { + for (i = 0; i < vsi->alloc_txq; i++) + vsi->tx_rings[i]->count = new_tx_cnt; + for (i = 0; i < vsi->alloc_rxq; i++) + vsi->rx_rings[i]->count = new_rx_cnt; + netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); + goto done; + } + + if (new_tx_cnt == vsi->tx_rings[0]->count) + goto process_rx; + + /* alloc updated Tx resources */ + netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n", + vsi->tx_rings[0]->count, new_tx_cnt); + + tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, + sizeof(struct ice_ring), GFP_KERNEL); + if (!tx_rings) { + err = -ENOMEM; + goto done; + } + + for (i = 0; i < vsi->num_txq; i++) { + /* clone ring and setup updated count */ + tx_rings[i] = *vsi->tx_rings[i]; + tx_rings[i].count = new_tx_cnt; + tx_rings[i].desc = NULL; + tx_rings[i].tx_buf = NULL; + err = ice_setup_tx_ring(&tx_rings[i]); + if (err) { + while (i) { + i--; + ice_clean_tx_ring(&tx_rings[i]); + } + devm_kfree(&pf->pdev->dev, tx_rings); + goto done; + } + } + +process_rx: + if (new_rx_cnt == vsi->rx_rings[0]->count) + goto process_link; + + /* alloc updated Rx resources */ + netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n", + vsi->rx_rings[0]->count, new_rx_cnt); + + rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, + sizeof(struct ice_ring), GFP_KERNEL); + if (!rx_rings) { + err = -ENOMEM; + goto done; + } + + for (i = 0; i < vsi->num_rxq; i++) { + /* clone ring and setup updated count */ + rx_rings[i] = *vsi->rx_rings[i]; + rx_rings[i].count = new_rx_cnt; + rx_rings[i].desc = NULL; + rx_rings[i].rx_buf = NULL; + /* this is to allow wr32 to have something to write to + * during early allocation of Rx buffers + */ + rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS; + + err = ice_setup_rx_ring(&rx_rings[i]); + if (err) + goto rx_unwind; + + /* allocate Rx buffers */ + err = ice_alloc_rx_bufs(&rx_rings[i], + ICE_DESC_UNUSED(&rx_rings[i])); +rx_unwind: + if (err) { + while (i) { + i--; + ice_free_rx_ring(&rx_rings[i]); + } + devm_kfree(&pf->pdev->dev, rx_rings); + err = -ENOMEM; + goto free_tx; + } + } + +process_link: + /* Bring interface down, copy in the new ring info, then restore the + * interface. if VSI is up, bring it down and then back up + */ + if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + ice_down(vsi); + + if (tx_rings) { + for (i = 0; i < vsi->alloc_txq; i++) { + ice_free_tx_ring(vsi->tx_rings[i]); + *vsi->tx_rings[i] = tx_rings[i]; + } + devm_kfree(&pf->pdev->dev, tx_rings); + } + + if (rx_rings) { + for (i = 0; i < vsi->alloc_rxq; i++) { + ice_free_rx_ring(vsi->rx_rings[i]); + /* copy the real tail offset */ + rx_rings[i].tail = vsi->rx_rings[i]->tail; + /* this is to fake out the allocation routine + * into thinking it has to realloc everything + * but the recycling logic will let us re-use + * the buffers allocated above + */ + rx_rings[i].next_to_use = 0; + rx_rings[i].next_to_clean = 0; + rx_rings[i].next_to_alloc = 0; + *vsi->rx_rings[i] = rx_rings[i]; + } + devm_kfree(&pf->pdev->dev, rx_rings); + } + + ice_up(vsi); + } + goto done; + +free_tx: + /* error cleanup if the Rx allocations failed after getting Tx */ + if (tx_rings) { + for (i = 0; i < vsi->alloc_txq; i++) + ice_free_tx_ring(&tx_rings[i]); + devm_kfree(&pf->pdev->dev, tx_rings); + } + +done: + clear_bit(__ICE_CFG_BUSY, pf->state); + return err; +} + +static int ice_nway_reset(struct net_device *netdev) +{ + /* restart autonegotiation */ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_link_status *hw_link_info; + struct ice_vsi *vsi = np->vsi; + struct ice_port_info *pi; + enum ice_status status; + bool link_up; + + pi = vsi->port_info; + hw_link_info = &pi->phy.link_info; + link_up = hw_link_info->link_info & ICE_AQ_LINK_UP; + + status = ice_aq_set_link_restart_an(pi, link_up, NULL); + if (status) { + netdev_info(netdev, "link restart failed, err %d aq_err %d\n", + status, pi->hw->adminq.sq_last_status); + return -EIO; + } + + return 0; +} + +/** + * ice_get_pauseparam - Get Flow Control status + * @netdev: network interface device structure + * @pause: ethernet pause (flow control) parameters + */ +static void +ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_port_info *pi; + + pi = np->vsi->port_info; + pause->autoneg = + ((pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) ? + AUTONEG_ENABLE : AUTONEG_DISABLE); + + if (pi->fc.current_mode == ICE_FC_RX_PAUSE) { + pause->rx_pause = 1; + } else if (pi->fc.current_mode == ICE_FC_TX_PAUSE) { + pause->tx_pause = 1; + } else if (pi->fc.current_mode == ICE_FC_FULL) { + pause->rx_pause = 1; + pause->tx_pause = 1; + } +} + +/** + * ice_set_pauseparam - Set Flow Control parameter + * @netdev: network interface device structure + * @pause: return tx/rx flow control status + */ +static int +ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_link_status *hw_link_info; + struct ice_pf *pf = np->vsi->back; + struct ice_vsi *vsi = np->vsi; + struct ice_hw *hw = &pf->hw; + struct ice_port_info *pi; + enum ice_status status; + u8 aq_failures; + bool link_up; + int err = 0; + + pi = vsi->port_info; + hw_link_info = &pi->phy.link_info; + link_up = hw_link_info->link_info & ICE_AQ_LINK_UP; + + /* Changing the port's flow control is not supported if this isn't the + * PF VSI + */ + if (vsi->type != ICE_VSI_PF) { + netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n"); + return -EOPNOTSUPP; + } + + if (pause->autoneg != (hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) { + netdev_info(netdev, "To change autoneg please use: ethtool -s autoneg \n"); + return -EOPNOTSUPP; + } + + /* If we have link and don't have autoneg */ + if (!test_bit(__ICE_DOWN, pf->state) && + !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) { + /* Send message that it might not necessarily work*/ + netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n"); + } + + if (pause->rx_pause && pause->tx_pause) + pi->fc.req_mode = ICE_FC_FULL; + else if (pause->rx_pause && !pause->tx_pause) + pi->fc.req_mode = ICE_FC_RX_PAUSE; + else if (!pause->rx_pause && pause->tx_pause) + pi->fc.req_mode = ICE_FC_TX_PAUSE; + else if (!pause->rx_pause && !pause->tx_pause) + pi->fc.req_mode = ICE_FC_NONE; + else + return -EINVAL; + + /* Tell the OS link is going down, the link will go back up when fw + * says it is ready asynchronously + */ + ice_print_link_msg(vsi, false); + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + /* Set the FC mode and only restart AN if link is up */ + status = ice_set_fc(pi, &aq_failures, link_up); + + if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) { + netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + err = -EAGAIN; + } else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) { + netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + err = -EAGAIN; + } else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) { + netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + err = -EAGAIN; + } + + if (!test_bit(__ICE_DOWN, pf->state)) { + /* Give it a little more time to try to come back */ + msleep(75); + if (!test_bit(__ICE_DOWN, pf->state)) + return ice_nway_reset(netdev); + } + + return err; +} + +/** + * ice_get_rxfh_key_size - get the RSS hash key size + * @netdev: network interface device structure + * + * Returns the table size. + */ +static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev) +{ + return ICE_VSIQF_HKEY_ARRAY_SIZE; +} + +/** + * ice_get_rxfh_indir_size - get the rx flow hash indirection table size + * @netdev: network interface device structure + * + * Returns the table size. + */ +static u32 ice_get_rxfh_indir_size(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + return np->vsi->rss_table_size; +} + +/** + * ice_get_rxfh - get the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function + * + * Reads the indirection table directly from the hardware. + */ +static int +ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int ret = 0, i; + u8 *lut; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + if (!indir) + return 0; + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + /* RSS not supported return error here */ + netdev_warn(netdev, "RSS is not configured on this VSI!\n"); + return -EIO; + } + + lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) { + ret = -EIO; + goto out; + } + + for (i = 0; i < vsi->rss_table_size; i++) + indir[i] = (u32)(lut[i]); + +out: + devm_kfree(&pf->pdev->dev, lut); + return ret; +} + +/** + * ice_set_rxfh - set the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function + * + * Returns -EINVAL if the table specifies an invalid queue id, otherwise + * returns 0 after programming the table. + */ +static int ice_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u8 *seed = NULL; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + /* RSS not supported return error here */ + netdev_warn(netdev, "RSS is not configured on this VSI!\n"); + return -EIO; + } + + if (key) { + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = + devm_kzalloc(&pf->pdev->dev, + ICE_VSIQF_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; + } + memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; + } + + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev, + vsi->rss_table_size, + GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; + } + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + if (indir) { + int i; + + for (i = 0; i < vsi->rss_table_size; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + } else { + ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size, + vsi->rss_size); + } + + if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size)) + return -EIO; + + return 0; +} + +static const struct ethtool_ops ice_ethtool_ops = { + .get_link_ksettings = ice_get_link_ksettings, + .get_drvinfo = ice_get_drvinfo, + .get_regs_len = ice_get_regs_len, + .get_regs = ice_get_regs, + .get_msglevel = ice_get_msglevel, + .set_msglevel = ice_set_msglevel, + .get_link = ethtool_op_get_link, + .get_strings = ice_get_strings, + .get_ethtool_stats = ice_get_ethtool_stats, + .get_sset_count = ice_get_sset_count, + .get_rxnfc = ice_get_rxnfc, + .get_ringparam = ice_get_ringparam, + .set_ringparam = ice_set_ringparam, + .nway_reset = ice_nway_reset, + .get_pauseparam = ice_get_pauseparam, + .set_pauseparam = ice_set_pauseparam, + .get_rxfh_key_size = ice_get_rxfh_key_size, + .get_rxfh_indir_size = ice_get_rxfh_indir_size, + .get_rxfh = ice_get_rxfh, + .set_rxfh = ice_set_rxfh, +}; + +/** + * ice_set_ethtool_ops - setup netdev ethtool ops + * @netdev: network interface device structure + * + * setup netdev ethtool ops with ice specific ops + */ +void ice_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ice_ethtool_ops; +} diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index fc9b0b179e99..777a1c653edb 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -94,6 +94,8 @@ #define PFGEN_CTRL 0x00091000 #define PFGEN_CTRL_PFSWR_S 0 #define PFGEN_CTRL_PFSWR_M BIT(PFGEN_CTRL_PFSWR_S) +#define PFGEN_STATE 0x00088000 +#define PRTGEN_STATUS 0x000B8100 #define PFHMC_ERRORDATA 0x00520500 #define PFHMC_ERRORINFO 0x00520400 #define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4)) @@ -165,6 +167,7 @@ #define QRX_CTRL_QENA_REQ_M BIT(QRX_CTRL_QENA_REQ_S) #define QRX_CTRL_QENA_STAT_S 2 #define QRX_CTRL_QENA_STAT_M BIT(QRX_CTRL_QENA_STAT_S) +#define QRX_ITR(_QRX) (0x00292000 + ((_QRX) * 4)) #define QRX_TAIL(_QRX) (0x00290000 + ((_QRX) * 4)) #define GLNVM_FLA 0x000B6108 #define GLNVM_FLA_LOCKED_S 6 @@ -180,5 +183,82 @@ #define PF_FUNC_RID 0x0009E880 #define PF_FUNC_RID_FUNC_NUM_S 0 #define PF_FUNC_RID_FUNC_NUM_M ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S) +#define GLPRT_BPRCH(_i) (0x00381384 + ((_i) * 8)) +#define GLPRT_BPRCL(_i) (0x00381380 + ((_i) * 8)) +#define GLPRT_BPTCH(_i) (0x00381244 + ((_i) * 8)) +#define GLPRT_BPTCL(_i) (0x00381240 + ((_i) * 8)) +#define GLPRT_CRCERRS(_i) (0x00380100 + ((_i) * 8)) +#define GLPRT_GORCH(_i) (0x00380004 + ((_i) * 8)) +#define GLPRT_GORCL(_i) (0x00380000 + ((_i) * 8)) +#define GLPRT_GOTCH(_i) (0x00380B44 + ((_i) * 8)) +#define GLPRT_GOTCL(_i) (0x00380B40 + ((_i) * 8)) +#define GLPRT_ILLERRC(_i) (0x003801C0 + ((_i) * 8)) +#define GLPRT_LXOFFRXC(_i) (0x003802C0 + ((_i) * 8)) +#define GLPRT_LXOFFTXC(_i) (0x00381180 + ((_i) * 8)) +#define GLPRT_LXONRXC(_i) (0x00380280 + ((_i) * 8)) +#define GLPRT_LXONTXC(_i) (0x00381140 + ((_i) * 8)) +#define GLPRT_MLFC(_i) (0x00380040 + ((_i) * 8)) +#define GLPRT_MPRCH(_i) (0x00381344 + ((_i) * 8)) +#define GLPRT_MPRCL(_i) (0x00381340 + ((_i) * 8)) +#define GLPRT_MPTCH(_i) (0x00381204 + ((_i) * 8)) +#define GLPRT_MPTCL(_i) (0x00381200 + ((_i) * 8)) +#define GLPRT_MRFC(_i) (0x00380080 + ((_i) * 8)) +#define GLPRT_PRC1023H(_i) (0x00380A04 + ((_i) * 8)) +#define GLPRT_PRC1023L(_i) (0x00380A00 + ((_i) * 8)) +#define GLPRT_PRC127H(_i) (0x00380944 + ((_i) * 8)) +#define GLPRT_PRC127L(_i) (0x00380940 + ((_i) * 8)) +#define GLPRT_PRC1522H(_i) (0x00380A44 + ((_i) * 8)) +#define GLPRT_PRC1522L(_i) (0x00380A40 + ((_i) * 8)) +#define GLPRT_PRC255H(_i) (0x00380984 + ((_i) * 8)) +#define GLPRT_PRC255L(_i) (0x00380980 + ((_i) * 8)) +#define GLPRT_PRC511H(_i) (0x003809C4 + ((_i) * 8)) +#define GLPRT_PRC511L(_i) (0x003809C0 + ((_i) * 8)) +#define GLPRT_PRC64H(_i) (0x00380904 + ((_i) * 8)) +#define GLPRT_PRC64L(_i) (0x00380900 + ((_i) * 8)) +#define GLPRT_PRC9522H(_i) (0x00380A84 + ((_i) * 8)) +#define GLPRT_PRC9522L(_i) (0x00380A80 + ((_i) * 8)) +#define GLPRT_PTC1023H(_i) (0x00380C84 + ((_i) * 8)) +#define GLPRT_PTC1023L(_i) (0x00380C80 + ((_i) * 8)) +#define GLPRT_PTC127H(_i) (0x00380BC4 + ((_i) * 8)) +#define GLPRT_PTC127L(_i) (0x00380BC0 + ((_i) * 8)) +#define GLPRT_PTC1522H(_i) (0x00380CC4 + ((_i) * 8)) +#define GLPRT_PTC1522L(_i) (0x00380CC0 + ((_i) * 8)) +#define GLPRT_PTC255H(_i) (0x00380C04 + ((_i) * 8)) +#define GLPRT_PTC255L(_i) (0x00380C00 + ((_i) * 8)) +#define GLPRT_PTC511H(_i) (0x00380C44 + ((_i) * 8)) +#define GLPRT_PTC511L(_i) (0x00380C40 + ((_i) * 8)) +#define GLPRT_PTC64H(_i) (0x00380B84 + ((_i) * 8)) +#define GLPRT_PTC64L(_i) (0x00380B80 + ((_i) * 8)) +#define GLPRT_PTC9522H(_i) (0x00380D04 + ((_i) * 8)) +#define GLPRT_PTC9522L(_i) (0x00380D00 + ((_i) * 8)) +#define GLPRT_RFC(_i) (0x00380AC0 + ((_i) * 8)) +#define GLPRT_RJC(_i) (0x00380B00 + ((_i) * 8)) +#define GLPRT_RLEC(_i) (0x00380140 + ((_i) * 8)) +#define GLPRT_ROC(_i) (0x00380240 + ((_i) * 8)) +#define GLPRT_RUC(_i) (0x00380200 + ((_i) * 8)) +#define GLPRT_TDOLD(_i) (0x00381280 + ((_i) * 8)) +#define GLPRT_UPRCH(_i) (0x00381304 + ((_i) * 8)) +#define GLPRT_UPRCL(_i) (0x00381300 + ((_i) * 8)) +#define GLPRT_UPTCH(_i) (0x003811C4 + ((_i) * 8)) +#define GLPRT_UPTCL(_i) (0x003811C0 + ((_i) * 8)) +#define GLV_BPRCH(_i) (0x003B6004 + ((_i) * 8)) +#define GLV_BPRCL(_i) (0x003B6000 + ((_i) * 8)) +#define GLV_BPTCH(_i) (0x0030E004 + ((_i) * 8)) +#define GLV_BPTCL(_i) (0x0030E000 + ((_i) * 8)) +#define GLV_GORCH(_i) (0x003B0004 + ((_i) * 8)) +#define GLV_GORCL(_i) (0x003B0000 + ((_i) * 8)) +#define GLV_GOTCH(_i) (0x00300004 + ((_i) * 8)) +#define GLV_GOTCL(_i) (0x00300000 + ((_i) * 8)) +#define GLV_MPRCH(_i) (0x003B4004 + ((_i) * 8)) +#define GLV_MPRCL(_i) (0x003B4000 + ((_i) * 8)) +#define GLV_MPTCH(_i) (0x0030C004 + ((_i) * 8)) +#define GLV_MPTCL(_i) (0x0030C000 + ((_i) * 8)) +#define GLV_RDPC(_i) (0x00294C04 + ((_i) * 4)) +#define GLV_TEPC(_VSI) (0x00312000 + ((_VSI) * 4)) +#define GLV_UPRCH(_i) (0x003B2004 + ((_i) * 8)) +#define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8)) +#define GLV_UPTCH(_i) (0x0030A004 + ((_i) * 8)) +#define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8)) +#define VSIQF_HKEY_MAX_INDEX 12 #endif /* _ICE_HW_AUTOGEN_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2124e7ad640c..9c1841700314 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -9,7 +9,7 @@ #define DRV_VERSION "ice-0.0.1-k" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" -static const char ice_drv_ver[] = DRV_VERSION; +const char ice_drv_ver[] = DRV_VERSION; static const char ice_driver_string[] = DRV_SUMMARY; static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; @@ -30,6 +30,8 @@ static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_ops; static int ice_vsi_release(struct ice_vsi *vsi); +static void ice_update_vsi_stats(struct ice_vsi *vsi); +static void ice_update_pf_stats(struct ice_pf *pf); /** * ice_get_free_slot - get the next non-NULL location index in array @@ -214,12 +216,41 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h) } } +/** + * ice_watchdog_subtask - periodic tasks not using event driven scheduling + * @pf: board private structure + */ +static void ice_watchdog_subtask(struct ice_pf *pf) +{ + int i; + + /* if interface is down do nothing */ + if (test_bit(__ICE_DOWN, pf->state) || + test_bit(__ICE_CFG_BUSY, pf->state)) + return; + + /* make sure we don't do these things too often */ + if (time_before(jiffies, + pf->serv_tmr_prev + pf->serv_tmr_period)) + return; + + pf->serv_tmr_prev = jiffies; + + /* Update the stats for active netdevs so the network stack + * can look at updated numbers whenever it cares to + */ + ice_update_pf_stats(pf); + for (i = 0; i < pf->num_alloc_vsi; i++) + if (pf->vsi[i] && pf->vsi[i]->netdev) + ice_update_vsi_stats(pf->vsi[i]); +} + /** * ice_print_link_msg - print link up or down message * @vsi: the VSI whose link status is being queried * @isup: boolean for if the link is now up or down */ -static void ice_print_link_msg(struct ice_vsi *vsi, bool isup) +void ice_print_link_msg(struct ice_vsi *vsi, bool isup) { const char *speed; const char *fc; @@ -452,6 +483,7 @@ static void ice_service_task(struct work_struct *work) unsigned long start_time = jiffies; /* subtasks */ + ice_watchdog_subtask(pf); ice_clean_adminq_subtask(pf); /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ @@ -1763,6 +1795,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) /* setup watchdog timeout value to be 5 second */ netdev->watchdog_timeo = 5 * HZ; + ice_set_ethtool_ops(netdev); + netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = ICE_MAX_MTU; @@ -3459,6 +3493,434 @@ static int ice_up_complete(struct ice_vsi *vsi) return err; } +/** + * ice_up - Bring the connection back up after being down + * @vsi: VSI being configured + */ +int ice_up(struct ice_vsi *vsi) +{ + int err; + + err = ice_vsi_cfg(vsi); + if (!err) + err = ice_up_complete(vsi); + + return err; +} + +/** + * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring + * @ring: Tx or Rx ring to read stats from + * @pkts: packets stats counter + * @bytes: bytes stats counter + * + * This function fetches stats from the ring considering the atomic operations + * that needs to be performed to read u64 values in 32 bit machine. + */ +static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, + u64 *bytes) +{ + unsigned int start; + *pkts = 0; + *bytes = 0; + + if (!ring) + return; + do { + start = u64_stats_fetch_begin_irq(&ring->syncp); + *pkts = ring->stats.pkts; + *bytes = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); +} + +/** + * ice_stat_update40 - read 40 bit stat from the chip and update stat values + * @hw: ptr to the hardware info + * @hireg: high 32 bit HW register to read from + * @loreg: low 32 bit HW register to read from + * @prev_stat_loaded: bool to specify if previous stats are loaded + * @prev_stat: ptr to previous loaded stat value + * @cur_stat: ptr to current stat value + */ +static void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, + bool prev_stat_loaded, u64 *prev_stat, + u64 *cur_stat) +{ + u64 new_data; + + new_data = rd32(hw, loreg); + new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; + + /* device stats are not reset at PFR, they likely will not be zeroed + * when the driver starts. So save the first values read and use them as + * offsets to be subtracted from the raw values in order to report stats + * that count from zero. + */ + if (!prev_stat_loaded) + *prev_stat = new_data; + if (likely(new_data >= *prev_stat)) + *cur_stat = new_data - *prev_stat; + else + /* to manage the potential roll-over */ + *cur_stat = (new_data + BIT_ULL(40)) - *prev_stat; + *cur_stat &= 0xFFFFFFFFFFULL; +} + +/** + * ice_stat_update32 - read 32 bit stat from the chip and update stat values + * @hw: ptr to the hardware info + * @reg: HW register to read from + * @prev_stat_loaded: bool to specify if previous stats are loaded + * @prev_stat: ptr to previous loaded stat value + * @cur_stat: ptr to current stat value + */ +static void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, + u64 *prev_stat, u64 *cur_stat) +{ + u32 new_data; + + new_data = rd32(hw, reg); + + /* device stats are not reset at PFR, they likely will not be zeroed + * when the driver starts. So save the first values read and use them as + * offsets to be subtracted from the raw values in order to report stats + * that count from zero. + */ + if (!prev_stat_loaded) + *prev_stat = new_data; + if (likely(new_data >= *prev_stat)) + *cur_stat = new_data - *prev_stat; + else + /* to manage the potential roll-over */ + *cur_stat = (new_data + BIT_ULL(32)) - *prev_stat; +} + +/** + * ice_update_eth_stats - Update VSI-specific ethernet statistics counters + * @vsi: the VSI to be updated + */ +static void ice_update_eth_stats(struct ice_vsi *vsi) +{ + struct ice_eth_stats *prev_es, *cur_es; + struct ice_hw *hw = &vsi->back->hw; + u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */ + + prev_es = &vsi->eth_stats_prev; + cur_es = &vsi->eth_stats; + + ice_stat_update40(hw, GLV_GORCH(vsi_num), GLV_GORCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_bytes, + &cur_es->rx_bytes); + + ice_stat_update40(hw, GLV_UPRCH(vsi_num), GLV_UPRCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_unicast, + &cur_es->rx_unicast); + + ice_stat_update40(hw, GLV_MPRCH(vsi_num), GLV_MPRCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_multicast, + &cur_es->rx_multicast); + + ice_stat_update40(hw, GLV_BPRCH(vsi_num), GLV_BPRCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->rx_broadcast, + &cur_es->rx_broadcast); + + ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded, + &prev_es->rx_discards, &cur_es->rx_discards); + + ice_stat_update40(hw, GLV_GOTCH(vsi_num), GLV_GOTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_bytes, + &cur_es->tx_bytes); + + ice_stat_update40(hw, GLV_UPTCH(vsi_num), GLV_UPTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_unicast, + &cur_es->tx_unicast); + + ice_stat_update40(hw, GLV_MPTCH(vsi_num), GLV_MPTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_multicast, + &cur_es->tx_multicast); + + ice_stat_update40(hw, GLV_BPTCH(vsi_num), GLV_BPTCL(vsi_num), + vsi->stat_offsets_loaded, &prev_es->tx_broadcast, + &cur_es->tx_broadcast); + + ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded, + &prev_es->tx_errors, &cur_es->tx_errors); + + vsi->stat_offsets_loaded = true; +} + +/** + * ice_update_vsi_ring_stats - Update VSI stats counters + * @vsi: the VSI to be updated + */ +static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) +{ + struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; + struct ice_ring *ring; + u64 pkts, bytes; + int i; + + /* reset netdev stats */ + vsi_stats->tx_packets = 0; + vsi_stats->tx_bytes = 0; + vsi_stats->rx_packets = 0; + vsi_stats->rx_bytes = 0; + + /* reset non-netdev (extended) stats */ + vsi->tx_restart = 0; + vsi->tx_busy = 0; + vsi->tx_linearize = 0; + vsi->rx_buf_failed = 0; + vsi->rx_page_failed = 0; + + rcu_read_lock(); + + /* update Tx rings counters */ + ice_for_each_txq(vsi, i) { + ring = READ_ONCE(vsi->tx_rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + vsi_stats->tx_packets += pkts; + vsi_stats->tx_bytes += bytes; + vsi->tx_restart += ring->tx_stats.restart_q; + vsi->tx_busy += ring->tx_stats.tx_busy; + vsi->tx_linearize += ring->tx_stats.tx_linearize; + } + + /* update Rx rings counters */ + ice_for_each_rxq(vsi, i) { + ring = READ_ONCE(vsi->rx_rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + vsi_stats->rx_packets += pkts; + vsi_stats->rx_bytes += bytes; + vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; + vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; + } + + rcu_read_unlock(); +} + +/** + * ice_update_vsi_stats - Update VSI stats counters + * @vsi: the VSI to be updated + */ +static void ice_update_vsi_stats(struct ice_vsi *vsi) +{ + struct rtnl_link_stats64 *cur_ns = &vsi->net_stats; + struct ice_eth_stats *cur_es = &vsi->eth_stats; + struct ice_pf *pf = vsi->back; + + if (test_bit(__ICE_DOWN, vsi->state) || + test_bit(__ICE_CFG_BUSY, pf->state)) + return; + + /* get stats as recorded by Tx/Rx rings */ + ice_update_vsi_ring_stats(vsi); + + /* get VSI stats as recorded by the hardware */ + ice_update_eth_stats(vsi); + + cur_ns->tx_errors = cur_es->tx_errors; + cur_ns->rx_dropped = cur_es->rx_discards; + cur_ns->tx_dropped = cur_es->tx_discards; + cur_ns->multicast = cur_es->rx_multicast; + + /* update some more netdev stats if this is main VSI */ + if (vsi->type == ICE_VSI_PF) { + cur_ns->rx_crc_errors = pf->stats.crc_errors; + cur_ns->rx_errors = pf->stats.crc_errors + + pf->stats.illegal_bytes; + cur_ns->rx_length_errors = pf->stats.rx_len_errors; + } +} + +/** + * ice_update_pf_stats - Update PF port stats counters + * @pf: PF whose stats needs to be updated + */ +static void ice_update_pf_stats(struct ice_pf *pf) +{ + struct ice_hw_port_stats *prev_ps, *cur_ps; + struct ice_hw *hw = &pf->hw; + u8 pf_id; + + prev_ps = &pf->stats_prev; + cur_ps = &pf->stats; + pf_id = hw->pf_id; + + ice_stat_update40(hw, GLPRT_GORCH(pf_id), GLPRT_GORCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_bytes, + &cur_ps->eth.rx_bytes); + + ice_stat_update40(hw, GLPRT_UPRCH(pf_id), GLPRT_UPRCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_unicast, + &cur_ps->eth.rx_unicast); + + ice_stat_update40(hw, GLPRT_MPRCH(pf_id), GLPRT_MPRCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_multicast, + &cur_ps->eth.rx_multicast); + + ice_stat_update40(hw, GLPRT_BPRCH(pf_id), GLPRT_BPRCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.rx_broadcast, + &cur_ps->eth.rx_broadcast); + + ice_stat_update40(hw, GLPRT_GOTCH(pf_id), GLPRT_GOTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_bytes, + &cur_ps->eth.tx_bytes); + + ice_stat_update40(hw, GLPRT_UPTCH(pf_id), GLPRT_UPTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_unicast, + &cur_ps->eth.tx_unicast); + + ice_stat_update40(hw, GLPRT_MPTCH(pf_id), GLPRT_MPTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_multicast, + &cur_ps->eth.tx_multicast); + + ice_stat_update40(hw, GLPRT_BPTCH(pf_id), GLPRT_BPTCL(pf_id), + pf->stat_prev_loaded, &prev_ps->eth.tx_broadcast, + &cur_ps->eth.tx_broadcast); + + ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_dropped_link_down, + &cur_ps->tx_dropped_link_down); + + ice_stat_update40(hw, GLPRT_PRC64H(pf_id), GLPRT_PRC64L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_64, + &cur_ps->rx_size_64); + + ice_stat_update40(hw, GLPRT_PRC127H(pf_id), GLPRT_PRC127L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_127, + &cur_ps->rx_size_127); + + ice_stat_update40(hw, GLPRT_PRC255H(pf_id), GLPRT_PRC255L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_255, + &cur_ps->rx_size_255); + + ice_stat_update40(hw, GLPRT_PRC511H(pf_id), GLPRT_PRC511L(pf_id), + pf->stat_prev_loaded, &prev_ps->rx_size_511, + &cur_ps->rx_size_511); + + ice_stat_update40(hw, GLPRT_PRC1023H(pf_id), + GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_size_1023, &cur_ps->rx_size_1023); + + ice_stat_update40(hw, GLPRT_PRC1522H(pf_id), + GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_size_1522, &cur_ps->rx_size_1522); + + ice_stat_update40(hw, GLPRT_PRC9522H(pf_id), + GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_size_big, &cur_ps->rx_size_big); + + ice_stat_update40(hw, GLPRT_PTC64H(pf_id), GLPRT_PTC64L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_64, + &cur_ps->tx_size_64); + + ice_stat_update40(hw, GLPRT_PTC127H(pf_id), GLPRT_PTC127L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_127, + &cur_ps->tx_size_127); + + ice_stat_update40(hw, GLPRT_PTC255H(pf_id), GLPRT_PTC255L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_255, + &cur_ps->tx_size_255); + + ice_stat_update40(hw, GLPRT_PTC511H(pf_id), GLPRT_PTC511L(pf_id), + pf->stat_prev_loaded, &prev_ps->tx_size_511, + &cur_ps->tx_size_511); + + ice_stat_update40(hw, GLPRT_PTC1023H(pf_id), + GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_size_1023, &cur_ps->tx_size_1023); + + ice_stat_update40(hw, GLPRT_PTC1522H(pf_id), + GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_size_1522, &cur_ps->tx_size_1522); + + ice_stat_update40(hw, GLPRT_PTC9522H(pf_id), + GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded, + &prev_ps->tx_size_big, &cur_ps->tx_size_big); + + ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xon_rx, &cur_ps->link_xon_rx); + + ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx); + + ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xon_tx, &cur_ps->link_xon_tx); + + ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded, + &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx); + + ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded, + &prev_ps->crc_errors, &cur_ps->crc_errors); + + ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded, + &prev_ps->illegal_bytes, &cur_ps->illegal_bytes); + + ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded, + &prev_ps->mac_local_faults, + &cur_ps->mac_local_faults); + + ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded, + &prev_ps->mac_remote_faults, + &cur_ps->mac_remote_faults); + + ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); + + ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_undersize, &cur_ps->rx_undersize); + + ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_fragments, &cur_ps->rx_fragments); + + ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_oversize, &cur_ps->rx_oversize); + + ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded, + &prev_ps->rx_jabber, &cur_ps->rx_jabber); + + pf->stat_prev_loaded = true; +} + +/** + * ice_get_stats64 - get statistics for network device structure + * @netdev: network interface device structure + * @stats: main device statistics structure + */ +static +void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct rtnl_link_stats64 *vsi_stats; + struct ice_vsi *vsi = np->vsi; + + vsi_stats = &vsi->net_stats; + + if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq) + return; + /* netdev packet/byte stats come from ring counter. These are obtained + * by summing up ring counters (done by ice_update_vsi_ring_stats). + */ + ice_update_vsi_ring_stats(vsi); + stats->tx_packets = vsi_stats->tx_packets; + stats->tx_bytes = vsi_stats->tx_bytes; + stats->rx_packets = vsi_stats->rx_packets; + stats->rx_bytes = vsi_stats->rx_bytes; + + /* The rest of the stats can be read from the hardware but instead we + * just return values that the watchdog task has already obtained from + * the hardware. + */ + stats->multicast = vsi_stats->multicast; + stats->tx_errors = vsi_stats->tx_errors; + stats->tx_dropped = vsi_stats->tx_dropped; + stats->rx_errors = vsi_stats->rx_errors; + stats->rx_dropped = vsi_stats->rx_dropped; + stats->rx_crc_errors = vsi_stats->rx_crc_errors; + stats->rx_length_errors = vsi_stats->rx_length_errors; +} + /** * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI * @vsi: VSI having NAPI disabled @@ -3478,7 +3940,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) * ice_down - Shutdown the connection * @vsi: The VSI being stopped */ -static int ice_down(struct ice_vsi *vsi) +int ice_down(struct ice_vsi *vsi) { int i, err; @@ -3878,6 +4340,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_open = ice_open, .ndo_stop = ice_stop, .ndo_start_xmit = ice_start_xmit, + .ndo_get_stats64 = ice_get_stats64, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_set_features = ice_set_features, diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 991ac56ca7b3..c7e0de3b8662 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -20,6 +20,7 @@ #define ICE_DBG_RES BIT_ULL(17) #define ICE_DBG_AQ_MSG BIT_ULL(24) #define ICE_DBG_AQ_CMD BIT_ULL(27) +#define ICE_DBG_USER BIT_ULL(31) enum ice_aq_res_ids { ICE_NVM_RES_ID = 1, @@ -42,6 +43,13 @@ enum ice_fc_mode { ICE_FC_DFLT }; +enum ice_set_fc_aq_failures { + ICE_SET_FC_AQ_FAIL_NONE = 0, + ICE_SET_FC_AQ_FAIL_GET, + ICE_SET_FC_AQ_FAIL_SET, + ICE_SET_FC_AQ_FAIL_UPDATE +}; + /* Various MAC types */ enum ice_mac_type { ICE_MAC_UNKNOWN = 0, @@ -301,10 +309,72 @@ struct ice_hw { }; +/* Statistics collected by each port, VSI, VEB, and S-channel */ +struct ice_eth_stats { + u64 rx_bytes; /* gorc */ + u64 rx_unicast; /* uprc */ + u64 rx_multicast; /* mprc */ + u64 rx_broadcast; /* bprc */ + u64 rx_discards; /* rdpc */ + u64 rx_unknown_protocol; /* rupp */ + u64 tx_bytes; /* gotc */ + u64 tx_unicast; /* uptc */ + u64 tx_multicast; /* mptc */ + u64 tx_broadcast; /* bptc */ + u64 tx_discards; /* tdpc */ + u64 tx_errors; /* tepc */ +}; + +/* Statistics collected by the MAC */ +struct ice_hw_port_stats { + /* eth stats collected by the port */ + struct ice_eth_stats eth; + /* additional port specific stats */ + u64 tx_dropped_link_down; /* tdold */ + u64 crc_errors; /* crcerrs */ + u64 illegal_bytes; /* illerrc */ + u64 error_bytes; /* errbc */ + u64 mac_local_faults; /* mlfc */ + u64 mac_remote_faults; /* mrfc */ + u64 rx_len_errors; /* rlec */ + u64 link_xon_rx; /* lxonrxc */ + u64 link_xoff_rx; /* lxoffrxc */ + u64 link_xon_tx; /* lxontxc */ + u64 link_xoff_tx; /* lxofftxc */ + u64 rx_size_64; /* prc64 */ + u64 rx_size_127; /* prc127 */ + u64 rx_size_255; /* prc255 */ + u64 rx_size_511; /* prc511 */ + u64 rx_size_1023; /* prc1023 */ + u64 rx_size_1522; /* prc1522 */ + u64 rx_size_big; /* prc9522 */ + u64 rx_undersize; /* ruc */ + u64 rx_fragments; /* rfc */ + u64 rx_oversize; /* roc */ + u64 rx_jabber; /* rjc */ + u64 tx_size_64; /* ptc64 */ + u64 tx_size_127; /* ptc127 */ + u64 tx_size_255; /* ptc255 */ + u64 tx_size_511; /* ptc511 */ + u64 tx_size_1023; /* ptc1023 */ + u64 tx_size_1522; /* ptc1522 */ + u64 tx_size_big; /* ptc9522 */ +}; + /* Checksum and Shadow RAM pointers */ #define ICE_SR_NVM_DEV_STARTER_VER 0x18 #define ICE_SR_NVM_EETRACK_LO 0x2D #define ICE_SR_NVM_EETRACK_HI 0x2E +#define ICE_NVM_VER_LO_SHIFT 0 +#define ICE_NVM_VER_LO_MASK (0xff << ICE_NVM_VER_LO_SHIFT) +#define ICE_NVM_VER_HI_SHIFT 12 +#define ICE_NVM_VER_HI_MASK (0xf << ICE_NVM_VER_HI_SHIFT) +#define ICE_OEM_VER_PATCH_SHIFT 0 +#define ICE_OEM_VER_PATCH_MASK (0xff << ICE_OEM_VER_PATCH_SHIFT) +#define ICE_OEM_VER_BUILD_SHIFT 8 +#define ICE_OEM_VER_BUILD_MASK (0xffff << ICE_OEM_VER_BUILD_SHIFT) +#define ICE_OEM_VER_SHIFT 24 +#define ICE_OEM_VER_MASK (0xff << ICE_OEM_VER_SHIFT) #define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 #define ICE_SR_WORDS_IN_1KB 512 From 5513b920a4f7869acffc8e6d800aebee608409e0 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:17 -0700 Subject: [PATCH 13/15] ice: Update Tx scheduler tree for VSI multi-Tx queue support This patch adds the ability for a VSI to use multiple Tx queues. More specifically, the patch 1) Provides the ability to update the Tx scheduler tree in the firmware. The driver can configure the Tx scheduler tree by adding/removing multiple Tx queues per TC per VSI. 2) Allows a VSI to reconfigure its Tx queues during runtime. 3) Synchronizes the Tx scheduler update operations using locks. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 7 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 28 + drivers/net/ethernet/intel/ice/ice_common.c | 54 ++ drivers/net/ethernet/intel/ice/ice_common.h | 3 + drivers/net/ethernet/intel/ice/ice_main.c | 20 +- drivers/net/ethernet/intel/ice/ice_sched.c | 886 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_sched.h | 4 + drivers/net/ethernet/intel/ice/ice_type.h | 7 + 8 files changed, 1006 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 66d0976afa46..8898255e5a65 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -42,6 +42,7 @@ extern const char ice_drv_ver[]; #define ICE_MIN_NUM_DESC 8 #define ICE_MAX_NUM_DESC 8160 #define ICE_REQ_DESC_MULTIPLE 32 +#define ICE_DFLT_TRAFFIC_CLASS BIT(0) #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) #define ICE_ETHTOOL_FWVER_LEN 32 #define ICE_AQ_LEN 64 @@ -261,6 +262,12 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, wr32(hw, GLINT_DYN_CTL(vector), val); } +static inline void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) +{ + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; +} + void ice_set_ethtool_ops(struct net_device *netdev); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index ae7376f2135b..348b9cc3c596 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -631,6 +631,25 @@ struct ice_aqc_get_topo { __le32 addr_low; }; +/* Update TSE (indirect 0x0403) + * Get TSE (indirect 0x0404) + */ +struct ice_aqc_get_cfg_elem { + __le16 num_elem_req; /* Used by commands */ + __le16 num_elem_resp; /* Used by responses */ + __le32 reserved; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is the buffer for: + * Suspend Nodes (indirect 0x0409) + * Resume Nodes (indirect 0x040A) + */ +struct ice_aqc_suspend_resume_elem { + __le32 teid[1]; +}; + /* Add TSE (indirect 0x0401) * Delete TSE (indirect 0x040F) * Move TSE (indirect 0x0408) @@ -691,6 +710,11 @@ struct ice_aqc_txsched_topo_grp_info_hdr { __le16 reserved2; }; +struct ice_aqc_add_elem { + struct ice_aqc_txsched_topo_grp_info_hdr hdr; + struct ice_aqc_txsched_elem_data generic[1]; +}; + struct ice_aqc_get_topo_elem { struct ice_aqc_txsched_topo_grp_info_hdr hdr; struct ice_aqc_txsched_elem_data @@ -1181,6 +1205,7 @@ struct ice_aq_desc { struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_sw_rules sw_rules; struct ice_aqc_get_topo get_topo; + struct ice_aqc_get_cfg_elem get_update_elem; struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_add_move_delete_elem add_move_delete_elem; struct ice_aqc_nvm nvm; @@ -1258,6 +1283,9 @@ enum ice_adminq_opc { /* transmit scheduler commands */ ice_aqc_opc_get_dflt_topo = 0x0400, + ice_aqc_opc_add_sched_elems = 0x0401, + ice_aqc_opc_suspend_sched_elems = 0x0409, + ice_aqc_opc_resume_sched_elems = 0x040A, ice_aqc_opc_delete_sched_elems = 0x040F, ice_aqc_opc_query_sched_res = 0x0412, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 02d589275db0..0ab9bf66c84e 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2089,3 +2089,57 @@ ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, mutex_unlock(&pi->sched_lock); return status; } + +/** + * ice_cfg_vsi_qs - configure the new/exisiting VSI queues + * @pi: port information structure + * @vsi_id: VSI Id + * @tc_bitmap: TC bitmap + * @maxqs: max queues array per TC + * @owner: lan or rdma + * + * This function adds/updates the VSI queues per TC. + */ +static enum ice_status +ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap, + u16 *maxqs, u8 owner) +{ + enum ice_status status = 0; + u8 i; + + if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) + return ICE_ERR_CFG; + + mutex_lock(&pi->sched_lock); + + for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + /* configuration is possible only if TC node is present */ + if (!ice_sched_get_tc_node(pi, i)) + continue; + + status = ice_sched_cfg_vsi(pi, vsi_id, i, maxqs[i], owner, + ice_is_tc_ena(tc_bitmap, i)); + if (status) + break; + } + + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_vsi_lan - configure VSI lan queues + * @pi: port information structure + * @vsi_id: VSI Id + * @tc_bitmap: TC bitmap + * @max_lanqs: max lan queues array per TC + * + * This function adds/updates the VSI lan queues per TC. + */ +enum ice_status +ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap, + u16 *max_lanqs) +{ + return ice_cfg_vsi_qs(pi, vsi_id, tc_bitmap, max_lanqs, + ICE_SCHED_NODE_OWNER_LAN); +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index c4213fbf9579..971277ebcc86 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -69,6 +69,9 @@ enum ice_status ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, u32 *q_teids, struct ice_sq_cd *cmd_details); enum ice_status +ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap, + u16 *max_lanqs); +enum ice_status ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size, struct ice_sq_cd *cd); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9c1841700314..0f8637b97a34 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2085,10 +2085,11 @@ static struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, struct ice_port_info *pi) { + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = &pf->pdev->dev; struct ice_vsi_ctx ctxt = { 0 }; struct ice_vsi *vsi; - int ret; + int ret, i; vsi = ice_vsi_alloc(pf, type); if (!vsi) { @@ -2156,6 +2157,20 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, */ goto err_rings; } + + ice_vsi_set_tc_cfg(vsi); + + /* configure VSI nodes based on number of queues and TC's */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq; + + ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num, + vsi->tc_cfg.ena_tc, max_txqs); + if (ret) { + dev_info(&pf->pdev->dev, "Failed VSI lan queue config\n"); + goto err_rings; + } + return vsi; err_rings: @@ -2398,8 +2413,7 @@ static void ice_determine_q_usage(struct ice_pf *pf) q_left_tx = pf->hw.func_caps.common_cap.num_txq; q_left_rx = pf->hw.func_caps.common_cap.num_rxq; - /* initial support for only 1 tx queue */ - pf->num_lan_tx = 1; + pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus()); /* only 1 rx queue unless RSS is enabled */ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index e50eebbf78e3..f16ff3e4a840 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -359,6 +359,110 @@ ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport, return status; } +/** + * ice_aq_add_sched_elems - adds scheduling element + * @hw: pointer to the hw struct + * @grps_req: the number of groups that are requested to be added + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @grps_added: returns total number of groups added + * @cd: pointer to command details structure or NULL + * + * Add scheduling elements (0x0401) + */ +static enum ice_status +ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req, + struct ice_aqc_add_elem *buf, u16 buf_size, + u16 *grps_added, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_move_delete_elem *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.add_move_delete_elem; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_grps_req = cpu_to_le16(grps_req); + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && grps_added) + *grps_added = le16_to_cpu(cmd->num_grps_updated); + + return status; +} + +/** + * ice_suspend_resume_elems - suspend/resume scheduler elements + * @hw: pointer to the hw struct + * @elems_req: number of elements to suspend + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_ret: returns total number of elements suspended + * @cd: pointer to command details structure or NULL + * @cmd_code: command code for suspend or resume + * + * suspend/resume scheduler elements + */ +static enum ice_status +ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_suspend_resume_elem *buf, u16 buf_size, + u16 *elems_ret, struct ice_sq_cd *cd, + enum ice_adminq_opc cmd_code) +{ + struct ice_aqc_get_cfg_elem *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.get_update_elem; + ice_fill_dflt_direct_cmd_desc(&desc, cmd_code); + cmd->num_elem_req = cpu_to_le16(elems_req); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); + if (!status && elems_ret) + *elems_ret = le16_to_cpu(cmd->num_elem_resp); + return status; +} + +/** + * ice_aq_suspend_sched_elems - suspend scheduler elements + * @hw: pointer to the hw struct + * @elems_req: number of elements to suspend + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_ret: returns total number of elements suspended + * @cd: pointer to command details structure or NULL + * + * Suspend scheduling elements (0x0409) + */ +static enum ice_status +ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_suspend_resume_elem *buf, + u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) +{ + return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, + cd, ice_aqc_opc_suspend_sched_elems); +} + +/** + * ice_aq_resume_sched_elems - resume scheduler elements + * @hw: pointer to the hw struct + * @elems_req: number of elements to resume + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @elems_ret: returns total number of elements resumed + * @cd: pointer to command details structure or NULL + * + * resume scheduling elements (0x040A) + */ +static enum ice_status +ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req, + struct ice_aqc_suspend_resume_elem *buf, + u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd) +{ + return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret, + cd, ice_aqc_opc_resume_sched_elems); +} + /** * ice_aq_query_sched_res - query scheduler resource * @hw: pointer to the hw struct @@ -379,6 +483,46 @@ ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size, return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); } +/** + * ice_sched_suspend_resume_elems - suspend or resume hw nodes + * @hw: pointer to the hw struct + * @num_nodes: number of nodes + * @node_teids: array of node teids to be suspended or resumed + * @suspend: true means suspend / false means resume + * + * This function suspends or resumes hw nodes + */ +static enum ice_status +ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids, + bool suspend) +{ + struct ice_aqc_suspend_resume_elem *buf; + u16 i, buf_size, num_elem_ret = 0; + enum ice_status status; + + buf_size = sizeof(*buf) * num_nodes; + buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + for (i = 0; i < num_nodes; i++) + buf->teid[i] = cpu_to_le32(node_teids[i]); + + if (suspend) + status = ice_aq_suspend_sched_elems(hw, num_nodes, buf, + buf_size, &num_elem_ret, + NULL); + else + status = ice_aq_resume_sched_elems(hw, num_nodes, buf, + buf_size, &num_elem_ret, + NULL); + if (status || num_elem_ret != num_nodes) + ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n"); + + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + /** * ice_sched_clear_tx_topo - clears the schduler tree nodes * @pi: port information structure @@ -462,6 +606,215 @@ void ice_sched_cleanup_all(struct ice_hw *hw) hw->max_cgds = 0; } +/** + * ice_sched_create_vsi_info_entry - create an empty new VSI entry + * @pi: port information structure + * @vsi_id: VSI Id + * + * This function creates a new VSI entry and adds it to list + */ +static struct ice_sched_vsi_info * +ice_sched_create_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id) +{ + struct ice_sched_vsi_info *vsi_elem; + + if (!pi) + return NULL; + + vsi_elem = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*vsi_elem), + GFP_KERNEL); + if (!vsi_elem) + return NULL; + + list_add(&vsi_elem->list_entry, &pi->vsi_info_list); + vsi_elem->vsi_id = vsi_id; + return vsi_elem; +} + +/** + * ice_sched_add_elems - add nodes to hw and SW DB + * @pi: port information structure + * @tc_node: pointer to the branch node + * @parent: pointer to the parent node + * @layer: layer number to add nodes + * @num_nodes: number of nodes + * @num_nodes_added: pointer to num nodes added + * @first_node_teid: if new nodes are added then return the teid of first node + * + * This function add nodes to hw as well as to SW DB for a given layer + */ +static enum ice_status +ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, u16 num_nodes, + u16 *num_nodes_added, u32 *first_node_teid) +{ + struct ice_sched_node *prev, *new_node; + struct ice_aqc_add_elem *buf; + u16 i, num_groups_added = 0; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u16 buf_size; + u32 teid; + + buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1); + buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + buf->hdr.parent_teid = parent->info.node_teid; + buf->hdr.num_elems = cpu_to_le16(num_nodes); + for (i = 0; i < num_nodes; i++) { + buf->generic[i].parent_teid = parent->info.node_teid; + buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC; + buf->generic[i].data.valid_sections = + ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR | + ICE_AQC_ELEM_VALID_EIR; + buf->generic[i].data.generic = 0; + buf->generic[i].data.cir_bw.bw_profile_idx = + ICE_SCHED_DFLT_RL_PROF_ID; + buf->generic[i].data.eir_bw.bw_profile_idx = + ICE_SCHED_DFLT_RL_PROF_ID; + } + + status = ice_aq_add_sched_elems(hw, 1, buf, buf_size, + &num_groups_added, NULL); + if (status || num_groups_added != 1) { + ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n"); + devm_kfree(ice_hw_to_dev(hw), buf); + return ICE_ERR_CFG; + } + + *num_nodes_added = num_nodes; + /* add nodes to the SW DB */ + for (i = 0; i < num_nodes; i++) { + status = ice_sched_add_node(pi, layer, &buf->generic[i]); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, + "add nodes in SW DB failed status =%d\n", + status); + break; + } + + teid = le32_to_cpu(buf->generic[i].node_teid); + new_node = ice_sched_find_node_by_teid(parent, teid); + + if (!new_node) { + ice_debug(hw, ICE_DBG_SCHED, + "Node is missing for teid =%d\n", teid); + break; + } + + new_node->sibling = NULL; + new_node->tc_num = tc_node->tc_num; + + /* add it to previous node sibling pointer */ + /* Note: siblings are not linked across branches */ + prev = ice_sched_get_first_node(hw, tc_node, layer); + + if (prev && prev != new_node) { + while (prev->sibling) + prev = prev->sibling; + prev->sibling = new_node; + } + + if (i == 0) + *first_node_teid = teid; + } + + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_sched_add_nodes_to_layer - Add nodes to a given layer + * @pi: port information structure + * @tc_node: pointer to TC node + * @parent: pointer to parent node + * @layer: layer number to add nodes + * @num_nodes: number of nodes to be added + * @first_node_teid: pointer to the first node teid + * @num_nodes_added: pointer to number of nodes added + * + * This function add nodes to a given layer. + */ +static enum ice_status +ice_sched_add_nodes_to_layer(struct ice_port_info *pi, + struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, + u16 num_nodes, u32 *first_node_teid, + u16 *num_nodes_added) +{ + u32 *first_teid_ptr = first_node_teid; + u16 new_num_nodes, max_child_nodes; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u16 num_added = 0; + u32 temp; + + if (!num_nodes) + return status; + + if (!parent || layer < hw->sw_entry_point_layer) + return ICE_ERR_PARAM; + + *num_nodes_added = 0; + + /* max children per node per layer */ + max_child_nodes = + le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children); + + /* current number of children + required nodes exceed max children ? */ + if ((parent->num_children + num_nodes) > max_child_nodes) { + /* Fail if the parent is a TC node */ + if (parent == tc_node) + return ICE_ERR_CFG; + + /* utilize all the spaces if the parent is not full */ + if (parent->num_children < max_child_nodes) { + new_num_nodes = max_child_nodes - parent->num_children; + /* this recursion is intentional, and wouldn't + * go more than 2 calls + */ + status = ice_sched_add_nodes_to_layer(pi, tc_node, + parent, layer, + new_num_nodes, + first_node_teid, + &num_added); + if (status) + return status; + + *num_nodes_added += num_added; + } + /* Don't modify the first node teid memory if the first node was + * added already in the above call. Instead send some temp + * memory for all other recursive calls. + */ + if (num_added) + first_teid_ptr = &temp; + + new_num_nodes = num_nodes - num_added; + + /* This parent is full, try the next sibling */ + parent = parent->sibling; + + /* this recursion is intentional, for 1024 queues + * per VSI, it goes max of 16 iterations. + * 1024 / 8 = 128 layer 8 nodes + * 128 /8 = 16 (add 8 nodes per iteration) + */ + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, + layer, new_num_nodes, + first_teid_ptr, + &num_added); + *num_nodes_added += num_added; + return status; + } + + status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, + num_nodes_added, first_node_teid); + return status; +} + /** * ice_sched_get_qgrp_layer - get the current queue group layer number * @hw: pointer to the hw struct @@ -474,6 +827,101 @@ static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw) return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET; } +/** + * ice_sched_get_vsi_layer - get the current VSI layer number + * @hw: pointer to the hw struct + * + * This function returns the current VSI layer number + */ +static u8 ice_sched_get_vsi_layer(struct ice_hw *hw) +{ + /* Num Layers VSI layer + * 9 6 + * 7 4 + * 5 or less sw_entry_point_layer + */ + /* calculate the vsi layer based on number of layers. */ + if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) { + u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET; + + if (layer > hw->sw_entry_point_layer) + return layer; + } + return hw->sw_entry_point_layer; +} + +/** + * ice_sched_get_num_nodes_per_layer - Get the total number of nodes per layer + * @pi: pointer to the port info struct + * @layer: layer number + * + * This function calculates the number of nodes present in the scheduler tree + * including all the branches for a given layer + */ +static u16 +ice_sched_get_num_nodes_per_layer(struct ice_port_info *pi, u8 layer) +{ + struct ice_hw *hw; + u16 num_nodes = 0; + u8 i; + + if (!pi) + return num_nodes; + + hw = pi->hw; + + /* Calculate the number of nodes for all TCs */ + for (i = 0; i < pi->root->num_children; i++) { + struct ice_sched_node *tc_node, *node; + + tc_node = pi->root->children[i]; + + /* Get the first node */ + node = ice_sched_get_first_node(hw, tc_node, layer); + if (!node) + continue; + + /* count the siblings */ + while (node) { + num_nodes++; + node = node->sibling; + } + } + + return num_nodes; +} + +/** + * ice_sched_val_max_nodes - check max number of nodes reached or not + * @pi: port information structure + * @new_num_nodes_per_layer: pointer to the new number of nodes array + * + * This function checks whether the scheduler tree layers have enough space to + * add new nodes + */ +static enum ice_status +ice_sched_validate_for_max_nodes(struct ice_port_info *pi, + u16 *new_num_nodes_per_layer) +{ + struct ice_hw *hw = pi->hw; + u8 i, qg_layer; + u16 num_nodes; + + qg_layer = ice_sched_get_qgrp_layer(hw); + + /* walk through all the layers from SW entry point to qgroup layer */ + for (i = hw->sw_entry_point_layer; i <= qg_layer; i++) { + num_nodes = ice_sched_get_num_nodes_per_layer(pi, i); + if (num_nodes + new_num_nodes_per_layer[i] > + le16_to_cpu(hw->layer_info[i].max_pf_nodes)) { + ice_debug(hw, ICE_DBG_SCHED, + "max nodes reached for layer = %d\n", i); + return ICE_ERR_CFG; + } + } + return 0; +} + /** * ice_rm_dflt_leaf_node - remove the default leaf node in the tree * @pi: port information structure @@ -516,6 +964,7 @@ ice_sched_rm_dflt_nodes(struct ice_port_info *pi) struct ice_sched_node *node; ice_rm_dflt_leaf_node(pi); + /* remove the default nodes except TC and root nodes */ node = pi->root; while (node) { @@ -525,6 +974,7 @@ ice_sched_rm_dflt_nodes(struct ice_port_info *pi) ice_free_sched_node(pi, node); break; } + if (!node->num_children) break; node = node->children[0]; @@ -720,8 +1170,10 @@ ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base, if (node == child) return true; + if (child->tx_sched_layer > node->tx_sched_layer) return false; + /* this recursion is intentional, and wouldn't * go more than 8 calls */ @@ -751,13 +1203,17 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc, qgrp_layer = ice_sched_get_qgrp_layer(pi->hw); max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children); + list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id); if (!list_elem) goto lan_q_exit; + vsi_node = list_elem->vsi_node[tc]; + /* validate invalid VSI id */ if (!vsi_node) goto lan_q_exit; + /* get the first q group node from VSI sub-tree */ qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer); while (qgrp_node) { @@ -768,6 +1224,436 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc, break; qgrp_node = qgrp_node->sibling; } + lan_q_exit: return qgrp_node; } + +/** + * ice_sched_get_vsi_node - Get a VSI node based on VSI id + * @hw: pointer to the hw struct + * @tc_node: pointer to the TC node + * @vsi_id: VSI id + * + * This function retrieves a VSI node for a given VSI id from a given + * TC branch + */ +static struct ice_sched_node * +ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node, + u16 vsi_id) +{ + struct ice_sched_node *node; + u8 vsi_layer; + + vsi_layer = ice_sched_get_vsi_layer(hw); + node = ice_sched_get_first_node(hw, tc_node, vsi_layer); + + /* Check whether it already exists */ + while (node) { + if (node->vsi_id == vsi_id) + return node; + node = node->sibling; + } + + return node; +} + +/** + * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes + * @hw: pointer to the hw struct + * @num_qs: number of queues + * @num_nodes: num nodes array + * + * This function calculates the number of VSI child nodes based on the + * number of queues. + */ +static void +ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes) +{ + u16 num = num_qs; + u8 i, qgl, vsil; + + qgl = ice_sched_get_qgrp_layer(hw); + vsil = ice_sched_get_vsi_layer(hw); + + /* calculate num nodes from q group to VSI layer */ + for (i = qgl; i > vsil; i--) { + u16 max_children = le16_to_cpu(hw->layer_info[i].max_children); + + /* round to the next integer if there is a remainder */ + num = DIV_ROUND_UP(num, max_children); + + /* need at least one node */ + num_nodes[i] = num ? num : 1; + } +} + +/** + * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree + * @pi: port information structure + * @vsi_id: VSI id + * @tc_node: pointer to the TC node + * @num_nodes: pointer to the num nodes that needs to be added per layer + * @owner: node owner (lan or rdma) + * + * This function adds the VSI child nodes to tree. It gets called for + * lan and rdma separately. + */ +static enum ice_status +ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, + struct ice_sched_node *tc_node, u16 *num_nodes, + u8 owner) +{ + struct ice_sched_node *parent, *node; + struct ice_hw *hw = pi->hw; + enum ice_status status; + u32 first_node_teid; + u16 num_added = 0; + u8 i, qgl, vsil; + + status = ice_sched_validate_for_max_nodes(pi, num_nodes); + if (status) + return status; + + qgl = ice_sched_get_qgrp_layer(hw); + vsil = ice_sched_get_vsi_layer(hw); + parent = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + for (i = vsil + 1; i <= qgl; i++) { + if (!parent) + return ICE_ERR_CFG; + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i, + num_nodes[i], + &first_node_teid, + &num_added); + if (status || num_nodes[i] != num_added) + return ICE_ERR_CFG; + + /* The newly added node can be a new parent for the next + * layer nodes + */ + if (num_added) { + parent = ice_sched_find_node_by_teid(tc_node, + first_node_teid); + node = parent; + while (node) { + node->owner = owner; + node = node->sibling; + } + } else { + parent = parent->children[0]; + } + } + + return 0; +} + +/** + * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree + * @pi: port information structure + * @vsi_node: pointer to the VSI node + * @num_nodes: pointer to the num nodes that needs to be removed per layer + * @owner: node owner (lan or rdma) + * + * This function removes the VSI child nodes from the tree. It gets called for + * lan and rdma separately. + */ +static void +ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi, + struct ice_sched_node *vsi_node, u16 *num_nodes, + u8 owner) +{ + struct ice_sched_node *node, *next; + u8 i, qgl, vsil; + u16 num; + + qgl = ice_sched_get_qgrp_layer(pi->hw); + vsil = ice_sched_get_vsi_layer(pi->hw); + + for (i = qgl; i > vsil; i--) { + num = num_nodes[i]; + node = ice_sched_get_first_node(pi->hw, vsi_node, i); + while (node && num) { + next = node->sibling; + if (node->owner == owner && !node->num_children) { + ice_free_sched_node(pi, node); + num--; + } + node = next; + } + } +} + +/** + * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes + * @hw: pointer to the hw struct + * @tc_node: pointer to TC node + * @num_nodes: pointer to num nodes array + * + * This function calculates the number of supported nodes needed to add this + * VSI into tx tree including the VSI, parent and intermediate nodes in below + * layers + */ +static void +ice_sched_calc_vsi_support_nodes(struct ice_hw *hw, + struct ice_sched_node *tc_node, u16 *num_nodes) +{ + struct ice_sched_node *node; + u16 max_child; + u8 i, vsil; + + vsil = ice_sched_get_vsi_layer(hw); + for (i = vsil; i >= hw->sw_entry_point_layer; i--) + /* Add intermediate nodes if TC has no children and + * need at least one node for VSI + */ + if (!tc_node->num_children || i == vsil) { + num_nodes[i]++; + } else { + /* If intermediate nodes are reached max children + * then add a new one. + */ + node = ice_sched_get_first_node(hw, tc_node, i); + max_child = le16_to_cpu(hw->layer_info[i].max_children); + + /* scan all the siblings */ + while (node) { + if (node->num_children < max_child) + break; + node = node->sibling; + } + + /* all the nodes are full, allocate a new one */ + if (!node) + num_nodes[i]++; + } +} + +/** + * ice_sched_add_vsi_support_nodes - add VSI supported nodes into tx tree + * @pi: port information structure + * @vsi_id: VSI Id + * @tc_node: pointer to TC node + * @num_nodes: pointer to num nodes array + * + * This function adds the VSI supported nodes into tx tree including the + * VSI, its parent and intermediate nodes in below layers + */ +static enum ice_status +ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_id, + struct ice_sched_node *tc_node, u16 *num_nodes) +{ + struct ice_sched_node *parent = tc_node; + enum ice_status status; + u32 first_node_teid; + u16 num_added = 0; + u8 i, vsil; + + if (!pi) + return ICE_ERR_PARAM; + + status = ice_sched_validate_for_max_nodes(pi, num_nodes); + if (status) + return status; + + vsil = ice_sched_get_vsi_layer(pi->hw); + for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) { + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, + i, num_nodes[i], + &first_node_teid, + &num_added); + if (status || num_nodes[i] != num_added) + return ICE_ERR_CFG; + + /* The newly added node can be a new parent for the next + * layer nodes + */ + if (num_added) + parent = ice_sched_find_node_by_teid(tc_node, + first_node_teid); + else + parent = parent->children[0]; + + if (!parent) + return ICE_ERR_CFG; + + if (i == vsil) + parent->vsi_id = vsi_id; + } + return 0; +} + +/** + * ice_sched_add_vsi_to_topo - add a new VSI into tree + * @pi: port information structure + * @vsi_id: VSI Id + * @tc: TC number + * + * This function adds a new VSI into scheduler tree + */ +static enum ice_status +ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_id, u8 tc) +{ + u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + struct ice_sched_node *tc_node; + struct ice_hw *hw = pi->hw; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_PARAM; + + /* calculate number of supported nodes needed for this VSI */ + ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes); + + /* add vsi supported nodes to tc subtree */ + return ice_sched_add_vsi_support_nodes(pi, vsi_id, tc_node, num_nodes); +} + +/** + * ice_sched_update_vsi_child_nodes - update VSI child nodes + * @pi: port information structure + * @vsi_id: VSI Id + * @tc: TC number + * @new_numqs: new number of max queues + * @owner: owner of this subtree + * + * This function updates the VSI child nodes based on the number of queues + */ +static enum ice_status +ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc, + u16 new_numqs, u8 owner) +{ + u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + struct ice_sched_node *vsi_node; + struct ice_sched_node *tc_node; + struct ice_sched_vsi_info *vsi; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u16 prev_numqs; + u8 i; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_CFG; + + vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + if (!vsi_node) + return ICE_ERR_CFG; + + vsi = ice_sched_get_vsi_info_entry(pi, vsi_id); + if (!vsi) + return ICE_ERR_CFG; + + if (owner == ICE_SCHED_NODE_OWNER_LAN) + prev_numqs = vsi->max_lanq[tc]; + else + return ICE_ERR_PARAM; + + /* num queues are not changed */ + if (prev_numqs == new_numqs) + return status; + + /* calculate number of nodes based on prev/new number of qs */ + if (prev_numqs) + ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes); + + if (new_numqs) + ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes); + + if (prev_numqs > new_numqs) { + for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) + new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i]; + + ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes, + owner); + } else { + for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) + new_num_nodes[i] -= prev_num_nodes[i]; + + status = ice_sched_add_vsi_child_nodes(pi, vsi_id, tc_node, + new_num_nodes, owner); + if (status) + return status; + } + + if (owner == ICE_SCHED_NODE_OWNER_LAN) + vsi->max_lanq[tc] = new_numqs; + + return status; +} + +/** + * ice_sched_cfg_vsi - configure the new/exisiting VSI + * @pi: port information structure + * @vsi_id: VSI Id + * @tc: TC number + * @maxqs: max number of queues + * @owner: lan or rdma + * @enable: TC enabled or disabled + * + * This function adds/updates VSI nodes based on the number of queues. If TC is + * enabled and VSI is in suspended state then resume the VSI back. If TC is + * disabled then suspend the VSI if it is not already. + */ +enum ice_status +ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs, + u8 owner, bool enable) +{ + struct ice_sched_node *vsi_node, *tc_node; + struct ice_sched_vsi_info *vsi; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_PARAM; + + vsi = ice_sched_get_vsi_info_entry(pi, vsi_id); + if (!vsi) + vsi = ice_sched_create_vsi_info_entry(pi, vsi_id); + if (!vsi) + return ICE_ERR_NO_MEMORY; + + vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + + /* suspend the VSI if tc is not enabled */ + if (!enable) { + if (vsi_node && vsi_node->in_use) { + u32 teid = le32_to_cpu(vsi_node->info.node_teid); + + status = ice_sched_suspend_resume_elems(hw, 1, &teid, + true); + if (!status) + vsi_node->in_use = false; + } + return status; + } + + /* TC is enabled, if it is a new VSI then add it to the tree */ + if (!vsi_node) { + status = ice_sched_add_vsi_to_topo(pi, vsi_id, tc); + if (status) + return status; + vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id); + if (!vsi_node) + return ICE_ERR_CFG; + vsi->vsi_node[tc] = vsi_node; + vsi_node->in_use = true; + } + + /* update the VSI child nodes */ + status = ice_sched_update_vsi_child_nodes(pi, vsi_id, tc, maxqs, owner); + if (status) + return status; + + /* TC is enabled, resume the VSI if it is in the suspend state */ + if (!vsi_node->in_use) { + u32 teid = le32_to_cpu(vsi_node->info.node_teid); + + status = ice_sched_suspend_resume_elems(hw, 1, &teid, false); + if (!status) + vsi_node->in_use = true; + } + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 639859a3d0f0..badadcc120d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -7,6 +7,7 @@ #include "ice_common.h" #define ICE_QGRP_LAYER_OFFSET 2 +#define ICE_VSI_LAYER_OFFSET 4 struct ice_sched_agg_vsi_info { struct list_head list_entry; @@ -36,4 +37,7 @@ struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc); struct ice_sched_node * ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 owner); +enum ice_status +ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs, + u8 owner, bool enable); #endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index c7e0de3b8662..137a59a74b71 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -10,6 +10,11 @@ #include "ice_controlq.h" #include "ice_lan_tx_rx.h" +static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) +{ + return test_bit(tc, (unsigned long *)&bitmap); +} + /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) #define ICE_DBG_QCTX BIT_ULL(6) @@ -194,6 +199,8 @@ enum ice_agg_type { ICE_AGG_TYPE_QG }; +#define ICE_SCHED_DFLT_RL_PROF_ID 0 + /* vsi type list entry to locate corresponding vsi/ag nodes */ struct ice_sched_vsi_info { struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS]; From 0b28b702e72a6ff90d417689159f72e8891fed78 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:18 -0700 Subject: [PATCH 14/15] ice: Support link events, reset and rebuild Link events are posted to a PF's admin receive queue (ARQ). This patch adds the ability to detect and process link events. This patch also adds the ability to process resets. The driver can process the following resets: 1) EMP Reset (EMPR) 2) Global Reset (GLOBR) 3) Core Reset (CORER) 4) Physical Function Reset (PFR) EMPR is the largest level of reset that the driver can handle. An EMPR resets the manageability block and also the data path, including PHY and link for all the PFs. The affected PFs are notified of this event through a miscellaneous interrupt. GLOBR is a subset of EMPR. It does everything EMPR does except that it doesn't reset the manageability block. CORER is a subset of GLOBR. It does everything GLOBR does but doesn't reset PHY and link. PFR is a subset of CORER and affects only the given physical function. In other words, PFR can be thought of as a CORER for a single PF. Since only the issuing PF is affected, a PFR doesn't result in the miscellaneous interrupt being triggered. All the resets have the following in common: 1) Tx/Rx is halted and all queues are stopped. 2) All the VSIs and filters programmed for the PF are lost and have to be reprogrammed. 3) Control queue interfaces are reset and have to be reprogrammed. In the rebuild flow, control queues are reinitialized, VSIs are reallocated and filters are restored. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 19 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 19 + drivers/net/ethernet/intel/ice/ice_common.c | 60 ++ drivers/net/ethernet/intel/ice/ice_common.h | 5 + .../net/ethernet/intel/ice/ice_hw_autogen.h | 2 + drivers/net/ethernet/intel/ice/ice_main.c | 581 +++++++++++++++++- drivers/net/ethernet/intel/ice/ice_type.h | 1 + 7 files changed, 681 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8898255e5a65..24ce4b71c9ce 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -78,6 +78,11 @@ extern const char ice_drv_ver[]; #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i])) #define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i])) +/* Macro for each VSI in a PF */ +#define ice_for_each_vsi(pf, i) \ + for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) + +/* Macros for each tx/rx ring in a VSI */ #define ice_for_each_txq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_txq; (i)++) @@ -109,7 +114,16 @@ struct ice_sw { enum ice_state { __ICE_DOWN, + __ICE_NEEDS_RESTART, + __ICE_RESET_RECOVERY_PENDING, /* set by driver when reset starts */ __ICE_PFR_REQ, /* set by driver and peers */ + __ICE_CORER_REQ, /* set by driver and peers */ + __ICE_GLOBR_REQ, /* set by driver and peers */ + __ICE_CORER_RECV, /* set by OICR handler */ + __ICE_GLOBR_RECV, /* set by OICR handler */ + __ICE_EMPR_RECV, /* set by OICR handler */ + __ICE_SUSPENDED, /* set on module remove path */ + __ICE_RESET_FAILED, /* set by reset/rebuild */ __ICE_ADMINQ_EVENT_PENDING, __ICE_CFG_BUSY, __ICE_SERVICE_SCHED, @@ -226,6 +240,11 @@ struct ice_pf { u16 q_left_rx; /* remaining num rx queues left unclaimed */ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */ u16 num_alloc_vsi; + u16 corer_count; /* Core reset count */ + u16 globr_count; /* Global reset count */ + u16 empr_count; /* EMP reset count */ + u16 pfr_count; /* PF reset count */ + struct ice_hw_port_stats stats; struct ice_hw_port_stats stats_prev; struct ice_hw hw; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 348b9cc3c596..ea822a9edce8 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1009,6 +1009,23 @@ struct ice_aqc_get_link_status_data { __le64 reserved4; }; +/* Set event mask command (direct 0x0613) */ +struct ice_aqc_set_event_mask { + u8 lport_num; + u8 reserved[7]; + __le16 event_mask; +#define ICE_AQ_LINK_EVENT_UPDOWN BIT(1) +#define ICE_AQ_LINK_EVENT_MEDIA_NA BIT(2) +#define ICE_AQ_LINK_EVENT_LINK_FAULT BIT(3) +#define ICE_AQ_LINK_EVENT_PHY_TEMP_ALARM BIT(4) +#define ICE_AQ_LINK_EVENT_EXCESSIVE_ERRORS BIT(5) +#define ICE_AQ_LINK_EVENT_SIGNAL_DETECT BIT(6) +#define ICE_AQ_LINK_EVENT_AN_COMPLETED BIT(7) +#define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL BIT(8) +#define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED BIT(9) + u8 reserved1[6]; +}; + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1215,6 +1232,7 @@ struct ice_aq_desc { struct ice_aqc_dis_txqs dis_txqs; struct ice_aqc_add_get_update_free_vsi vsi_cmd; struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; + struct ice_aqc_set_event_mask set_event_mask; struct ice_aqc_get_link_status get_link_status; } params; }; @@ -1294,6 +1312,7 @@ enum ice_adminq_opc { ice_aqc_opc_set_phy_cfg = 0x0601, ice_aqc_opc_restart_an = 0x0605, ice_aqc_opc_get_link_status = 0x0607, + ice_aqc_opc_set_event_mask = 0x0613, /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 0ab9bf66c84e..791f1eba5953 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1427,6 +1427,39 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart) return status; } +/** + * ice_get_link_status - get status of the HW network link + * @pi: port information structure + * @link_up: pointer to bool (true/false = linkup/linkdown) + * + * Variable link_up is true if link is up, false if link is down. + * The variable link_up is invalid if status is non zero. As a + * result of this call, link status reporting becomes enabled + */ +enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up) +{ + struct ice_phy_info *phy_info; + enum ice_status status = 0; + + if (!pi) + return ICE_ERR_PARAM; + + phy_info = &pi->phy; + + if (phy_info->get_link_info) { + status = ice_update_link_info(pi); + + if (status) + ice_debug(pi->hw, ICE_DBG_LINK, + "get link status error, status = %d\n", + status); + } + + *link_up = phy_info->link_info.link_info & ICE_AQ_LINK_UP; + + return status; +} + /** * ice_aq_set_link_restart_an * @pi: pointer to the port information structure @@ -1456,6 +1489,33 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd); } +/** + * ice_aq_set_event_mask + * @hw: pointer to the hw struct + * @port_num: port number of the physical function + * @mask: event mask to be set + * @cd: pointer to command details structure or NULL + * + * Set event mask (0x0613) + */ +enum ice_status +ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, + struct ice_sq_cd *cd) +{ + struct ice_aqc_set_event_mask *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.set_event_mask; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask); + + cmd->lport_num = port_num; + + cmd->event_mask = cpu_to_le16(mask); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + /** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 971277ebcc86..ad350a4921a1 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -20,6 +20,8 @@ enum ice_status ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_rq_event_info *e, u16 *pending); enum ice_status +ice_get_link_status(struct ice_port_info *pi, bool *link_up); +enum ice_status ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, enum ice_aq_res_access_type access); void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); @@ -66,6 +68,9 @@ enum ice_status ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, struct ice_link_status *link, struct ice_sq_cd *cd); enum ice_status +ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, + struct ice_sq_cd *cd); +enum ice_status ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, u32 *q_teids, struct ice_sq_cd *cmd_details); enum ice_status diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 777a1c653edb..1b9e2ef48a9d 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -85,6 +85,8 @@ #define GLGEN_RSTCTL 0x000B8180 #define GLGEN_RSTCTL_GRSTDEL_S 0 #define GLGEN_RSTCTL_GRSTDEL_M ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S) +#define GLGEN_RSTAT_RESET_TYPE_S 2 +#define GLGEN_RSTAT_RESET_TYPE_M ICE_M(0x3, GLGEN_RSTAT_RESET_TYPE_S) #define GLGEN_RTRIG 0x000B8190 #define GLGEN_RTRIG_CORER_S 0 #define GLGEN_RTRIG_CORER_M BIT(GLGEN_RTRIG_CORER_S) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0f8637b97a34..2478dbe9bf4d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -29,6 +29,8 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_ops; +static void ice_pf_dis_all_vsi(struct ice_pf *pf); +static void ice_rebuild(struct ice_pf *pf); static int ice_vsi_release(struct ice_vsi *vsi); static void ice_update_vsi_stats(struct ice_vsi *vsi); static void ice_update_pf_stats(struct ice_pf *pf); @@ -216,6 +218,132 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h) } } +/** + * ice_is_reset_recovery_pending - schedule a reset + * @state: pf state field + */ +static bool ice_is_reset_recovery_pending(unsigned long int *state) +{ + return test_bit(__ICE_RESET_RECOVERY_PENDING, state); +} + +/** + * ice_prepare_for_reset - prep for the core to reset + * @pf: board private structure + * + * Inform or close all dependent features in prep for reset. + */ +static void +ice_prepare_for_reset(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + u32 v; + + ice_for_each_vsi(pf, v) + if (pf->vsi[v]) + ice_remove_vsi_fltr(hw, pf->vsi[v]->vsi_num); + + dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n"); + + /* disable the VSIs and their queues that are not already DOWN */ + /* pf_dis_all_vsi modifies netdev structures -rtnl_lock needed */ + ice_pf_dis_all_vsi(pf); + + ice_for_each_vsi(pf, v) + if (pf->vsi[v]) + pf->vsi[v]->vsi_num = 0; + + ice_shutdown_all_ctrlq(hw); +} + +/** + * ice_do_reset - Initiate one of many types of resets + * @pf: board private structure + * @reset_type: reset type requested + * before this function was called. + */ +static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) +{ + struct device *dev = &pf->pdev->dev; + struct ice_hw *hw = &pf->hw; + + dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); + WARN_ON(in_interrupt()); + + /* PFR is a bit of a special case because it doesn't result in an OICR + * interrupt. So for PFR, we prepare for reset, issue the reset and + * rebuild sequentially. + */ + if (reset_type == ICE_RESET_PFR) { + set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + ice_prepare_for_reset(pf); + } + + /* trigger the reset */ + if (ice_reset(hw, reset_type)) { + dev_err(dev, "reset %d failed\n", reset_type); + set_bit(__ICE_RESET_FAILED, pf->state); + clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + return; + } + + if (reset_type == ICE_RESET_PFR) { + pf->pfr_count++; + ice_rebuild(pf); + clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + } +} + +/** + * ice_reset_subtask - Set up for resetting the device and driver + * @pf: board private structure + */ +static void ice_reset_subtask(struct ice_pf *pf) +{ + enum ice_reset_req reset_type; + + rtnl_lock(); + + /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an + * OICR interrupt. The OICR handler (ice_misc_intr) determines what + * type of reset happened and sets __ICE_RESET_RECOVERY_PENDING bit in + * pf->state. So if reset/recovery is pending (as indicated by this bit) + * we do a rebuild and return. + */ + if (ice_is_reset_recovery_pending(pf->state)) { + clear_bit(__ICE_GLOBR_RECV, pf->state); + clear_bit(__ICE_CORER_RECV, pf->state); + ice_prepare_for_reset(pf); + + /* make sure we are ready to rebuild */ + if (ice_check_reset(&pf->hw)) + set_bit(__ICE_RESET_FAILED, pf->state); + else + ice_rebuild(pf); + clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + goto unlock; + } + + /* No pending resets to finish processing. Check for new resets */ + if (test_and_clear_bit(__ICE_GLOBR_REQ, pf->state)) + reset_type = ICE_RESET_GLOBR; + else if (test_and_clear_bit(__ICE_CORER_REQ, pf->state)) + reset_type = ICE_RESET_CORER; + else if (test_and_clear_bit(__ICE_PFR_REQ, pf->state)) + reset_type = ICE_RESET_PFR; + else + goto unlock; + + /* reset if not already down or resetting */ + if (!test_bit(__ICE_DOWN, pf->state) && + !test_bit(__ICE_CFG_BUSY, pf->state)) { + ice_do_reset(pf, reset_type); + } + +unlock: + rtnl_unlock(); +} + /** * ice_watchdog_subtask - periodic tasks not using event driven scheduling * @pf: board private structure @@ -314,6 +442,144 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) speed, fc); } +/** + * ice_init_link_events - enable/initialize link events + * @pi: pointer to the port_info instance + * + * Returns -EIO on failure, 0 on success + */ +static int ice_init_link_events(struct ice_port_info *pi) +{ + u16 mask; + + mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | + ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); + + if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { + dev_dbg(ice_hw_to_dev(pi->hw), + "Failed to set link event mask for port %d\n", + pi->lport); + return -EIO; + } + + if (ice_aq_get_link_info(pi, true, NULL, NULL)) { + dev_dbg(ice_hw_to_dev(pi->hw), + "Failed to enable link events for port %d\n", + pi->lport); + return -EIO; + } + + return 0; +} + +/** + * ice_vsi_link_event - update the vsi's netdev + * @vsi: the vsi on which the link event occurred + * @link_up: whether or not the vsi needs to be set up or down + */ +static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up) +{ + if (!vsi || test_bit(__ICE_DOWN, vsi->state)) + return; + + if (vsi->type == ICE_VSI_PF) { + if (!vsi->netdev) { + dev_dbg(&vsi->back->pdev->dev, + "vsi->netdev is not initialized!\n"); + return; + } + if (link_up) { + netif_carrier_on(vsi->netdev); + netif_tx_wake_all_queues(vsi->netdev); + } else { + netif_carrier_off(vsi->netdev); + netif_tx_stop_all_queues(vsi->netdev); + } + } +} + +/** + * ice_link_event - process the link event + * @pf: pf that the link event is associated with + * @pi: port_info for the port that the link event is associated with + * + * Returns -EIO if ice_get_link_status() fails + * Returns 0 on success + */ +static int +ice_link_event(struct ice_pf *pf, struct ice_port_info *pi) +{ + u8 new_link_speed, old_link_speed; + struct ice_phy_info *phy_info; + bool new_link_same_as_old; + bool new_link, old_link; + u8 lport; + u16 v; + + phy_info = &pi->phy; + phy_info->link_info_old = phy_info->link_info; + /* Force ice_get_link_status() to update link info */ + phy_info->get_link_info = true; + + old_link = (phy_info->link_info_old.link_info & ICE_AQ_LINK_UP); + old_link_speed = phy_info->link_info_old.link_speed; + + lport = pi->lport; + if (ice_get_link_status(pi, &new_link)) { + dev_dbg(&pf->pdev->dev, + "Could not get link status for port %d\n", lport); + return -EIO; + } + + new_link_speed = phy_info->link_info.link_speed; + + new_link_same_as_old = (new_link == old_link && + new_link_speed == old_link_speed); + + ice_for_each_vsi(pf, v) { + struct ice_vsi *vsi = pf->vsi[v]; + + if (!vsi || !vsi->port_info) + continue; + + if (new_link_same_as_old && + (test_bit(__ICE_DOWN, vsi->state) || + new_link == netif_carrier_ok(vsi->netdev))) + continue; + + if (vsi->port_info->lport == lport) { + ice_print_link_msg(vsi, new_link); + ice_vsi_link_event(vsi, new_link); + } + } + + return 0; +} + +/** + * ice_handle_link_event - handle link event via ARQ + * @pf: pf that the link event is associated with + * + * Return -EINVAL if port_info is null + * Return status on succes + */ +static int ice_handle_link_event(struct ice_pf *pf) +{ + struct ice_port_info *port_info; + int status; + + port_info = pf->hw.port_info; + if (!port_info) + return -EINVAL; + + status = ice_link_event(pf, port_info); + if (status) + dev_dbg(&pf->pdev->dev, + "Could not process link event, error %d\n", status); + + return status; +} + /** * __ice_clean_ctrlq - helper function to clean controlq rings * @pf: ptr to struct ice_pf @@ -328,6 +594,10 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) const char *qtype; u32 oldval, val; + /* Do not clean control queue if/when PF reset fails */ + if (test_bit(__ICE_RESET_FAILED, pf->state)) + return 0; + switch (q_type) { case ICE_CTL_Q_ADMIN: cq = &hw->adminq; @@ -394,6 +664,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) do { enum ice_status ret; + u16 opcode; ret = ice_clean_rq_elem(hw, cq, &event, &pending); if (ret == ICE_ERR_AQ_NO_WORK) @@ -404,6 +675,21 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ret); break; } + + opcode = le16_to_cpu(event.desc.opcode); + + switch (opcode) { + case ice_aqc_opc_get_link_status: + if (ice_handle_link_event(pf)) + dev_err(&pf->pdev->dev, + "Could not handle link event"); + break; + default: + dev_dbg(&pf->pdev->dev, + "%s Receive Queue unknown event 0x%04x ignored\n", + qtype, opcode); + break; + } } while (pending && (i++ < ICE_DFLT_IRQ_WORK)); devm_kfree(&pf->pdev->dev, event.msg_buf); @@ -483,6 +769,17 @@ static void ice_service_task(struct work_struct *work) unsigned long start_time = jiffies; /* subtasks */ + + /* process reset requests first */ + ice_reset_subtask(pf); + + /* bail if a reset/recovery cycle is pending */ + if (ice_is_reset_recovery_pending(pf->state) || + test_bit(__ICE_SUSPENDED, pf->state)) { + ice_service_task_complete(pf); + return; + } + ice_watchdog_subtask(pf); ice_clean_adminq_subtask(pf); @@ -1208,6 +1505,37 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (!(oicr & PFINT_OICR_INTEVENT_M)) goto ena_intr; + if (oicr & PFINT_OICR_GRST_M) { + u32 reset; + /* we have a reset warning */ + ena_mask &= ~PFINT_OICR_GRST_M; + reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> + GLGEN_RSTAT_RESET_TYPE_S; + + if (reset == ICE_RESET_CORER) + pf->corer_count++; + else if (reset == ICE_RESET_GLOBR) + pf->globr_count++; + else + pf->empr_count++; + + /* If a reset cycle isn't already in progress, we set a bit in + * pf->state so that the service task can start a reset/rebuild. + * We also make note of which reset happened so that peer + * devices/drivers can be informed. + */ + if (!test_bit(__ICE_RESET_RECOVERY_PENDING, pf->state)) { + if (reset == ICE_RESET_CORER) + set_bit(__ICE_CORER_RECV, pf->state); + else if (reset == ICE_RESET_GLOBR) + set_bit(__ICE_GLOBR_RECV, pf->state); + else + set_bit(__ICE_EMPR_RECV, pf->state); + + set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + } + } + if (oicr & PFINT_OICR_HMC_ERR_M) { ena_mask &= ~PFINT_OICR_HMC_ERR_M; dev_dbg(&pf->pdev->dev, @@ -1226,9 +1554,10 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) */ if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_PCI_EXCEPTION_M | - PFINT_OICR_ECC_ERR_M)) + PFINT_OICR_ECC_ERR_M)) { set_bit(__ICE_PFR_REQ, pf->state); - + ice_service_task_schedule(pf); + } ena_mask &= ~oicr; } ret = IRQ_HANDLED; @@ -1485,6 +1814,13 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) dev_driver_string(&pf->pdev->dev), dev_name(&pf->pdev->dev)); + /* Do not request IRQ but do enable OICR interrupt since settings are + * lost during reset. Note that this function is called only during + * rebuild path and not while reset is in progress. + */ + if (ice_is_reset_recovery_pending(pf->state)) + goto skip_req_irq; + /* reserve one vector in irq_tracker for misc interrupts */ oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); if (oicr_idx < 0) @@ -1503,6 +1839,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) return err; } +skip_req_irq: ice_ena_misc_vector(pf); val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | @@ -2070,6 +2407,100 @@ static int ice_vsi_cfg_rss(struct ice_vsi *vsi) return err; } +/** + * ice_vsi_reinit_setup - return resource and reallocate resource for a VSI + * @vsi: pointer to the ice_vsi + * + * This reallocates the VSIs queue resources + * + * Returns 0 on success and negative value on failure + */ +static int ice_vsi_reinit_setup(struct ice_vsi *vsi) +{ + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + int ret, i; + + if (!vsi) + return -EINVAL; + + ice_vsi_free_q_vectors(vsi); + ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); + vsi->base_vector = 0; + ice_vsi_clear_rings(vsi); + ice_vsi_free_arrays(vsi, false); + ice_vsi_set_num_qs(vsi); + + /* Initialize VSI struct elements and create VSI in FW */ + ret = ice_vsi_add(vsi); + if (ret < 0) + goto err_vsi; + + ret = ice_vsi_alloc_arrays(vsi, false); + if (ret < 0) + goto err_vsi; + + switch (vsi->type) { + case ICE_VSI_PF: + if (!vsi->netdev) { + ret = ice_cfg_netdev(vsi); + if (ret) + goto err_rings; + + ret = register_netdev(vsi->netdev); + if (ret) + goto err_rings; + + netif_carrier_off(vsi->netdev); + netif_tx_stop_all_queues(vsi->netdev); + } + + ret = ice_vsi_alloc_q_vectors(vsi); + if (ret) + goto err_rings; + + ret = ice_vsi_setup_vector_base(vsi); + if (ret) + goto err_vectors; + + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto err_vectors; + + ice_vsi_map_rings_to_vectors(vsi); + break; + default: + break; + } + + ice_vsi_set_tc_cfg(vsi); + + /* configure VSI nodes based on number of queues and TC's */ + for (i = 0; i < vsi->tc_cfg.numtc; i++) + max_txqs[i] = vsi->num_txq; + + ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num, + vsi->tc_cfg.ena_tc, max_txqs); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed VSI lan queue config\n"); + goto err_vectors; + } + return 0; + +err_vectors: + ice_vsi_free_q_vectors(vsi); +err_rings: + if (vsi->netdev) { + unregister_netdev(vsi->netdev); + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } +err_vsi: + ice_vsi_clear(vsi); + set_bit(__ICE_RESET_FAILED, vsi->back->state); + return ret; +} + /** * ice_vsi_setup - Set up a VSI by a given type * @pf: board private structure @@ -2345,10 +2776,17 @@ static int ice_setup_pf_sw(struct ice_pf *pf) struct ice_vsi *vsi; int status = 0; - vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info); - if (!vsi) { - status = -ENOMEM; - goto error_exit; + if (!ice_is_reset_recovery_pending(pf->state)) { + vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info); + if (!vsi) { + status = -ENOMEM; + goto error_exit; + } + } else { + vsi = pf->vsi[0]; + status = ice_vsi_reinit_setup(vsi); + if (status < 0) + return -EIO; } /* tmp_add_list contains a list of MAC addresses for which MAC @@ -2737,6 +3175,12 @@ static int ice_probe(struct pci_dev *pdev, /* since everything is good, start the service timer */ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); + err = ice_init_link_events(pf->hw.port_info); + if (err) { + dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err); + goto err_alloc_sw_unroll; + } + return 0; err_alloc_sw_unroll: @@ -4217,6 +4661,131 @@ static int ice_vsi_release(struct ice_vsi *vsi) return 0; } +/** + * ice_dis_vsi - pause a VSI + * @vsi: the VSI being paused + */ +static void ice_dis_vsi(struct ice_vsi *vsi) +{ + if (test_bit(__ICE_DOWN, vsi->state)) + return; + + set_bit(__ICE_NEEDS_RESTART, vsi->state); + + if (vsi->netdev && netif_running(vsi->netdev) && + vsi->type == ICE_VSI_PF) + vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); + + ice_vsi_close(vsi); +} + +/** + * ice_ena_vsi - resume a VSI + * @vsi: the VSI being resume + */ +static void ice_ena_vsi(struct ice_vsi *vsi) +{ + if (!test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state)) + return; + + if (vsi->netdev && netif_running(vsi->netdev)) + vsi->netdev->netdev_ops->ndo_open(vsi->netdev); + else if (ice_vsi_open(vsi)) + /* this clears the DOWN bit */ + dev_dbg(&vsi->back->pdev->dev, "Failed open VSI 0x%04X on switch 0x%04X\n", + vsi->vsi_num, vsi->vsw->sw_id); +} + +/** + * ice_pf_dis_all_vsi - Pause all VSIs on a PF + * @pf: the PF + */ +static void ice_pf_dis_all_vsi(struct ice_pf *pf) +{ + int v; + + ice_for_each_vsi(pf, v) + if (pf->vsi[v]) + ice_dis_vsi(pf->vsi[v]); +} + +/** + * ice_pf_ena_all_vsi - Resume all VSIs on a PF + * @pf: the PF + */ +static void ice_pf_ena_all_vsi(struct ice_pf *pf) +{ + int v; + + ice_for_each_vsi(pf, v) + if (pf->vsi[v]) + ice_ena_vsi(pf->vsi[v]); +} + +/** + * ice_rebuild - rebuild after reset + * @pf: pf to rebuild + */ +static void ice_rebuild(struct ice_pf *pf) +{ + struct device *dev = &pf->pdev->dev; + struct ice_hw *hw = &pf->hw; + enum ice_status ret; + int err; + + if (test_bit(__ICE_DOWN, pf->state)) + goto clear_recovery; + + dev_dbg(dev, "rebuilding pf\n"); + + ret = ice_init_all_ctrlq(hw); + if (ret) { + dev_err(dev, "control queues init failed %d\n", ret); + goto fail_reset; + } + + ret = ice_clear_pf_cfg(hw); + if (ret) { + dev_err(dev, "clear PF configuration failed %d\n", ret); + goto fail_reset; + } + + ice_clear_pxe_mode(hw); + + ret = ice_get_caps(hw); + if (ret) { + dev_err(dev, "ice_get_caps failed %d\n", ret); + goto fail_reset; + } + + /* basic nic switch setup */ + err = ice_setup_pf_sw(pf); + if (err) { + dev_err(dev, "ice_setup_pf_sw failed\n"); + goto fail_reset; + } + + /* start misc vector */ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { + err = ice_req_irq_msix_misc(pf); + if (err) { + dev_err(dev, "misc vector setup failed: %d\n", err); + goto fail_reset; + } + } + + /* restart the VSIs that were rebuilt and running before the reset */ + ice_pf_ena_all_vsi(pf); + + return; + +fail_reset: + ice_shutdown_all_ctrlq(hw); + set_bit(__ICE_RESET_FAILED, pf->state); +clear_recovery: + set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); +} + /** * ice_set_rss - Set RSS keys and lut * @vsi: Pointer to VSI structure diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 137a59a74b71..c45cdee4e03c 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -17,6 +17,7 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) +#define ICE_DBG_LINK BIT_ULL(4) #define ICE_DBG_QCTX BIT_ULL(6) #define ICE_DBG_NVM BIT_ULL(7) #define ICE_DBG_LAN BIT_ULL(8) From e94d4478669357cd742170c77fc28d6db2040ce4 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Tue, 20 Mar 2018 07:58:19 -0700 Subject: [PATCH 15/15] ice: Implement filter sync, NDO operations and bump version This patch implements multiple pieces of functionality: 1. Added ice_vsi_sync_filters, which is called through the service task to push filter updates to the hardware. 2. Add support to enable/disable promiscuous mode on an interface. Enabling/disabling promiscuous mode on an interface results in addition/removal of a promisc filter rule through ice_vsi_sync_filters. 3. Implement handlers for ndo_set_mac_address, ndo_change_mtu, ndo_poll_controller and ndo_set_rx_mode. This patch also marks the end of the driver addition by bumping up the driver version. Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice.h | 14 + .../net/ethernet/intel/ice/ice_adminq_cmd.h | 21 + drivers/net/ethernet/intel/ice/ice_common.c | 28 + drivers/net/ethernet/intel/ice/ice_common.h | 3 + .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 12 + drivers/net/ethernet/intel/ice/ice_main.c | 567 +++++++++++++++++- drivers/net/ethernet/intel/ice/ice_switch.c | 77 +++ drivers/net/ethernet/intel/ice/ice_switch.h | 2 + drivers/net/ethernet/intel/ice/ice_type.h | 5 + 9 files changed, 728 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 24ce4b71c9ce..d8b5fff581e7 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -125,11 +125,20 @@ enum ice_state { __ICE_SUSPENDED, /* set on module remove path */ __ICE_RESET_FAILED, /* set by reset/rebuild */ __ICE_ADMINQ_EVENT_PENDING, + __ICE_FLTR_OVERFLOW_PROMISC, __ICE_CFG_BUSY, __ICE_SERVICE_SCHED, __ICE_STATE_NBITS /* must be last */ }; +enum ice_vsi_flags { + ICE_VSI_FLAG_UMAC_FLTR_CHANGED, + ICE_VSI_FLAG_MMAC_FLTR_CHANGED, + ICE_VSI_FLAG_VLAN_FLTR_CHANGED, + ICE_VSI_FLAG_PROMISC_CHANGED, + ICE_VSI_FLAG_NBITS /* must be last */ +}; + /* struct that defines a VSI, associated with a dev */ struct ice_vsi { struct net_device *netdev; @@ -144,7 +153,9 @@ struct ice_vsi { u64 tx_linearize; DECLARE_BITMAP(state, __ICE_STATE_NBITS); + DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + unsigned int current_netdev_flags; u32 tx_restart; u32 tx_busy; u32 rx_buf_failed; @@ -175,6 +186,9 @@ struct ice_vsi { struct ice_eth_stats eth_stats; struct ice_eth_stats eth_stats_prev; + struct list_head tmp_sync_list; /* MAC filters to be synced */ + struct list_head tmp_unsync_list; /* MAC filters to be unsynced */ + bool irqs_ready; bool current_isup; /* Sync 'link up' logging */ bool stat_offsets_loaded; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index ea822a9edce8..5b13ca1bd85f 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -135,6 +135,24 @@ struct ice_aqc_manage_mac_read_resp { u8 mac_addr[ETH_ALEN]; }; +/* Manage MAC address, write command - direct (0x0108) */ +struct ice_aqc_manage_mac_write { + u8 port_num; + u8 flags; +#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0) +#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1) +#define ICE_AQC_MAN_MAC_WR_S 6 +#define ICE_AQC_MAN_MAC_WR_M (3 << ICE_AQC_MAN_MAC_WR_S) +#define ICE_AQC_MAN_MAC_UPDATE_LAA 0 +#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL (BIT(0) << ICE_AQC_MAN_MAC_WR_S) + /* High 16 bits of MAC address in big endian order */ + __be16 sah; + /* Low 32 bits of MAC address in big endian order */ + __be32 sal; + __le32 addr_high; + __le32 addr_low; +}; + /* Clear PXE Command and response (direct 0x0110) */ struct ice_aqc_clear_pxe { u8 rx_cnt; @@ -1214,6 +1232,7 @@ struct ice_aq_desc { struct ice_aqc_q_shutdown q_shutdown; struct ice_aqc_req_res res_owner; struct ice_aqc_manage_mac_read mac_read; + struct ice_aqc_manage_mac_write mac_write; struct ice_aqc_clear_pxe clear_pxe; struct ice_aqc_list_caps get_cap; struct ice_aqc_get_phy_caps get_phy; @@ -1258,6 +1277,7 @@ enum ice_aq_err { ICE_AQ_RC_ENOMEM = 9, /* Out of memory */ ICE_AQ_RC_EBUSY = 12, /* Device or resource busy */ ICE_AQ_RC_EEXIST = 13, /* object already exists */ + ICE_AQ_RC_ENOSPC = 16, /* No space left or allocation failure */ }; /* Admin Queue command opcodes */ @@ -1276,6 +1296,7 @@ enum ice_adminq_opc { /* manage MAC address */ ice_aqc_opc_manage_mac_read = 0x0107, + ice_aqc_opc_manage_mac_write = 0x0108, /* PXE */ ice_aqc_opc_clear_pxe_mode = 0x0110, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 791f1eba5953..385f5d425d19 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1232,6 +1232,34 @@ enum ice_status ice_get_caps(struct ice_hw *hw) return status; } +/** + * ice_aq_manage_mac_write - manage MAC address write command + * @hw: pointer to the hw struct + * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address + * @flags: flags to control write behavior + * @cd: pointer to command details structure or NULL + * + * This function is used to write MAC address to the NVM (0x0108). + */ +enum ice_status +ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, + struct ice_sq_cd *cd) +{ + struct ice_aqc_manage_mac_write *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.mac_write; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write); + + cmd->flags = flags; + + /* Prep values for flags, sah, sal */ + cmd->sah = htons(*((u16 *)mac_addr)); + cmd->sal = htonl(*((u32 *)(mac_addr + 2))); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + /** * ice_aq_clear_pxe_mode * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index ad350a4921a1..9a5519130af1 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -58,6 +58,9 @@ enum ice_status ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd); +enum ice_status +ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags, + struct ice_sq_cd *cd); enum ice_status ice_clear_pf_cfg(struct ice_hw *hw); enum ice_status ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart); diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 57acbcff740b..d23a91665b46 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -353,6 +353,18 @@ enum ice_tx_desc_len_fields { ICE_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */ }; +#define ICE_TXD_QW1_MACLEN_M (0x7FUL << ICE_TX_DESC_LEN_MACLEN_S) +#define ICE_TXD_QW1_IPLEN_M (0x7FUL << ICE_TX_DESC_LEN_IPLEN_S) +#define ICE_TXD_QW1_L4LEN_M (0xFUL << ICE_TX_DESC_LEN_L4_LEN_S) + +/* Tx descriptor field limits in bytes */ +#define ICE_TXD_MACLEN_MAX ((ICE_TXD_QW1_MACLEN_M >> \ + ICE_TX_DESC_LEN_MACLEN_S) * ICE_BYTES_PER_WORD) +#define ICE_TXD_IPLEN_MAX ((ICE_TXD_QW1_IPLEN_M >> \ + ICE_TX_DESC_LEN_IPLEN_S) * ICE_BYTES_PER_DWORD) +#define ICE_TXD_L4LEN_MAX ((ICE_TXD_QW1_L4LEN_M >> \ + ICE_TX_DESC_LEN_L4_LEN_S) * ICE_BYTES_PER_DWORD) + #define ICE_TXD_QW1_TX_BUF_SZ_S 34 #define ICE_TXD_QW1_L2TAG1_S 48 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2478dbe9bf4d..210b7910f1cd 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7,7 +7,7 @@ #include "ice.h" -#define DRV_VERSION "ice-0.0.1-k" +#define DRV_VERSION "ice-0.7.0-k" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" const char ice_drv_ver[] = DRV_VERSION; static const char ice_driver_string[] = DRV_SUMMARY; @@ -200,6 +200,48 @@ static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, return 0; } +/** + * ice_add_mac_to_sync_list - creates list of mac addresses to be synced + * @netdev: the net device on which the sync is happening + * @addr: mac address to sync + * + * This is a callback function which is called by the in kernel device sync + * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only + * populates the tmp_sync_list, which is later used by ice_add_mac to add the + * mac filters from the hardware. + */ +static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (ice_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr)) + return -EINVAL; + + return 0; +} + +/** + * ice_add_mac_to_unsync_list - creates list of mac addresses to be unsynced + * @netdev: the net device on which the unsync is happening + * @addr: mac address to unsync + * + * This is a callback function which is called by the in kernel device unsync + * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only + * populates the tmp_unsync_list, which is later used by ice_remove_mac to + * delete the mac filters from the hardware. + */ +static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (ice_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr)) + return -EINVAL; + + return 0; +} + /** * ice_free_fltr_list - free filter lists helper * @dev: pointer to the device struct @@ -218,6 +260,183 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h) } } +/** + * ice_vsi_fltr_changed - check if filter state changed + * @vsi: VSI to be checked + * + * returns true if filter state has changed, false otherwise. + */ +static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) +{ + return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) || + test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) || + test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); +} + +/** + * ice_vsi_sync_fltr - Update the VSI filter list to the HW + * @vsi: ptr to the VSI + * + * Push any outstanding VSI filter changes through the AdminQ. + */ +static int ice_vsi_sync_fltr(struct ice_vsi *vsi) +{ + struct device *dev = &vsi->back->pdev->dev; + struct net_device *netdev = vsi->netdev; + bool promisc_forced_on = false; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status = 0; + u32 changed_flags = 0; + int err = 0; + + if (!vsi->netdev) + return -EINVAL; + + while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) + usleep_range(1000, 2000); + + changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; + vsi->current_netdev_flags = vsi->netdev->flags; + + INIT_LIST_HEAD(&vsi->tmp_sync_list); + INIT_LIST_HEAD(&vsi->tmp_unsync_list); + + if (ice_vsi_fltr_changed(vsi)) { + clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + + /* grab the netdev's addr_list_lock */ + netif_addr_lock_bh(netdev); + __dev_uc_sync(netdev, ice_add_mac_to_sync_list, + ice_add_mac_to_unsync_list); + __dev_mc_sync(netdev, ice_add_mac_to_sync_list, + ice_add_mac_to_unsync_list); + /* our temp lists are populated. release lock */ + netif_addr_unlock_bh(netdev); + } + + /* Remove mac addresses in the unsync list */ + status = ice_remove_mac(hw, &vsi->tmp_unsync_list); + ice_free_fltr_list(dev, &vsi->tmp_unsync_list); + if (status) { + netdev_err(netdev, "Failed to delete MAC filters\n"); + /* if we failed because of alloc failures, just bail */ + if (status == ICE_ERR_NO_MEMORY) { + err = -ENOMEM; + goto out; + } + } + + /* Add mac addresses in the sync list */ + status = ice_add_mac(hw, &vsi->tmp_sync_list); + ice_free_fltr_list(dev, &vsi->tmp_sync_list); + if (status) { + netdev_err(netdev, "Failed to add MAC filters\n"); + /* If there is no more space for new umac filters, vsi + * should go into promiscuous mode. There should be some + * space reserved for promiscuous filters. + */ + if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC && + !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC, + vsi->state)) { + promisc_forced_on = true; + netdev_warn(netdev, + "Reached MAC filter limit, forcing promisc mode on VSI %d\n", + vsi->vsi_num); + } else { + err = -EIO; + goto out; + } + } + /* check for changes in promiscuous modes */ + if (changed_flags & IFF_ALLMULTI) + netdev_warn(netdev, "Unsupported configuration\n"); + + if (((changed_flags & IFF_PROMISC) || promisc_forced_on) || + test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) { + clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + if (vsi->current_netdev_flags & IFF_PROMISC) { + /* Apply TX filter rule to get traffic from VMs */ + status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true, + ICE_FLTR_TX); + if (status) { + netdev_err(netdev, "Error setting default VSI %i tx rule\n", + vsi->vsi_num); + vsi->current_netdev_flags &= ~IFF_PROMISC; + err = -EIO; + goto out_promisc; + } + /* Apply RX filter rule to get traffic from wire */ + status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true, + ICE_FLTR_RX); + if (status) { + netdev_err(netdev, "Error setting default VSI %i rx rule\n", + vsi->vsi_num); + vsi->current_netdev_flags &= ~IFF_PROMISC; + err = -EIO; + goto out_promisc; + } + } else { + /* Clear TX filter rule to stop traffic from VMs */ + status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false, + ICE_FLTR_TX); + if (status) { + netdev_err(netdev, "Error clearing default VSI %i tx rule\n", + vsi->vsi_num); + vsi->current_netdev_flags |= IFF_PROMISC; + err = -EIO; + goto out_promisc; + } + /* Clear filter RX to remove traffic from wire */ + status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false, + ICE_FLTR_RX); + if (status) { + netdev_err(netdev, "Error clearing default VSI %i rx rule\n", + vsi->vsi_num); + vsi->current_netdev_flags |= IFF_PROMISC; + err = -EIO; + goto out_promisc; + } + } + } + goto exit; + +out_promisc: + set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + goto exit; +out: + /* if something went wrong then set the changed flag so we try again */ + set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); +exit: + clear_bit(__ICE_CFG_BUSY, vsi->state); + return err; +} + +/** + * ice_sync_fltr_subtask - Sync the VSI filter list with HW + * @pf: board private structure + */ +static void ice_sync_fltr_subtask(struct ice_pf *pf) +{ + int v; + + if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags))) + return; + + clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags); + + for (v = 0; v < pf->num_alloc_vsi; v++) + if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) && + ice_vsi_sync_fltr(pf->vsi[v])) { + /* come back and try again later */ + set_bit(ICE_FLAG_FLTR_SYNC, pf->flags); + break; + } +} + /** * ice_is_reset_recovery_pending - schedule a reset * @state: pf state field @@ -780,6 +999,7 @@ static void ice_service_task(struct work_struct *work) return; } + ice_sync_fltr_subtask(pf); ice_watchdog_subtask(pf); ice_clean_adminq_subtask(pf); @@ -2491,6 +2711,7 @@ static int ice_vsi_reinit_setup(struct ice_vsi *vsi) ice_vsi_free_q_vectors(vsi); err_rings: if (vsi->netdev) { + vsi->current_netdev_flags = 0; unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; @@ -3300,6 +3521,197 @@ static void __exit ice_module_exit(void) } module_exit(ice_module_exit); +/** + * ice_set_mac_address - NDO callback to set mac address + * @netdev: network interface device structure + * @pi: pointer to an address structure + * + * Returns 0 on success, negative on failure + */ +static int ice_set_mac_address(struct net_device *netdev, void *pi) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + struct sockaddr *addr = pi; + enum ice_status status; + LIST_HEAD(a_mac_list); + LIST_HEAD(r_mac_list); + u8 flags = 0; + int err; + u8 *mac; + + mac = (u8 *)addr->sa_data; + + if (!is_valid_ether_addr(mac)) + return -EADDRNOTAVAIL; + + if (ether_addr_equal(netdev->dev_addr, mac)) { + netdev_warn(netdev, "already using mac %pM\n", mac); + return 0; + } + + if (test_bit(__ICE_DOWN, pf->state) || + ice_is_reset_recovery_pending(pf->state)) { + netdev_err(netdev, "can't set mac %pM. device not ready\n", + mac); + return -EBUSY; + } + + /* When we change the mac address we also have to change the mac address + * based filter rules that were created previously for the old mac + * address. So first, we remove the old filter rule using ice_remove_mac + * and then create a new filter rule using ice_add_mac. Note that for + * both these operations, we first need to form a "list" of mac + * addresses (even though in this case, we have only 1 mac address to be + * added/removed) and this done using ice_add_mac_to_list. Depending on + * the ensuing operation this "list" of mac addresses is either to be + * added or removed from the filter. + */ + err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr); + if (err) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + + status = ice_remove_mac(hw, &r_mac_list); + if (status) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + + err = ice_add_mac_to_list(vsi, &a_mac_list, mac); + if (err) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + + status = ice_add_mac(hw, &a_mac_list); + if (status) { + err = -EADDRNOTAVAIL; + goto free_lists; + } + +free_lists: + /* free list entries */ + ice_free_fltr_list(&pf->pdev->dev, &r_mac_list); + ice_free_fltr_list(&pf->pdev->dev, &a_mac_list); + + if (err) { + netdev_err(netdev, "can't set mac %pM. filter update failed\n", + mac); + return err; + } + + /* change the netdev's mac address */ + memcpy(netdev->dev_addr, mac, netdev->addr_len); + netdev_dbg(vsi->netdev, "updated mac address to %pM\n", + netdev->dev_addr); + + /* write new mac address to the firmware */ + flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; + status = ice_aq_manage_mac_write(hw, mac, flags, NULL); + if (status) { + netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n", + mac); + } + return 0; +} + +/** + * ice_set_rx_mode - NDO callback to set the netdev filters + * @netdev: network interface device structure + */ +static void ice_set_rx_mode(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + if (!vsi) + return; + + /* Set the flags to synchronize filters + * ndo_set_rx_mode may be triggered even without a change in netdev + * flags + */ + set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags); + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + ice_service_task_schedule(vsi->back); +} + +/** + * ice_fdb_add - add an entry to the hardware database + * @ndm: the input from the stack + * @tb: pointer to array of nladdr (unused) + * @dev: the net device pointer + * @addr: the MAC address entry being added + * @vid: VLAN id + * @flags: instructions from stack about fdb operation + */ +static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 flags) +{ + int err; + + if (vid) { + netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n"); + return -EINVAL; + } + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + netdev_err(dev, "FDB only supports static addresses\n"); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + else + err = -EINVAL; + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} + +/** + * ice_fdb_del - delete an entry from the hardware database + * @ndm: the input from the stack + * @tb: pointer to array of nladdr (unused) + * @dev: the net device pointer + * @addr: the MAC address entry being added + * @vid: VLAN id + */ +static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + __always_unused u16 vid) +{ + int err; + + if (ndm->ndm_state & NUD_PERMANENT) { + netdev_err(dev, "FDB only supports static addresses\n"); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + + return err; +} + /** * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx * @vsi: the vsi being changed @@ -3690,6 +4102,8 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) { int err; + ice_set_rx_mode(vsi->netdev); + err = ice_restore_vlan(vsi); if (err) return err; @@ -4379,6 +4793,30 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) stats->rx_length_errors = vsi_stats->rx_length_errors; } +#ifdef CONFIG_NET_POLL_CONTROLLER +/** + * ice_netpoll - polling "interrupt" handler + * @netdev: network interface device structure + * + * Used by netconsole to send skbs without having to re-enable interrupts. + * This is not called in the normal interrupt path. + */ +static void ice_netpoll(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int i; + + if (test_bit(__ICE_DOWN, vsi->state) || + !test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + return; + + for (i = 0; i < vsi->num_q_vectors; i++) + ice_msix_clean_rings(0, vsi->q_vectors[i]); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + /** * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI * @vsi: VSI having NAPI disabled @@ -4786,6 +5224,73 @@ static void ice_rebuild(struct ice_pf *pf) set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); } +/** + * ice_change_mtu - NDO callback to change the MTU + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + */ +static int ice_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u8 count = 0; + + if (new_mtu == netdev->mtu) { + netdev_warn(netdev, "mtu is already %d\n", netdev->mtu); + return 0; + } + + if (new_mtu < netdev->min_mtu) { + netdev_err(netdev, "new mtu invalid. min_mtu is %d\n", + netdev->min_mtu); + return -EINVAL; + } else if (new_mtu > netdev->max_mtu) { + netdev_err(netdev, "new mtu invalid. max_mtu is %d\n", + netdev->min_mtu); + return -EINVAL; + } + /* if a reset is in progress, wait for some time for it to complete */ + do { + if (ice_is_reset_recovery_pending(pf->state)) { + count++; + usleep_range(1000, 2000); + } else { + break; + } + + } while (count < 100); + + if (count == 100) { + netdev_err(netdev, "can't change mtu. Device is busy\n"); + return -EBUSY; + } + + netdev->mtu = new_mtu; + + /* if VSI is up, bring it down and then back up */ + if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + int err; + + err = ice_down(vsi); + if (err) { + netdev_err(netdev, "change mtu if_up err %d\n", err); + return err; + } + + err = ice_up(vsi); + if (err) { + netdev_err(netdev, "change mtu if_up err %d\n", err); + return err; + } + } + + netdev_dbg(netdev, "changed mtu to %d\n", new_mtu); + return 0; +} + /** * ice_set_rss - Set RSS keys and lut * @vsi: Pointer to VSI structure @@ -4919,12 +5424,72 @@ static int ice_stop(struct net_device *netdev) return 0; } +/** + * ice_features_check - Validate encapsulated packet conforms to limits + * @skb: skb buffer + * @netdev: This port's netdev + * @features: Offload features that the stack believes apply + */ +static netdev_features_t +ice_features_check(struct sk_buff *skb, + struct net_device __always_unused *netdev, + netdev_features_t features) +{ + size_t len; + + /* No point in doing any of this if neither checksum nor GSO are + * being requested for this frame. We can rule out both by just + * checking for CHECKSUM_PARTIAL + */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + /* We cannot support GSO if the MSS is going to be less than + * 64 bytes. If it is then we need to drop support for GSO. + */ + if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64)) + features &= ~NETIF_F_GSO_MASK; + + len = skb_network_header(skb) - skb->data; + if (len & ~(ICE_TXD_MACLEN_MAX)) + goto out_rm_features; + + len = skb_transport_header(skb) - skb_network_header(skb); + if (len & ~(ICE_TXD_IPLEN_MAX)) + goto out_rm_features; + + if (skb->encapsulation) { + len = skb_inner_network_header(skb) - skb_transport_header(skb); + if (len & ~(ICE_TXD_L4LEN_MAX)) + goto out_rm_features; + + len = skb_inner_transport_header(skb) - + skb_inner_network_header(skb); + if (len & ~(ICE_TXD_IPLEN_MAX)) + goto out_rm_features; + } + + return features; +out_rm_features: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + static const struct net_device_ops ice_netdev_ops = { .ndo_open = ice_open, .ndo_stop = ice_stop, .ndo_start_xmit = ice_start_xmit, + .ndo_features_check = ice_features_check, + .ndo_set_rx_mode = ice_set_rx_mode, + .ndo_set_mac_address = ice_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ice_netpoll, +#endif /* CONFIG_NET_POLL_CONTROLLER */ .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_set_features = ice_set_features, + .ndo_fdb_add = ice_fdb_add, + .ndo_fdb_del = ice_fdb_del, }; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 5624df9eb6fc..723d15f1e90b 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1626,6 +1626,83 @@ ice_remove_mac(struct ice_hw *hw, struct list_head *m_list) return status; } +/** + * ice_cfg_dflt_vsi - add filter rule to set/unset given VSI as default + * VSI for the switch (represented by swid) + * @hw: pointer to the hardware structure + * @vsi_id: number of VSI to set as default + * @set: true to add the above mentioned switch rule, false to remove it + * @direction: ICE_FLTR_RX or ICE_FLTR_TX + */ +enum ice_status +ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction) +{ + struct ice_aqc_sw_rules_elem *s_rule; + struct ice_fltr_info f_info; + enum ice_adminq_opc opcode; + enum ice_status status; + u16 s_rule_size; + + s_rule_size = set ? ICE_SW_RULE_RX_TX_ETH_HDR_SIZE : + ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + memset(&f_info, 0, sizeof(f_info)); + + f_info.lkup_type = ICE_SW_LKUP_DFLT; + f_info.flag = direction; + f_info.fltr_act = ICE_FWD_TO_VSI; + f_info.fwd_id.vsi_id = vsi_id; + + if (f_info.flag & ICE_FLTR_RX) { + f_info.src = hw->port_info->lport; + if (!set) + f_info.fltr_rule_id = + hw->port_info->dflt_rx_vsi_rule_id; + } else if (f_info.flag & ICE_FLTR_TX) { + f_info.src = vsi_id; + if (!set) + f_info.fltr_rule_id = + hw->port_info->dflt_tx_vsi_rule_id; + } + + if (set) + opcode = ice_aqc_opc_add_sw_rules; + else + opcode = ice_aqc_opc_remove_sw_rules; + + ice_fill_sw_rule(hw, &f_info, s_rule, opcode); + + status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opcode, NULL); + if (status || !(f_info.flag & ICE_FLTR_TX_RX)) + goto out; + if (set) { + u16 index = le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + + if (f_info.flag & ICE_FLTR_TX) { + hw->port_info->dflt_tx_vsi_num = vsi_id; + hw->port_info->dflt_tx_vsi_rule_id = index; + } else if (f_info.flag & ICE_FLTR_RX) { + hw->port_info->dflt_rx_vsi_num = vsi_id; + hw->port_info->dflt_rx_vsi_rule_id = index; + } + } else { + if (f_info.flag & ICE_FLTR_TX) { + hw->port_info->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL; + hw->port_info->dflt_tx_vsi_rule_id = ICE_INVAL_ACT; + } else if (f_info.flag & ICE_FLTR_RX) { + hw->port_info->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL; + hw->port_info->dflt_rx_vsi_rule_id = ICE_INVAL_ACT; + } + } + +out: + devm_kfree(ice_hw_to_dev(hw), s_rule); + return status; +} + /** * ice_remove_vlan_internal - Remove one VLAN based filter rule * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 80b4e1951b32..6f4a0d159dbf 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -155,5 +155,7 @@ enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst); void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id); enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list); +enum ice_status +ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index c45cdee4e03c..99c8a9a71b5e 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -10,6 +10,9 @@ #include "ice_controlq.h" #include "ice_lan_tx_rx.h" +#define ICE_BYTES_PER_WORD 2 +#define ICE_BYTES_PER_DWORD 4 + static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) { return test_bit(tc, (unsigned long *)&bitmap); @@ -227,7 +230,9 @@ struct ice_port_info { u8 port_state; #define ICE_SCHED_PORT_STATE_INIT 0x0 #define ICE_SCHED_PORT_STATE_READY 0x1 + u16 dflt_tx_vsi_rule_id; u16 dflt_tx_vsi_num; + u16 dflt_rx_vsi_rule_id; u16 dflt_rx_vsi_num; struct ice_fc_info fc; struct ice_mac_info mac;