diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index c3bd2a10bc7d..e82e4ca20620 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -402,7 +402,7 @@ struct hnae3_ae_ops { int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid, u16 vlan, u8 qos, __be16 proto); int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable); - void (*reset_event)(struct hnae3_handle *handle); + void (*reset_event)(struct pci_dev *pdev, struct hnae3_handle *handle); void (*get_channels)(struct hnae3_handle *handle, struct ethtool_channels *ch); void (*get_tqps_and_rss_info)(struct hnae3_handle *h, @@ -429,6 +429,7 @@ struct hnae3_ae_ops { struct ethtool_rxnfc *cmd, u32 *rule_locs); int (*restore_fd_rules)(struct hnae3_handle *handle); void (*enable_fd)(struct hnae3_handle *handle, bool enable); + pci_ers_result_t (*process_hw_error)(struct hnae3_ae_dev *ae_dev); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 76ce2f21178b..32f3aca814e7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1613,7 +1614,7 @@ static void hns3_nic_net_timeout(struct net_device *ndev) /* request the reset */ if (h->ae_algo->ops->reset_event) - h->ae_algo->ops->reset_event(h); + h->ae_algo->ops->reset_event(h->pdev, h); } static const struct net_device_ops hns3_nic_netdev_ops = { @@ -1771,6 +1772,52 @@ static void hns3_shutdown(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D3hot); } +static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + pci_ers_result_t ret; + + dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state); + + if 
(state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + if (!ae_dev) { + dev_err(&pdev->dev, + "Can't recover - error happened during device init\n"); + return PCI_ERS_RESULT_NONE; + } + + if (ae_dev->ops->process_hw_error) + ret = ae_dev->ops->process_hw_error(ae_dev); + else + return PCI_ERS_RESULT_NONE; + + return ret; +} + +static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + struct device *dev = &pdev->dev; + + dev_info(dev, "requesting reset due to PCI error\n"); + + /* request the reset */ + if (ae_dev->ops->reset_event) { + ae_dev->ops->reset_event(pdev, NULL); + return PCI_ERS_RESULT_RECOVERED; + } + + return PCI_ERS_RESULT_DISCONNECT; +} + +static const struct pci_error_handlers hns3_err_handler = { + .error_detected = hns3_error_detected, + .slot_reset = hns3_slot_reset, +}; + static struct pci_driver hns3_driver = { .name = hns3_driver_name, .id_table = hns3_pci_tbl, @@ -1778,6 +1825,7 @@ static struct pci_driver hns3_driver = { .remove = hns3_remove, .shutdown = hns3_shutdown, .sriov_configure = hns3_pci_sriov_configure, + .err_handler = &hns3_err_handler, }; /* set default feature to hns3 */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index cb8ddd043476..580e81743681 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -6,6 +6,6 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGE) += hclge.o -hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o +hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 1ccde67db770..872cd4bdd70d 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -209,6 +209,28 @@ enum hclge_opcode_type { /* Led command */ HCLGE_OPC_LED_STATUS_CFG = 0xB000, + + /* Error INT commands */ + HCLGE_TM_SCH_ECC_INT_EN = 0x0829, + HCLGE_TM_SCH_ECC_ERR_RINT_CMD = 0x082d, + HCLGE_TM_SCH_ECC_ERR_RINT_CE = 0x082f, + HCLGE_TM_SCH_ECC_ERR_RINT_NFE = 0x0830, + HCLGE_TM_SCH_ECC_ERR_RINT_FE = 0x0831, + HCLGE_TM_SCH_MBIT_ECC_INFO_CMD = 0x0833, + HCLGE_COMMON_ECC_INT_CFG = 0x1505, + HCLGE_IGU_EGU_TNL_INT_QUERY = 0x1802, + HCLGE_IGU_EGU_TNL_INT_EN = 0x1803, + HCLGE_IGU_EGU_TNL_INT_CLR = 0x1804, + HCLGE_IGU_COMMON_INT_QUERY = 0x1805, + HCLGE_IGU_COMMON_INT_EN = 0x1806, + HCLGE_IGU_COMMON_INT_CLR = 0x1807, + HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14, + HCLGE_TM_QCN_MEM_INT_INFO_CMD = 0x1A17, + HCLGE_PPP_CMD0_INT_CMD = 0x2100, + HCLGE_PPP_CMD1_INT_CMD = 0x2101, + HCLGE_NCSI_INT_QUERY = 0x2400, + HCLGE_NCSI_INT_EN = 0x2401, + HCLGE_NCSI_INT_CLR = 0x2402, }; #define HCLGE_TQP_REG_OFFSET 0x80000 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c new file mode 100644 index 000000000000..f7e363b90fe0 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -0,0 +1,1088 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2016-2017 Hisilicon Limited. 
*/ + +#include "hclge_err.h" + +static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { + { .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" }, + { .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" }, + { .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" }, + { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" }, + { .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" }, + { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" }, + { .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" }, + { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" }, + { .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" }, + { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" }, + { .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" }, + { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = { + { .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { + { .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" }, + { .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" }, + { .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" }, + { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" }, + { .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" }, + { .int_msk = BIT(9), .msg = 
"cmdq_nic_rx_head_ecc_mbit_err" }, + { .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" }, + { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" }, + { .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" }, + { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" }, + { .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" }, + { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = { + { .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" }, + { .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" }, + { .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" }, + { .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" }, + { .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" }, + { .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" }, + { .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" }, + { .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" }, + { .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" }, + { .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" }, + { .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" }, + { .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { + { .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" }, + { .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" }, + { .int_msk = BIT(4), .msg = 
"tx_que_scan_int_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" }, + { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" }, + { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" }, + { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" }, + { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" }, + { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" }, + { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_igu_com_err_int[] = { + { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" }, + { .int_msk = BIT(1), .msg = "igu_rx_buf0_ecc_1bit_err" }, + { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" }, + { .int_msk = BIT(3), .msg = "igu_rx_buf1_ecc_1bit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_igu_egu_tnl_err_int[] = { + { .int_msk = BIT(0), .msg = "rx_buf_overflow" }, + { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow" }, + { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow" }, + { .int_msk = BIT(3), .msg = "tx_buf_overflow" }, + { .int_msk = BIT(4), .msg = "tx_buf_underrun" }, + { .int_msk = BIT(5), .msg = "rx_stp_buf_overflow" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ncsi_err_int[] = { + { .int_msk = BIT(0), .msg = "ncsi_tx_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppp_mpf_int0[] = { + { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_1bit_err" }, + { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_1bit_err" }, + { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_1bit_err" }, + { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_1bit_err" }, + { .int_msk = BIT(7), .msg = 
"rss_tc_mode_mem_ecc_1bit_err" }, + { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_1bit_err" }, + { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_1bit_err" }, + { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_1bit_err" }, + { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_1bit_err" }, + { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_1bit_err" }, + { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_1bit_err" }, + { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_1bit_err" }, + { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_1bit_err" }, + { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_1bit_err" }, + { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_1bit_err" }, + { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_1bit_err" }, + { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_1bit_err" }, + { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_1bit_err" }, + { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_1bit_err" }, + { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_1bit_err" }, + { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_1bit_err" }, + { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_1bit_err" }, + { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_1bit_err" }, + { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_1bit_err" }, + { .int_msk = BIT(27), + .msg = "flow_director_ad_mem0_ecc_1bit_err" }, + { .int_msk = BIT(28), + .msg = "flow_director_ad_mem1_ecc_1bit_err" }, + { .int_msk = BIT(29), + .msg = "rx_vlan_tag_memory_ecc_1bit_err" }, + { .int_msk = BIT(30), + .msg = "Tx_UP_mapping_config_mem_ecc_1bit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppp_mpf_int1[] = { + { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err" }, + { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err" }, + { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err" }, + { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err" }, + { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err" }, + { .int_msk 
= BIT(6), .msg = "umv_ad_mem_ecc_mbit_err" }, + { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err" }, + { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err" }, + { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err" }, + { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err" }, + { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err" }, + { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err" }, + { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err" }, + { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err" }, + { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err" }, + { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err" }, + { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err" }, + { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_mbit_err" }, + { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err" }, + { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err" }, + { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err" }, + { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err" }, + { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err" }, + { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err" }, + { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err" }, + { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err" }, + { .int_msk = BIT(27), + .msg = "flow_director_ad_mem0_ecc_mbit_err" }, + { .int_msk = BIT(28), + .msg = "flow_director_ad_mem1_ecc_mbit_err" }, + { .int_msk = BIT(29), + .msg = "rx_vlan_tag_memory_ecc_mbit_err" }, + { .int_msk = BIT(30), + .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppp_pf_int[] = { + { .int_msk = BIT(0), .msg = "Tx_vlan_tag_err" }, + { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppp_mpf_int2[] = { + { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = 
"rslt_descr_fifo_mem_ecc_1bit_err" }, + { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_1bit_err" }, + { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_1bit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppp_mpf_int3[] = { + { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err" }, + { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err" }, + { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err" }, + { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err" }, + { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err" }, + { /* sentinel */ } +}; + +struct hclge_tm_sch_ecc_info { + const char *name; +}; + +static const struct hclge_tm_sch_ecc_info hclge_tm_sch_ecc_err[7][15] = { + { + { .name = "QSET_QUEUE_CTRL:PRI_LEN TAB" }, + { .name = "QSET_QUEUE_CTRL:SPA_LEN TAB" }, + { .name = "QSET_QUEUE_CTRL:SPB_LEN TAB" }, + { .name = "QSET_QUEUE_CTRL:WRRA_LEN TAB" }, + { .name = "QSET_QUEUE_CTRL:WRRB_LEN TAB" }, + { .name = "QSET_QUEUE_CTRL:SPA_HPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:SPB_HPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:WRRA_HPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:WRRB_HPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:QS_LINKLIST TAB" }, + { .name = "QSET_QUEUE_CTRL:SPA_TPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:SPB_TPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:WRRA_TPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:WRRB_TPTR TAB" }, + { .name = "QSET_QUEUE_CTRL:QS_DEFICITCNT TAB" }, + }, + { + { .name = "ROCE_QUEUE_CTRL:QS_LEN TAB" }, + { .name = "ROCE_QUEUE_CTRL:QS_TPTR TAB" }, + { .name = "ROCE_QUEUE_CTRL:QS_HPTR TAB" }, + { .name = "ROCE_QUEUE_CTRL:QLINKLIST TAB" }, + { .name = "ROCE_QUEUE_CTRL:QCLEN TAB" }, + }, + { + { .name = "NIC_QUEUE_CTRL:QS_LEN TAB" }, + { .name = "NIC_QUEUE_CTRL:QS_TPTR TAB" }, + { .name = "NIC_QUEUE_CTRL:QS_HPTR TAB" }, + { 
.name = "NIC_QUEUE_CTRL:QLINKLIST TAB" }, + { .name = "NIC_QUEUE_CTRL:QCLEN TAB" }, + }, + { + { .name = "RAM_CFG_CTRL:CSHAP TAB" }, + { .name = "RAM_CFG_CTRL:PSHAP TAB" }, + }, + { + { .name = "SHAPER_CTRL:PSHAP TAB" }, + }, + { + { .name = "MSCH_CTRL" }, + }, + { + { .name = "TOP_CTRL" }, + }, +}; + +static const struct hclge_hw_error hclge_tm_sch_err_int[] = { + { .int_msk = BIT(0), .msg = "tm_sch_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_full_err" }, + { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_empty_err" }, + { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_full_err" }, + { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_empty_err" }, + { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_full_err" }, + { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_empty_err" }, + { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_full_err" }, + { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_empty_err" }, + { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_full_err" }, + { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_empty_err" }, + { .int_msk = BIT(12), + .msg = "tm_sch_port_shap_offset_fifo_wr_full_err" }, + { .int_msk = BIT(13), + .msg = "tm_sch_port_shap_offset_fifo_rd_empty_err" }, + { .int_msk = BIT(14), + .msg = "tm_sch_pg_pshap_offset_fifo_wr_full_err" }, + { .int_msk = BIT(15), + .msg = "tm_sch_pg_pshap_offset_fifo_rd_empty_err" }, + { .int_msk = BIT(16), + .msg = "tm_sch_pg_cshap_offset_fifo_wr_full_err" }, + { .int_msk = BIT(17), + .msg = "tm_sch_pg_cshap_offset_fifo_rd_empty_err" }, + { .int_msk = BIT(18), + .msg = "tm_sch_pri_pshap_offset_fifo_wr_full_err" }, + { .int_msk = BIT(19), + .msg = "tm_sch_pri_pshap_offset_fifo_rd_empty_err" }, + { .int_msk = BIT(20), + .msg = "tm_sch_pri_cshap_offset_fifo_wr_full_err" }, + { .int_msk = BIT(21), + .msg = "tm_sch_pri_cshap_offset_fifo_rd_empty_err" }, 
+ { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_full_err" }, + { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_empty_err" }, + { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_full_err" }, + { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_empty_err" }, + { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_full_err" }, + { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_empty_err" }, + { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_full_err" }, + { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_empty_err" }, + { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_full_err" }, + { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_empty_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_qcn_ecc_err_int[] = { + { .int_msk = BIT(0), .msg = "qcn_byte_mem_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "qcn_time_mem_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err" }, + { .int_msk = BIT(4), .msg = "qcn_fb_mem_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err" }, + { .int_msk = BIT(6), .msg = "qcn_link_mem_ecc_1bit_err" }, + { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err" }, + { .int_msk = BIT(8), .msg = "qcn_rate_mem_ecc_1bit_err" }, + { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err" }, + { .int_msk = BIT(10), .msg = "qcn_tmplt_mem_ecc_1bit_err" }, + { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err" }, + { .int_msk = BIT(12), .msg = "qcn_shap_cfg_mem_ecc_1bit_err" }, + { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err" }, + { .int_msk = BIT(14), .msg = "qcn_gp0_barrel_mem_ecc_1bit_err" }, + { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err" }, + { .int_msk = BIT(16), .msg = "qcn_gp1_barrel_mem_ecc_1bit_err" }, + { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err" }, + { .int_msk = BIT(18), .msg = "qcn_gp2_barrel_mem_ecc_1bit_err" }, + { .int_msk = BIT(19), .msg = 
"qcn_gp2_barrel_mem_ecc_mbit_err" }, + { .int_msk = BIT(20), .msg = "qcn_gp3_barrel_mem_ecc_1bit_err" }, + { .int_msk = BIT(21), .msg = "qcn_gp3_barrel_mem_ecc_mbit_err" }, + { /* sentinel */ } +}; +/* Log every err_list entry whose int_msk bit is set in err_sts. */ +static void hclge_log_error(struct device *dev, + const struct hclge_hw_error *err_list, + u32 err_sts) +{ + const struct hclge_hw_error *err; + int i = 0; + + while (err_list[i].msg) { + err = &err_list[i]; + if (!(err->int_msk & err_sts)) { + i++; + continue; + } + dev_warn(dev, "%s [error status=0x%x] found\n", + err->msg, err_sts); + i++; + } +} + +/* hclge_cmd_query_error: read the error information + * @hdev: pointer to struct hclge_dev + * @desc: descriptor for describing the command + * @cmd: command opcode + * @flag: flag set on desc[0]; non-zero selects the two-descriptor form + * @w_num: index into desc[0].data[] where @int_type is written; 0 = skip + * @int_type: select which type of the interrupt for which the error + * info will be read(RAS-CE/RAS-NFE/RAS-FE etc). + * + * This function queries the error info from hw register/s using a command + */ +static int hclge_cmd_query_error(struct hclge_dev *hdev, + struct hclge_desc *desc, u32 cmd, + u16 flag, u8 w_num, + enum hclge_err_int_type int_type) +{ + struct device *dev = &hdev->pdev->dev; + int num = 1; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], cmd, true); + if (flag) { + desc[0].flag |= cpu_to_le16(flag); + hclge_cmd_setup_basic_desc(&desc[1], cmd, true); + num = 2; + } + if (w_num) + desc[0].data[w_num] = cpu_to_le32(int_type); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + if (ret) + dev_err(dev, "query error cmd failed (%d)\n", ret); + + return ret; +} + +/* hclge_cmd_clear_error: clear the error status + * @hdev: pointer to struct hclge_dev + * @desc: descriptor for describing the command + * @desc_src: prefilled descriptor from the previous command, or NULL + * @cmd: command opcode; 0 means reuse @desc as already filled in + * @flag: flag set on desc[0]; non-zero selects the two-descriptor form + * + * This function clears the error status in the hw register/s using a command + */ +static int 
hclge_cmd_clear_error(struct hclge_dev *hdev, + struct hclge_desc *desc, + struct hclge_desc *desc_src, + u32 cmd, u16 flag) +{ + struct device *dev = &hdev->pdev->dev; + int num = 1; + int ret, i; + + if (cmd) { + hclge_cmd_setup_basic_desc(&desc[0], cmd, false); + if (flag) { + desc[0].flag |= cpu_to_le16(flag); + hclge_cmd_setup_basic_desc(&desc[1], cmd, false); + num = 2; + } + if (desc_src) { + for (i = 0; i < 6; i++) { + desc[0].data[i] = desc_src[0].data[i]; + if (flag) + desc[1].data[i] = desc_src[1].data[i]; + } + } + } else { /* cmd == 0: reuse the descriptor(s) already in desc */ + hclge_cmd_reuse_desc(&desc[0], false); + if (flag) { + desc[0].flag |= cpu_to_le16(flag); + hclge_cmd_reuse_desc(&desc[1], false); + num = 2; + } + } + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + if (ret) + dev_err(dev, "clear error cmd failed (%d)\n", ret); + + return ret; +} +/* Enable or disable the COMMON error interrupts (two-descriptor command). */ +static int hclge_enable_common_error(struct hclge_dev *hdev, bool en) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); + + if (en) { + /* enable COMMON error interrupts: desc[0] carries the enable bits */ + desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); + desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | + HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); + desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); + desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN); + desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); + } else { + /* disable COMMON error interrupts: same words written as 0 */ + desc[0].data[0] = 0; + desc[0].data[2] = 0; + desc[0].data[3] = 0; + desc[0].data[4] = 0; + desc[0].data[5] = 0; + } + desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); + desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | + HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); + desc[1].data[3] = 
cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); + desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK); + desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); + if (ret) + dev_err(dev, + "failed(%d) to enable/disable COMMON err interrupts\n", + ret); + + return ret; +} + +static int hclge_enable_ncsi_error(struct hclge_dev *hdev, bool en) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc; + int ret; + + if (hdev->pdev->revision < 0x21) + return 0; + + /* enable/disable NCSI error interrupts (revision >= 0x21 only) */ + hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false); + if (en) + desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN); + else + desc.data[0] = 0; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(dev, + "failed(%d) to enable/disable NCSI error interrupts\n", + ret); + + return ret; +} + +static int hclge_enable_igu_egu_error(struct hclge_dev *hdev, bool en) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc; + int ret; + + /* enable/disable IGU common error interrupts */ + hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false); + if (en) + desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN); + else + desc.data[0] = 0; + desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(dev, + "failed(%d) to enable/disable IGU common interrupts\n", + ret); + return ret; + } + /* enable/disable IGU-EGU TNL error interrupts */ + hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false); + if (en) + desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN); + else + desc.data[0] = 0; + desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(dev, + "failed(%d) to enable/disable IGU-EGU TNL interrupts\n", + ret); + return ret; + } + /* NCSI error interrupts are switched together with the IGU/EGU ones */ + ret = hclge_enable_ncsi_error(hdev, en); + if (ret) + dev_err(dev, "fail(%d) to en/disable err int\n", ret); + + return ret; +} + 
+static int hclge_enable_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, + bool en) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int ret; + + /* PPP error interrupts: desc[0] = enable bits, desc[1] = their masks */ + hclge_cmd_setup_basic_desc(&desc[0], cmd, false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], cmd, false); + + if (cmd == HCLGE_PPP_CMD0_INT_CMD) { + if (en) { + desc[0].data[0] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN); + desc[0].data[1] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN); + } else { + desc[0].data[0] = 0; + desc[0].data[1] = 0; + } + desc[1].data[0] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK); + desc[1].data[1] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK); + } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { + if (en) { + desc[0].data[0] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN); + desc[0].data[1] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN); + } else { + desc[0].data[0] = 0; + desc[0].data[1] = 0; + } + desc[1].data[0] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK); + desc[1].data[1] = + cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK); + } + + ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); + if (ret) + dev_err(dev, + "failed(%d) to enable/disable PPP error interrupts\n", + ret); + + return ret; +} +/* Enable/disable PPP error intr 0,1 (CMD0) and then 2,3 (CMD1). */ +static int hclge_enable_ppp_error(struct hclge_dev *hdev, bool en) +{ + struct device *dev = &hdev->pdev->dev; + int ret; + + ret = hclge_enable_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD, + en); + if (ret) { + dev_err(dev, + "failed(%d) to enable/disable PPP error intr 0,1\n", + ret); + return ret; + } + + ret = hclge_enable_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD, + en); + if (ret) + dev_err(dev, + "failed(%d) to enable/disable PPP error intr 2,3\n", + ret); + + return ret; +} +/* Enable/disable the TM scheduler and TM QCN memory error interrupts. */ +int hclge_enable_tm_hw_error(struct hclge_dev *hdev, bool en) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc; + int ret; + + /* enable TM SCH hw errors 
*/ + hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false); + if (en) + desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN); + else + desc.data[0] = 0; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(dev, "failed(%d) to configure TM SCH errors\n", ret); + return ret; + } + + /* enable TM QCN hw errors: read current cfg, then rewrite data[1] */ + ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, + 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to read TM QCN CFG status\n", ret); + return ret; + } + + hclge_cmd_reuse_desc(&desc, false); + if (en) + desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); + else + desc.data[1] = 0; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(dev, + "failed(%d) to configure TM QCN mem errors\n", ret); + + return ret; +} +/* Query, log and clear the COMMON error interrupt status; type is unused. */ +static void hclge_process_common_error(struct hclge_dev *hdev, + enum hclge_err_int_type type) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + u32 err_sts; + int ret; + + /* read COMMON err sts (two descriptors) */ + ret = hclge_cmd_query_error(hdev, &desc[0], + HCLGE_COMMON_ECC_INT_CFG, + HCLGE_CMD_FLAG_NEXT, 0, 0); + if (ret) { + dev_err(dev, + "failed(=%d) to query COMMON error interrupt status\n", + ret); + return; + } + + /* log each status word of desc[0] against its message table */ + err_sts = (le32_to_cpu(desc[0].data[0])) & HCLGE_IMP_TCM_ECC_INT_MASK; + hclge_log_error(dev, &hclge_imp_tcm_ecc_int[0], err_sts); + + err_sts = (le32_to_cpu(desc[0].data[1])) & HCLGE_CMDQ_ECC_INT_MASK; + hclge_log_error(dev, &hclge_cmdq_nic_mem_ecc_int[0], err_sts); + + err_sts = (le32_to_cpu(desc[0].data[1]) >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT) + & HCLGE_CMDQ_ECC_INT_MASK; + hclge_log_error(dev, &hclge_cmdq_rocee_mem_ecc_int[0], err_sts); + + if ((le32_to_cpu(desc[0].data[3])) & BIT(0)) + dev_warn(dev, "imp_rd_data_poison_err found\n"); + + err_sts = (le32_to_cpu(desc[0].data[3]) >> HCLGE_TQP_ECC_INT_SHIFT) & + HCLGE_TQP_ECC_INT_MASK; + hclge_log_error(dev, &hclge_tqp_int_ecc_int[0], err_sts); + + err_sts = (le32_to_cpu(desc[0].data[5])) 
& + HCLGE_IMP_ITCM4_ECC_INT_MASK; + hclge_log_error(dev, &hclge_imp_itcm4_ecc_int[0], err_sts); + + /* clear error interrupts */ + desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK); + desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK | + HCLGE_CMDQ_ROCEE_ECC_CLR_MASK); + desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK); + desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK); + + ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0, + HCLGE_CMD_FLAG_NEXT); + if (ret) + dev_err(dev, + "failed(%d) to clear COMMON error interrupt status\n", + ret); +} + +static void hclge_process_ncsi_error(struct hclge_dev *hdev, + enum hclge_err_int_type type) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc_rd; + struct hclge_desc desc_wr; + u32 err_sts; + int ret; + + if (hdev->pdev->revision < 0x21) + return; + + /* read NCSI error status */ + ret = hclge_cmd_query_error(hdev, &desc_rd, HCLGE_NCSI_INT_QUERY, + 0, 1, HCLGE_NCSI_ERR_INT_TYPE); + if (ret) { + dev_err(dev, + "failed(=%d) to query NCSI error interrupt status\n", + ret); + return; + } + + /* log err */ + err_sts = le32_to_cpu(desc_rd.data[0]); + hclge_log_error(dev, &hclge_ncsi_err_int[0], err_sts); + + /* clear err int */ + ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd, + HCLGE_NCSI_INT_CLR, 0); + if (ret) + dev_err(dev, "failed(=%d) to clear NCSI intrerrupt status\n", + ret); +} + +static void hclge_process_igu_egu_error(struct hclge_dev *hdev, + enum hclge_err_int_type int_type) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc_rd; + struct hclge_desc desc_wr; + u32 err_sts; + int ret; + + /* read IGU common err sts */ + ret = hclge_cmd_query_error(hdev, &desc_rd, + HCLGE_IGU_COMMON_INT_QUERY, + 0, 1, int_type); + if (ret) { + dev_err(dev, "failed(=%d) to query IGU common int status\n", + ret); + return; + } + + /* log err */ + err_sts = le32_to_cpu(desc_rd.data[0]) & + HCLGE_IGU_COM_INT_MASK; + hclge_log_error(dev, 
&hclge_igu_com_err_int[0], err_sts); + + /* clear err int */ + ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd, + HCLGE_IGU_COMMON_INT_CLR, 0); + if (ret) { + dev_err(dev, "failed(=%d) to clear IGU common int status\n", + ret); + return; + } + + /* read IGU-EGU TNL err sts */ + ret = hclge_cmd_query_error(hdev, &desc_rd, + HCLGE_IGU_EGU_TNL_INT_QUERY, + 0, 1, int_type); + if (ret) { + dev_err(dev, "failed(=%d) to query IGU-EGU TNL int status\n", + ret); + return; + } + + /* log err */ + err_sts = le32_to_cpu(desc_rd.data[0]) & + HCLGE_IGU_EGU_TNL_INT_MASK; + hclge_log_error(dev, &hclge_igu_egu_tnl_err_int[0], err_sts); + + /* clear err int */ + ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd, + HCLGE_IGU_EGU_TNL_INT_CLR, 0); + if (ret) { + dev_err(dev, "failed(=%d) to clear IGU-EGU TNL int status\n", + ret); + return; + } + + hclge_process_ncsi_error(hdev, HCLGE_ERR_INT_RAS_NFE); +} + +static int hclge_log_and_clear_ppp_error(struct hclge_dev *hdev, u32 cmd, + enum hclge_err_int_type int_type) +{ + enum hnae3_reset_type reset_level = HNAE3_NONE_RESET; + struct device *dev = &hdev->pdev->dev; + const struct hclge_hw_error *hw_err_lst1, *hw_err_lst2, *hw_err_lst3; + struct hclge_desc desc[2]; + u32 err_sts; + int ret; + + /* read PPP INT sts */ + ret = hclge_cmd_query_error(hdev, &desc[0], cmd, + HCLGE_CMD_FLAG_NEXT, 5, int_type); + if (ret) { + dev_err(dev, "failed(=%d) to query PPP interrupt status\n", + ret); + return -EIO; + } + + /* log error */ + if (cmd == HCLGE_PPP_CMD0_INT_CMD) { + hw_err_lst1 = &hclge_ppp_mpf_int0[0]; + hw_err_lst2 = &hclge_ppp_mpf_int1[0]; + hw_err_lst3 = &hclge_ppp_pf_int[0]; + } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { + hw_err_lst1 = &hclge_ppp_mpf_int2[0]; + hw_err_lst2 = &hclge_ppp_mpf_int3[0]; + } else { + dev_err(dev, "invalid command(=%d)\n", cmd); + return -EINVAL; + } + + err_sts = le32_to_cpu(desc[0].data[2]); + if (err_sts) { + hclge_log_error(dev, hw_err_lst1, err_sts); + reset_level = HNAE3_FUNC_RESET; + } + + 
err_sts = le32_to_cpu(desc[0].data[3]); + if (err_sts) { + hclge_log_error(dev, hw_err_lst2, err_sts); + reset_level = HNAE3_FUNC_RESET; + } + + err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3; + if (err_sts) { + hclge_log_error(dev, hw_err_lst3, err_sts); + reset_level = HNAE3_FUNC_RESET; + } + + /* clear PPP INT */ + ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0, + HCLGE_CMD_FLAG_NEXT); + if (ret) { + dev_err(dev, "failed(=%d) to clear PPP interrupt status\n", + ret); + return -EIO; + } + + return 0; +} + +static void hclge_process_ppp_error(struct hclge_dev *hdev, + enum hclge_err_int_type int_type) +{ + struct device *dev = &hdev->pdev->dev; + int ret; + + /* read PPP INT0,1 sts */ + ret = hclge_log_and_clear_ppp_error(hdev, HCLGE_PPP_CMD0_INT_CMD, + int_type); + if (ret < 0) { + dev_err(dev, "failed(=%d) to clear PPP interrupt 0,1 status\n", + ret); + return; + } + + /* read err PPP INT2,3 sts */ + ret = hclge_log_and_clear_ppp_error(hdev, HCLGE_PPP_CMD1_INT_CMD, + int_type); + if (ret < 0) + dev_err(dev, "failed(=%d) to clear PPP interrupt 2,3 status\n", + ret); +} + +static void hclge_process_tm_sch_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + const struct hclge_tm_sch_ecc_info *tm_sch_ecc_info; + struct hclge_desc desc; + u32 ecc_info; + u8 module_no; + u8 ram_no; + int ret; + + /* read TM scheduler errors */ + ret = hclge_cmd_query_error(hdev, &desc, + HCLGE_TM_SCH_MBIT_ECC_INFO_CMD, 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to read SCH mbit ECC err info\n", ret); + return; + } + ecc_info = le32_to_cpu(desc.data[0]); + + ret = hclge_cmd_query_error(hdev, &desc, + HCLGE_TM_SCH_ECC_ERR_RINT_CMD, 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to read SCH ECC err status\n", ret); + return; + } + + /* log TM scheduler errors */ + if (le32_to_cpu(desc.data[0])) { + hclge_log_error(dev, &hclge_tm_sch_err_int[0], + le32_to_cpu(desc.data[0])); + if (le32_to_cpu(desc.data[0]) & 0x2) { + module_no = (ecc_info >> 20) 
& 0xF; + ram_no = (ecc_info >> 16) & 0xF; + tm_sch_ecc_info = + &hclge_tm_sch_ecc_err[module_no][ram_no]; + dev_warn(dev, "ecc err module:ram=%s\n", + tm_sch_ecc_info->name); + dev_warn(dev, "ecc memory address = 0x%x\n", + ecc_info & 0xFFFF); + } + } + /* NOTE(review): table index is not range-checked - confirm its dims */ + /* clear TM scheduler errors */ + ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to clear TM SCH error status\n", ret); + return; + } + /* read, then clear, the SCH RINT CE status */ + ret = hclge_cmd_query_error(hdev, &desc, + HCLGE_TM_SCH_ECC_ERR_RINT_CE, 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to read SCH CE status\n", ret); + return; + } + + ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to clear TM SCH CE status\n", ret); + return; + } + /* read, then clear, the SCH RINT NFE status */ + ret = hclge_cmd_query_error(hdev, &desc, + HCLGE_TM_SCH_ECC_ERR_RINT_NFE, 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to read SCH NFE status\n", ret); + return; + } + + ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to clear TM SCH NFE status\n", ret); + return; + } + /* read, then clear, the SCH RINT FE status */ + ret = hclge_cmd_query_error(hdev, &desc, + HCLGE_TM_SCH_ECC_ERR_RINT_FE, 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to read SCH FE status\n", ret); + return; + } + + ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); + if (ret) + dev_err(dev, "failed(%d) to clear TM SCH FE status\n", ret); +} +/* Query, log and clear the TM QCN memory error status. */ +static void hclge_process_tm_qcn_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc; + int ret; + + /* read QCN errors */ + ret = hclge_cmd_query_error(hdev, &desc, + HCLGE_TM_QCN_MEM_INT_INFO_CMD, 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to read QCN ECC err status\n", ret); + return; + } + + /* log QCN errors */ + if (le32_to_cpu(desc.data[0])) + hclge_log_error(dev, &hclge_qcn_ecc_err_int[0], + le32_to_cpu(desc.data[0])); + + /* clear QCN errors */ + ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); + if (ret) + dev_err(dev, "failed(%d) 
to clear QCN error status\n", ret); +} +/* TM handler for hw_blk[]: process SCH then QCN errors; @type is unused. */ +static void hclge_process_tm_error(struct hclge_dev *hdev, + enum hclge_err_int_type type) +{ + hclge_process_tm_sch_error(hdev); + hclge_process_tm_qcn_error(hdev); +} +/* Error blocks: RAS status bit mask, name and enable/process callbacks. */ +static const struct hclge_hw_blk hw_blk[] = { + { .msk = BIT(0), .name = "IGU_EGU", + .enable_error = hclge_enable_igu_egu_error, + .process_error = hclge_process_igu_egu_error, }, + { .msk = BIT(5), .name = "COMMON", + .enable_error = hclge_enable_common_error, + .process_error = hclge_process_common_error, }, + { .msk = BIT(4), .name = "TM", + .enable_error = hclge_enable_tm_hw_error, + .process_error = hclge_process_tm_error, }, + { .msk = BIT(1), .name = "PPP", + .enable_error = hclge_enable_ppp_error, + .process_error = hclge_process_ppp_error, }, + { /* sentinel: a NULL name terminates the table walks */ } +}; +/* Enable or disable error interrupts for all hw_blk[]; stop on first error. */ +int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) +{ + struct device *dev = &hdev->pdev->dev; + int ret = 0; + int i = 0; + + while (hw_blk[i].name) { + if (!hw_blk[i].enable_error) { + i++; + continue; + } + ret = hw_blk[i].enable_error(hdev, state); + if (ret) { + dev_err(dev, "fail(%d) to en/disable err int\n", ret); + return ret; + } + i++; + } + + return ret; +} +/* Dispatch pending RAS non-fatal errors to hw_blk[]; always asks for reset. */ +pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev) +{ + struct hclge_dev *hdev = ae_dev->priv; + struct device *dev = &hdev->pdev->dev; + u32 sts, val; + int i = 0; + + sts = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); + + /* Processing Non-fatal errors: one bit of val per hw_blk[].msk */ + if (sts & HCLGE_RAS_REG_NFE_MASK) { + val = (sts >> HCLGE_RAS_REG_NFE_SHIFT) & 0xFF; + i = 0; + while (hw_blk[i].name) { + if (!(hw_blk[i].msk & val)) { + i++; + continue; + } + dev_warn(dev, "%s ras non-fatal error identified\n", + hw_blk[i].name); + if (hw_blk[i].process_error) + hw_blk[i].process_error(hdev, + HCLGE_ERR_INT_RAS_NFE); + i++; + } + } + + return PCI_ERS_RESULT_NEED_RESET; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h new file mode 100644 index 000000000000..e0e3b5861495 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2016-2017 Hisilicon Limited. */ + +#ifndef __HCLGE_ERR_H +#define __HCLGE_ERR_H + +#include "hclge_main.h" +/* RAS PF status register: FE bits [7:0], NFE (non-fatal) bits [15:8] */ +#define HCLGE_RAS_PF_OTHER_INT_STS_REG 0x20B00 +#define HCLGE_RAS_REG_FE_MASK 0xFF +#define HCLGE_RAS_REG_NFE_MASK 0xFF00 +#define HCLGE_RAS_REG_NFE_SHIFT 8 +/* Per-block error interrupt enable values and their field masks */ +#define HCLGE_IMP_TCM_ECC_ERR_INT_EN 0xFFFF0000 +#define HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK 0xFFFF0000 +#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN 0x300 +#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK 0x300 +#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN 0xFFFF +#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK 0xFFFF +#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN 0xFFFF0000 +#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK 0xFFFF0000 +#define HCLGE_IMP_RD_POISON_ERR_INT_EN 0x0100 +#define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK 0x0100 +#define HCLGE_TQP_ECC_ERR_INT_EN 0x0FFF +#define HCLGE_TQP_ECC_ERR_INT_EN_MASK 0x0FFF +#define HCLGE_IGU_ERR_INT_EN 0x0000066F +#define HCLGE_IGU_ERR_INT_EN_MASK 0x000F +#define HCLGE_IGU_TNL_ERR_INT_EN 0x0002AABF +#define HCLGE_IGU_TNL_ERR_INT_EN_MASK 0x003F +#define HCLGE_PPP_MPF_ECC_ERR_INT0_EN 0xFFFFFFFF +#define HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK 0xFFFFFFFF +#define HCLGE_PPP_MPF_ECC_ERR_INT1_EN 0xFFFFFFFF +#define HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK 0xFFFFFFFF +#define HCLGE_PPP_PF_ERR_INT_EN 0x0003 +#define HCLGE_PPP_PF_ERR_INT_EN_MASK 0x0003 +#define HCLGE_PPP_MPF_ECC_ERR_INT2_EN 0x003F +#define HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK 0x003F +#define HCLGE_PPP_MPF_ECC_ERR_INT3_EN 0x003F +#define HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK 0x003F +#define HCLGE_TM_SCH_ECC_ERR_INT_EN 0x3 +#define HCLGE_TM_QCN_MEM_ERR_INT_EN 0xFFFFFF +#define HCLGE_NCSI_ERR_INT_EN 0x3 +#define HCLGE_NCSI_ERR_INT_TYPE 0x9 +/* Masks and shifts for decoding error status, and error clear masks */ +#define HCLGE_IMP_TCM_ECC_INT_MASK 0xFFFF +#define 
HCLGE_IMP_ITCM4_ECC_INT_MASK 0x3 +#define HCLGE_CMDQ_ECC_INT_MASK 0xFFFF +#define HCLGE_CMDQ_ROC_ECC_INT_SHIFT 16 +#define HCLGE_TQP_ECC_INT_MASK 0xFFF +#define HCLGE_TQP_ECC_INT_SHIFT 16 +#define HCLGE_IMP_TCM_ECC_CLR_MASK 0xFFFF +#define HCLGE_IMP_ITCM4_ECC_CLR_MASK 0x3 +#define HCLGE_CMDQ_NIC_ECC_CLR_MASK 0xFFFF +#define HCLGE_CMDQ_ROCEE_ECC_CLR_MASK 0xFFFF0000 +#define HCLGE_TQP_IMP_ERR_CLR_MASK 0x0FFF0001 +#define HCLGE_IGU_COM_INT_MASK 0xF +#define HCLGE_IGU_EGU_TNL_INT_MASK 0x3F +#define HCLGE_PPP_PF_INT_MASK 0x100 +/* Error interrupt type passed to the query and process helpers */ +enum hclge_err_int_type { + HCLGE_ERR_INT_MSIX = 0, + HCLGE_ERR_INT_RAS_CE = 1, + HCLGE_ERR_INT_RAS_NFE = 2, + HCLGE_ERR_INT_RAS_FE = 3, +}; +/* A hardware error block: RAS status bit, name and its callbacks */ +struct hclge_hw_blk { + u32 msk; + const char *name; + int (*enable_error)(struct hclge_dev *hdev, bool en); + void (*process_error)(struct hclge_dev *hdev, + enum hclge_err_int_type type); +}; +/* An error table entry: interrupt bit mask (int_msk) and its message */ +struct hclge_hw_error { + u32 int_msk; + const char *msg; +}; +/* Error handling entry points called from hclge_main.c */ +int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state); +int hclge_enable_tm_hw_error(struct hclge_dev *hdev, bool en); +pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev); +#endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 1bd83e8268fc..5234b5373ed3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -19,6 +19,7 @@ #include "hclge_mbx.h" #include "hclge_mdio.h" #include "hclge_tm.h" +#include "hclge_err.h" #include "hnae3.h" #define HCLGE_NAME "hclge" @@ -2488,12 +2489,18 @@ static void hclge_reset(struct hclge_dev *hdev) ae_dev->reset_type = HNAE3_NONE_RESET; } -static void hclge_reset_event(struct hnae3_handle *handle) +static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) { - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; + struct hnae3_ae_dev *ae_dev = 
pci_get_drvdata(pdev); + struct hclge_dev *hdev = ae_dev->priv; - /* check if this is a new reset request and we are not here just because + /* We can get called in either of the two cases below: + * 1. A recoverable error was conveyed through APEI and the only way to + * restore normal operation is to reset. + * 2. A new reset request came from the stack due to a timeout. + * + * In the first case the error event may have no ae handle available. + * Check if this is a new reset request and we are not here just because * last reset attempt did not succeed and watchdog hit us again. We will * know this if last reset request did not occur very recently (watchdog * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz) @@ -2502,6 +2509,9 @@ static void hclge_reset_event(struct hnae3_handle *handle) * want to make sure we throttle the reset request. Therefore, we will * not allow it again before 3*HZ times. */ + if (!handle) + handle = &hdev->vport[0].nic; + if (time_before(jiffies, (handle->last_reset_time + 3 * HZ))) return; else if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ))) @@ -6749,6 +6759,13 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_mdiobus_unreg; } + ret = hclge_hw_error_set_state(hdev, true); + if (ret) { + dev_err(&pdev->dev, + "hw error interrupts enable failed, ret =%d\n", ret); + goto err_mdiobus_unreg; + } + hclge_dcb_ops_set(hdev); timer_setup(&hdev->service_timer, hclge_service_timer, 0); @@ -6864,6 +6881,12 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + /* Re-enable the TM hw error interrupts because + * they get disabled on core/global reset. 
+ */ + if (hclge_enable_tm_hw_error(hdev, true)) + dev_err(&pdev->dev, "failed to enable TM hw error interrupts\n"); + dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", HCLGE_DRIVER_NAME); @@ -6886,6 +6909,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_enable_vector(&hdev->misc_vector, false); synchronize_irq(hdev->misc_vector.vector_irq); + hclge_hw_error_set_state(hdev, false); hclge_destroy_cmd_queue(&hdev->hw); hclge_misc_irq_uninit(hdev); hclge_pci_uninit(hdev); @@ -7312,6 +7336,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_fd_all_rules = hclge_get_all_rules, .restore_fd_rules = hclge_restore_fd_entries, .enable_fd = hclge_enable_fd, + .process_hw_error = hclge_process_ras_hw_error, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index ac67fecb9408..e0a86a58342c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1214,7 +1214,8 @@ static int hclgevf_do_reset(struct hclgevf_dev *hdev) return status; } -static void hclgevf_reset_event(struct hnae3_handle *handle) +static void hclgevf_reset_event(struct pci_dev *pdev, + struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);