amd-xgbe: Add ECC status support for the device memory

Some versions of the amd-xgbe device are capable of reporting ECC error
information back to the driver. Add support to process, track and report
on this information.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Lendacky, Thomas 2016-11-10 17:10:26 -06:00 committed by David S. Miller
parent 4c70dd8ac9
commit e78332b228
7 changed files with 340 additions and 3 deletions

View File

@ -178,6 +178,7 @@ config AMD_XGBE
select BITREVERSE
select CRC32
select PTP_1588_CLOCK
select AMD_XGBE_HAVE_ECC if X86
---help---
This driver supports the AMD 10GbE Ethernet device found on an
AMD SoC.
@ -195,4 +196,8 @@ config AMD_XGBE_DCB
If unsure, say N.
# Hidden helper symbol (it has no prompt, so it cannot be set by the user).
# AMD_XGBE selects it on X86; the driver tests CONFIG_AMD_XGBE_HAVE_ECC to
# decide whether the ECC module parameters are exposed.
config AMD_XGBE_HAVE_ECC
bool
default n
endif # NET_VENDOR_AMD

View File

@ -920,6 +920,10 @@
#define XP_PROP_5 0x0014
#define XP_MAC_ADDR_LO 0x0020
#define XP_MAC_ADDR_HI 0x0024
/* ECC registers: interrupt status (ISR), interrupt enable (IER) and the
 * two ECC count registers (CNT0, CNT1).
 */
#define XP_ECC_ISR 0x0030
#define XP_ECC_IER 0x0034
#define XP_ECC_CNT0 0x003c
#define XP_ECC_CNT1 0x0040
#define XP_DRIVER_INT_REQ 0x0060
#define XP_DRIVER_INT_RO 0x0064
#define XP_DRIVER_SCRATCH_0 0x0068
@ -935,6 +939,42 @@
#define XP_DRIVER_SCRATCH_0_COMMAND_WIDTH 8
#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_INDEX 8
#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_WIDTH 8
/* XP_ECC_CNT0 is split into four 8-bit fields, from bit 0 upward:
 * TX SEC, TX DED, RX SEC, RX DED.
 * XP_ECC_CNT1 carries two 8-bit fields: DESC SEC (bits 7:0) and
 * DESC DED (bits 15:8).
 * NOTE(review): presumably these are per-area error counters; the code
 * visible in this change does not read them - confirm against the
 * hardware documentation.
 */
#define XP_ECC_CNT0_RX_DED_INDEX 24
#define XP_ECC_CNT0_RX_DED_WIDTH 8
#define XP_ECC_CNT0_RX_SEC_INDEX 16
#define XP_ECC_CNT0_RX_SEC_WIDTH 8
#define XP_ECC_CNT0_TX_DED_INDEX 8
#define XP_ECC_CNT0_TX_DED_WIDTH 8
#define XP_ECC_CNT0_TX_SEC_INDEX 0
#define XP_ECC_CNT0_TX_SEC_WIDTH 8
#define XP_ECC_CNT1_DESC_DED_INDEX 8
#define XP_ECC_CNT1_DESC_DED_WIDTH 8
#define XP_ECC_CNT1_DESC_SEC_INDEX 0
#define XP_ECC_CNT1_DESC_SEC_WIDTH 8
/* ECC interrupt enable (IER) and interrupt status (ISR) bit positions.
 *
 * Both registers use the same single-bit layout.  Counting up from bit 0
 * the order is TX, RX, descriptor cache, with the SEC bit below the DED
 * bit of each pair:
 *
 *   bit 0  TX SEC      bit 1  TX DED
 *   bit 2  RX SEC      bit 3  RX DED
 *   bit 4  DESC SEC    bit 5  DESC DED
 *
 * This is the same TX/RX/DESC, SEC-then-DED ordering used by the
 * XP_ECC_CNT0/XP_ECC_CNT1 fields.  Getting the order backwards makes the
 * driver attribute an interrupt to the wrong memory and to the wrong
 * severity (SEC vs. DED).
 * NOTE(review): verify against the hardware register specification.
 */
#define XP_ECC_IER_DESC_DED_INDEX		5
#define XP_ECC_IER_DESC_DED_WIDTH		1
#define XP_ECC_IER_DESC_SEC_INDEX		4
#define XP_ECC_IER_DESC_SEC_WIDTH		1
#define XP_ECC_IER_RX_DED_INDEX			3
#define XP_ECC_IER_RX_DED_WIDTH			1
#define XP_ECC_IER_RX_SEC_INDEX			2
#define XP_ECC_IER_RX_SEC_WIDTH			1
#define XP_ECC_IER_TX_DED_INDEX			1
#define XP_ECC_IER_TX_DED_WIDTH			1
#define XP_ECC_IER_TX_SEC_INDEX			0
#define XP_ECC_IER_TX_SEC_WIDTH			1
#define XP_ECC_ISR_DESC_DED_INDEX		5
#define XP_ECC_ISR_DESC_DED_WIDTH		1
#define XP_ECC_ISR_DESC_SEC_INDEX		4
#define XP_ECC_ISR_DESC_SEC_WIDTH		1
#define XP_ECC_ISR_RX_DED_INDEX			3
#define XP_ECC_ISR_RX_DED_WIDTH			1
#define XP_ECC_ISR_RX_SEC_INDEX			2
#define XP_ECC_ISR_RX_SEC_WIDTH			1
#define XP_ECC_ISR_TX_DED_INDEX			1
#define XP_ECC_ISR_TX_DED_WIDTH			1
#define XP_ECC_ISR_TX_SEC_INDEX			0
#define XP_ECC_ISR_TX_SEC_WIDTH			1
#define XP_MAC_ADDR_HI_VALID_INDEX 31
#define XP_MAC_ADDR_HI_VALID_WIDTH 1
#define XP_PROP_0_CONN_TYPE_INDEX 28

View File

@ -724,6 +724,65 @@ static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata)
XGMAC_IOWRITE_BITS(pdata, MMC_TIER, ALL_INTERRUPTS, 0xffffffff);
}
/* Clear any ECC interrupt status that is already set and then enable all
 * six ECC interrupt sources (SEC and DED for TX, RX and DESC).
 * Returns without touching the hardware when the device version data does
 * not advertise ECC support.
 */
static void xgbe_enable_ecc_interrupts(struct xgbe_prv_data *pdata)
{
	unsigned int enable = 0;
	unsigned int pending;

	if (!pdata->vdata->ecc_support)
		return;

	/* Write the current status value back to clear what is set */
	pending = XP_IOREAD(pdata, XP_ECC_ISR);
	XP_IOWRITE(pdata, XP_ECC_ISR, pending);

	/* Build the enable mask: corrected (SEC) sources ... */
	XP_SET_BITS(enable, XP_ECC_IER, TX_SEC, 1);
	XP_SET_BITS(enable, XP_ECC_IER, RX_SEC, 1);
	XP_SET_BITS(enable, XP_ECC_IER, DESC_SEC, 1);

	/* ... and detected (DED) sources */
	XP_SET_BITS(enable, XP_ECC_IER, TX_DED, 1);
	XP_SET_BITS(enable, XP_ECC_IER, RX_DED, 1);
	XP_SET_BITS(enable, XP_ECC_IER, DESC_DED, 1);

	XP_IOWRITE(pdata, XP_ECC_IER, enable);
}
/* Turn off the three DED interrupt enables (TX, RX, DESC) while leaving
 * every other bit of the ECC interrupt enable register as it was.
 */
static void xgbe_disable_ecc_ded(struct xgbe_prv_data *pdata)
{
	unsigned int enable = XP_IOREAD(pdata, XP_ECC_IER);

	/* Read-modify-write: only the DED enable bits are cleared */
	XP_SET_BITS(enable, XP_ECC_IER, DESC_DED, 0);
	XP_SET_BITS(enable, XP_ECC_IER, RX_DED, 0);
	XP_SET_BITS(enable, XP_ECC_IER, TX_DED, 0);

	XP_IOWRITE(pdata, XP_ECC_IER, enable);
}
/* Turn off a single SEC interrupt enable, selected by @sec, leaving the
 * remaining bits of the ECC interrupt enable register unchanged.  The
 * register is always written back, even if @sec matches no case.
 */
static void xgbe_disable_ecc_sec(struct xgbe_prv_data *pdata,
				 enum xgbe_ecc_sec sec)
{
	unsigned int enable = XP_IOREAD(pdata, XP_ECC_IER);

	/* Clear only the enable bit that belongs to the requested area */
	switch (sec) {
	case XGBE_ECC_SEC_DESC:
		XP_SET_BITS(enable, XP_ECC_IER, DESC_SEC, 0);
		break;
	case XGBE_ECC_SEC_RX:
		XP_SET_BITS(enable, XP_ECC_IER, RX_SEC, 0);
		break;
	case XGBE_ECC_SEC_TX:
		XP_SET_BITS(enable, XP_ECC_IER, TX_SEC, 0);
		break;
	}

	XP_IOWRITE(pdata, XP_ECC_IER, enable);
}
static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed)
{
unsigned int ss;
@ -3294,6 +3353,11 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
xgbe_config_mmc(pdata);
xgbe_enable_mac_interrupts(pdata);
/*
* Initialize ECC related features
*/
xgbe_enable_ecc_interrupts(pdata);
DBGPR("<--xgbe_init\n");
return 0;
@ -3399,5 +3463,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
hw_if->set_rss_hash_key = xgbe_set_rss_hash_key;
hw_if->set_rss_lookup_table = xgbe_set_rss_lookup_table;
/* For ECC */
hw_if->disable_ecc_ded = xgbe_disable_ecc_ded;
hw_if->disable_ecc_sec = xgbe_disable_ecc_sec;
DBGPR("<--xgbe_init_function_ptrs\n");
}

View File

@ -114,6 +114,7 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/tcp.h>
#include <linux/if_vlan.h>
@ -126,8 +127,35 @@
#include "xgbe.h"
#include "xgbe-common.h"
/* ECC tuning values, consumed by xgbe_ecc_sec() and xgbe_ecc_ded():
 *  - ecc_sec_info_threshold: corrected-error count within one period above
 *    which the "informational threshold" message is logged
 *  - ecc_sec_warn_threshold: corrected-error count within one period above
 *    which the "warning threshold" message is logged and xgbe_ecc_sec()
 *    reports true to its caller
 *  - ecc_sec_period: length of the corrected-error counting window, in
 *    seconds
 *  - ecc_ded_threshold: detected-error count within one period above which
 *    xgbe_ecc_ded() raises an alert and reports true to its caller
 *  - ecc_ded_period: length of the detected-error counting window, in
 *    seconds
 * The variables always exist with the defaults below; they are only made
 * available as module parameters when CONFIG_AMD_XGBE_HAVE_ECC is set.
 */
static unsigned int ecc_sec_info_threshold = 10;
static unsigned int ecc_sec_warn_threshold = 10000;
static unsigned int ecc_sec_period = 600;
static unsigned int ecc_ded_threshold = 2;
static unsigned int ecc_ded_period = 600;
#ifdef CONFIG_AMD_XGBE_HAVE_ECC
/* Only expose the ECC parameters if supported */
module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(ecc_sec_info_threshold,
" ECC corrected error informational threshold setting");
module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(ecc_sec_warn_threshold,
" ECC corrected error warning threshold setting");
module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)");
module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting");
module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)");
#endif
static int xgbe_one_poll(struct napi_struct *, int);
static int xgbe_all_poll(struct napi_struct *, int);
static void xgbe_stop(struct xgbe_prv_data *);
static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
{
@ -308,6 +336,107 @@ static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata)
xgbe_disable_rx_tx_int(pdata, channel);
}
static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period,
unsigned int *count, const char *area)
{
if (time_before(jiffies, *period)) {
(*count)++;
} else {
*period = jiffies + (ecc_sec_period * HZ);
*count = 1;
}
if (*count > ecc_sec_info_threshold)
dev_warn_once(pdata->dev,
"%s ECC corrected errors exceed informational threshold\n",
area);
if (*count > ecc_sec_warn_threshold) {
dev_warn_once(pdata->dev,
"%s ECC corrected errors exceed warning threshold\n",
area);
return true;
}
return false;
}
static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period,
unsigned int *count, const char *area)
{
if (time_before(jiffies, *period)) {
(*count)++;
} else {
*period = jiffies + (ecc_ded_period * HZ);
*count = 1;
}
if (*count > ecc_ded_threshold) {
netdev_alert(pdata->netdev,
"%s ECC detected errors exceed threshold\n",
area);
return true;
}
return false;
}
static irqreturn_t xgbe_ecc_isr(int irq, void *data)
{
struct xgbe_prv_data *pdata = data;
unsigned int ecc_isr;
bool stop = false;
/* Mask status with only the interrupts we care about */
ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR);
ecc_isr &= XP_IOREAD(pdata, XP_ECC_IER);
netif_dbg(pdata, intr, pdata->netdev, "ECC_ISR=%#010x\n", ecc_isr);
if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_DED)) {
stop |= xgbe_ecc_ded(pdata, &pdata->tx_ded_period,
&pdata->tx_ded_count, "TX fifo");
}
if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_DED)) {
stop |= xgbe_ecc_ded(pdata, &pdata->rx_ded_period,
&pdata->rx_ded_count, "RX fifo");
}
if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_DED)) {
stop |= xgbe_ecc_ded(pdata, &pdata->desc_ded_period,
&pdata->desc_ded_count,
"descriptor cache");
}
if (stop) {
pdata->hw_if.disable_ecc_ded(pdata);
schedule_work(&pdata->stopdev_work);
goto out;
}
if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_SEC)) {
if (xgbe_ecc_sec(pdata, &pdata->tx_sec_period,
&pdata->tx_sec_count, "TX fifo"))
pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_TX);
}
if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_SEC))
if (xgbe_ecc_sec(pdata, &pdata->rx_sec_period,
&pdata->rx_sec_count, "RX fifo"))
pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_RX);
if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_SEC))
if (xgbe_ecc_sec(pdata, &pdata->desc_sec_period,
&pdata->desc_sec_count, "descriptor cache"))
pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_DESC);
out:
/* Clear all ECC interrupts */
XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
return IRQ_HANDLED;
}
static irqreturn_t xgbe_isr(int irq, void *data)
{
struct xgbe_prv_data *pdata = data;
@ -397,6 +526,10 @@ static irqreturn_t xgbe_isr(int irq, void *data)
if (pdata->dev_irq == pdata->an_irq)
pdata->phy_if.an_isr(irq, pdata);
/* If there is not a separate ECC irq, handle it here */
if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq))
xgbe_ecc_isr(irq, pdata);
isr_done:
return IRQ_HANDLED;
}
@ -679,6 +812,16 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
return ret;
}
if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) {
ret = devm_request_irq(pdata->dev, pdata->ecc_irq, xgbe_ecc_isr,
0, pdata->ecc_name, pdata);
if (ret) {
netdev_alert(netdev, "error requesting ecc irq %d\n",
pdata->ecc_irq);
goto err_dev_irq;
}
}
if (!pdata->per_channel_irq)
return 0;
@ -695,17 +838,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
if (ret) {
netdev_alert(netdev, "error requesting irq %d\n",
channel->dma_irq);
goto err_irq;
goto err_dma_irq;
}
}
return 0;
err_irq:
err_dma_irq:
/* Using an unsigned int, 'i' will go to UINT_MAX and exit */
for (i--, channel--; i < pdata->channel_count; i--, channel--)
devm_free_irq(pdata->dev, channel->dma_irq, channel);
if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
err_dev_irq:
devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
return ret;
@ -718,6 +865,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
if (!pdata->per_channel_irq)
return;
@ -919,6 +1069,8 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
xgbe_start_timers(pdata);
queue_work(pdata->dev_workqueue, &pdata->service_work);
clear_bit(XGBE_STOPPED, &pdata->dev_state);
DBGPR("<--xgbe_start\n");
return 0;
@ -945,6 +1097,9 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
DBGPR("-->xgbe_stop\n");
if (test_bit(XGBE_STOPPED, &pdata->dev_state))
return;
netif_tx_stop_all_queues(netdev);
xgbe_stop_timers(pdata);
@ -970,9 +1125,29 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
netdev_tx_reset_queue(txq);
}
set_bit(XGBE_STOPPED, &pdata->dev_state);
DBGPR("<--xgbe_stop\n");
}
/* Work handler for stopdev_work: with the RTNL lock held, stop the device
 * and free its Tx and Rx data, then log that the device was stopped.
 */
static void xgbe_stopdev(struct work_struct *work)
{
	struct xgbe_prv_data *pdata;

	/* Recover the private data that embeds this work item */
	pdata = container_of(work, struct xgbe_prv_data, stopdev_work);

	rtnl_lock();

	xgbe_stop(pdata);

	xgbe_free_tx_data(pdata);
	xgbe_free_rx_data(pdata);

	rtnl_unlock();

	netdev_alert(pdata->netdev, "device stopped\n");
}
static void xgbe_restart_dev(struct xgbe_prv_data *pdata)
{
DBGPR("-->xgbe_restart_dev\n");
@ -1355,6 +1530,7 @@ static int xgbe_open(struct net_device *netdev)
INIT_WORK(&pdata->service_work, xgbe_service);
INIT_WORK(&pdata->restart_work, xgbe_restart);
INIT_WORK(&pdata->stopdev_work, xgbe_stopdev);
INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp);
xgbe_init_timers(pdata);

View File

@ -190,6 +190,7 @@ struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev)
pdata->msg_enable = netif_msg_init(debug, default_msg_level);
set_bit(XGBE_DOWN, &pdata->dev_state);
set_bit(XGBE_STOPPED, &pdata->dev_state);
return pdata;
}
@ -263,6 +264,14 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
netdev->base_addr = (unsigned long)pdata->xgmac_regs;
memcpy(netdev->dev_addr, pdata->mac_addr, netdev->addr_len);
/* Initialize ECC timestamps */
pdata->tx_sec_period = jiffies;
pdata->tx_ded_period = jiffies;
pdata->rx_sec_period = jiffies;
pdata->rx_ded_period = jiffies;
pdata->desc_sec_period = jiffies;
pdata->desc_ded_period = jiffies;
/* Issue software reset to device */
pdata->hw_if.exit(pdata);
@ -384,6 +393,10 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs",
netdev_name(netdev));
/* Create the ECC name based on netdev name */
snprintf(pdata->ecc_name, sizeof(pdata->ecc_name) - 1, "%s-ecc",
netdev_name(netdev));
/* Create workqueues */
pdata->dev_workqueue =
create_singlethread_workqueue(netdev_name(netdev));

View File

@ -482,6 +482,7 @@ static const struct xgbe_version_data xgbe_v2a = {
.tx_max_fifo_size = 229376,
.rx_max_fifo_size = 229376,
.tx_tstamp_workaround = 1,
.ecc_support = 1,
};
static const struct xgbe_version_data xgbe_v2b = {
@ -491,6 +492,7 @@ static const struct xgbe_version_data xgbe_v2b = {
.tx_max_fifo_size = 65536,
.rx_max_fifo_size = 65536,
.tx_tstamp_workaround = 1,
.ecc_support = 1,
};
static const struct pci_device_id xgbe_pci_table[] = {

View File

@ -285,6 +285,9 @@
#define XGBE_SGMII_AN_LINK_SPEED_1000 0x08
#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4)
/* ECC correctable error notification window (seconds) */
#define XGBE_ECC_LIMIT 60
struct xgbe_prv_data;
struct xgbe_packet_data {
@ -461,6 +464,7 @@ enum xgbe_state {
XGBE_DOWN,
XGBE_LINK_INIT,
XGBE_LINK_ERR,
XGBE_STOPPED,
};
enum xgbe_int {
@ -480,6 +484,12 @@ enum xgbe_int_state {
XGMAC_INT_STATE_RESTORE,
};
/* Selects which SEC interrupt the disable_ecc_sec hardware hook turns off */
enum xgbe_ecc_sec {
	XGBE_ECC_SEC_TX = 0,	/* TX_SEC enable bit */
	XGBE_ECC_SEC_RX = 1,	/* RX_SEC enable bit */
	XGBE_ECC_SEC_DESC = 2,	/* DESC_SEC enable bit */
};
enum xgbe_speed {
XGBE_SPEED_1000 = 0,
XGBE_SPEED_2500,
@ -694,6 +704,10 @@ struct xgbe_hw_if {
int (*disable_rss)(struct xgbe_prv_data *);
int (*set_rss_hash_key)(struct xgbe_prv_data *, const u8 *);
int (*set_rss_lookup_table)(struct xgbe_prv_data *, const u32 *);
/* For ECC */
void (*disable_ecc_ded)(struct xgbe_prv_data *);
void (*disable_ecc_sec)(struct xgbe_prv_data *, enum xgbe_ecc_sec);
};
/* This structure represents implementation specific routines for an
@ -827,6 +841,7 @@ struct xgbe_version_data {
unsigned int tx_max_fifo_size;
unsigned int rx_max_fifo_size;
unsigned int tx_tstamp_workaround;
unsigned int ecc_support;
};
struct xgbe_prv_data {
@ -868,6 +883,21 @@ struct xgbe_prv_data {
/* Flags representing xgbe_state */
unsigned long dev_state;
/* ECC support */
unsigned long tx_sec_period;
unsigned long tx_ded_period;
unsigned long rx_sec_period;
unsigned long rx_ded_period;
unsigned long desc_sec_period;
unsigned long desc_ded_period;
unsigned int tx_sec_count;
unsigned int tx_ded_count;
unsigned int rx_sec_count;
unsigned int rx_ded_count;
unsigned int desc_ded_count;
unsigned int desc_sec_count;
struct msix_entry *msix_entries;
int dev_irq;
int ecc_irq;
@ -880,6 +910,8 @@ struct xgbe_prv_data {
unsigned int channel_irq_count;
unsigned int channel_irq_mode;
char ecc_name[IFNAMSIZ + 32];
struct xgbe_hw_if hw_if;
struct xgbe_phy_if phy_if;
struct xgbe_desc_if desc_if;
@ -990,8 +1022,9 @@ struct xgbe_prv_data {
/* Hardware features of the device */
struct xgbe_hw_features hw_feat;
/* Device restart work structure */
/* Device work structures */
struct work_struct restart_work;
struct work_struct stopdev_work;
/* Keeps track of power mode */
unsigned int power_down;