mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-13 22:16:41 +07:00
* A substantial edac_mc cleanup, sanitizing object freeing, streamlining
and simplifying code flow, and getting rid of a lot of needless complexity in memory controller representation code, by Robert Richter. * A new EDAC driver for the ARM DMC-520 memory controller, by Lei Wang, Shiping Ji and others. * The usual sprinkling of misc cleanups and fixes all over the subsystem. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAl6BqVEACgkQEsHwGGHe VUpD9w//cL/sIS+TVG7KqGf7xN08YCKUiq19xAhPPDAZAggoDWWoU/DDo12DT7I/ u+v/wwYqIloeXW/YV49yY65VWjFInxZzttUP7BF2hJl2f1sdF9d5MoMivLu4do2l F3t6XgPejvdZeFO01+Q0NdEs+SzQiCdfccs8InP4SadnbomaL+9Fsc7yxhAmZcfU FfzEy4unbMXIh+oowljv87/LT2bXLovQ5/d6qGJD3ycADw1ZT2pTaN+O5R5Rs+lI cI0nLr/for4NzXUdxYre5ANelU5xGlkjGb6gT4mwPxAdGexJ4tnFb4Pt9KopMh+O yWCc3vl/wMytD7VLOYzIiBjf7l9vsKt4gTj5+cgLNtFn4+vMZOkFgC4XaOiLlNJq PKnm6+L9m37ZhuQrPxGa92p1/5k/cuPiYpXMLocI9BLr7InMOSilNQql+o+pa6Fp 0zTV++nQp90Lv6Fh5ar3UIlnl2gQxtwRe/z7Csuj1X2kecTfnVyN4/ocY37mh7pz TBMKvmDuraXxQaBSjlvWD9O4jG2Sq6zhwgYTDRLQ7ikgGxy2473Gjuwug/SzPt6k Pwqrs2I9VqukR+9CSx+he6BuDMj13EZMrsHc60ql3JZz+E25ofi5LFYHOraF309X 7WCHZVbr3SYmN9wmbGfQsIK3xpRFSQjJYROrmu1/iiQZlyBMBDs= =aaCd -----END PGP SIGNATURE----- Merge tag 'edac_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras Pull EDAC updates from Borislav Petkov: - A substantial edac_mc cleanup, sanitizing object freeing, streamlining and simplifying code flow, and getting rid of a lot of needless complexity in memory controller representation code, by Robert Richter. - A new EDAC driver for the ARM DMC-520 memory controller, by Lei Wang, Shiping Ji and others. - The usual sprinkling of misc cleanups and fixes all over the subsystem. 
* tag 'edac_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: EDAC/armada_xp: Use scnprintf() for avoiding potential buffer overflow EDAC/synopsys: Do not dump uninitialized pinf->col EDAC: Add EDAC driver for DMC520 dt-bindings: edac: Dmc-520.yaml EDAC/mce_amd: Print !SMCA processor warning only once EDAC/mc: Remove per layer counters EDAC/mc: Remove detail[] string and cleanup error string generation EDAC/mc: Pass the error descriptor to error reporting functions EDAC/mc: Remove enable_per_layer_report function argument EDAC/mc: Report "unknown memory" on too many DIMM labels found EDAC/mc: Carve out error increment into a separate function EDAC/mc: Determine mci pointer from the error descriptor EDAC: Store error type in struct edac_raw_error_desc EDAC/mc: Reorder functions edac_mc_alloc*() EDAC/mc: Split edac_mc_alloc() into smaller functions EDAC/mc: Change mci device removal to use put_device()
This commit is contained in:
commit
aaf985e21a
59
Documentation/devicetree/bindings/edac/dmc-520.yaml
Normal file
59
Documentation/devicetree/bindings/edac/dmc-520.yaml
Normal file
@ -0,0 +1,59 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/edac/dmc-520.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: ARM DMC-520 EDAC bindings
|
||||
|
||||
maintainers:
|
||||
- Lei Wang <lewan@microsoft.com>
|
||||
|
||||
description: |+
|
||||
DMC-520 node is defined to describe DRAM error detection and correction.
|
||||
|
||||
https://static.docs.arm.com/100000/0200/corelink_dmc520_trm_100000_0200_01_en.pdf
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- const: brcm,dmc-520
|
||||
- const: arm,dmc-520
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
minItems: 1
|
||||
maxItems: 10
|
||||
|
||||
interrupt-names:
|
||||
minItems: 1
|
||||
maxItems: 10
|
||||
items:
|
||||
enum:
|
||||
- ram_ecc_errc
|
||||
- ram_ecc_errd
|
||||
- dram_ecc_errc
|
||||
- dram_ecc_errd
|
||||
- failed_access
|
||||
- failed_prog
|
||||
- link_err
|
||||
- temperature_event
|
||||
- arch_fsm
|
||||
- phy_request
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- interrupt-names
|
||||
|
||||
examples:
|
||||
- |
|
||||
dmc0: dmc@200000 {
|
||||
compatible = "brcm,dmc-520", "arm,dmc-520";
|
||||
reg = <0x200000 0x80000>;
|
||||
interrupts = <0x0 0x349 0x4>, <0x0 0x34B 0x4>;
|
||||
interrupt-names = "dram_ecc_errc", "dram_ecc_errd";
|
||||
};
|
@ -5998,6 +5998,12 @@ F: Documentation/driver-api/edac.rst
|
||||
F: drivers/edac/
|
||||
F: include/linux/edac.h
|
||||
|
||||
EDAC-DMC520
|
||||
M: Lei Wang <lewan@microsoft.com>
|
||||
L: linux-edac@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/edac/dmc520_edac.c
|
||||
|
||||
EDAC-E752X
|
||||
M: Mark Gross <mark.gross@intel.com>
|
||||
L: linux-edac@vger.kernel.org
|
||||
|
@ -523,4 +523,11 @@ config EDAC_BLUEFIELD
|
||||
Support for error detection and correction on the
|
||||
Mellanox BlueField SoCs.
|
||||
|
||||
config EDAC_DMC520
|
||||
tristate "ARM DMC-520 ECC"
|
||||
depends on ARM64
|
||||
help
|
||||
Support for error detection and correction on the
|
||||
SoCs with ARM DMC-520 DRAM controller.
|
||||
|
||||
endif # EDAC
|
||||
|
@ -87,3 +87,4 @@ obj-$(CONFIG_EDAC_TI) += ti_edac.o
|
||||
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
|
||||
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
|
||||
obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
|
||||
obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o
|
||||
|
@ -429,26 +429,26 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
|
||||
|
||||
src = (attr_cap & AURORA_ERR_ATTR_SRC_MSK) >> AURORA_ERR_ATTR_SRC_OFF;
|
||||
if (src <= 3)
|
||||
len += snprintf(msg+len, size-len, "src=CPU%d ", src);
|
||||
len += scnprintf(msg+len, size-len, "src=CPU%d ", src);
|
||||
else
|
||||
len += snprintf(msg+len, size-len, "src=IO ");
|
||||
len += scnprintf(msg+len, size-len, "src=IO ");
|
||||
|
||||
txn = (attr_cap & AURORA_ERR_ATTR_TXN_MSK) >> AURORA_ERR_ATTR_TXN_OFF;
|
||||
switch (txn) {
|
||||
case 0:
|
||||
len += snprintf(msg+len, size-len, "txn=Data-Read ");
|
||||
len += scnprintf(msg+len, size-len, "txn=Data-Read ");
|
||||
break;
|
||||
case 1:
|
||||
len += snprintf(msg+len, size-len, "txn=Isn-Read ");
|
||||
len += scnprintf(msg+len, size-len, "txn=Isn-Read ");
|
||||
break;
|
||||
case 2:
|
||||
len += snprintf(msg+len, size-len, "txn=Clean-Flush ");
|
||||
len += scnprintf(msg+len, size-len, "txn=Clean-Flush ");
|
||||
break;
|
||||
case 3:
|
||||
len += snprintf(msg+len, size-len, "txn=Eviction ");
|
||||
len += scnprintf(msg+len, size-len, "txn=Eviction ");
|
||||
break;
|
||||
case 4:
|
||||
len += snprintf(msg+len, size-len,
|
||||
len += scnprintf(msg+len, size-len,
|
||||
"txn=Read-Modify-Write ");
|
||||
break;
|
||||
}
|
||||
@ -456,19 +456,19 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
|
||||
err = (attr_cap & AURORA_ERR_ATTR_ERR_MSK) >> AURORA_ERR_ATTR_ERR_OFF;
|
||||
switch (err) {
|
||||
case 0:
|
||||
len += snprintf(msg+len, size-len, "err=CorrECC ");
|
||||
len += scnprintf(msg+len, size-len, "err=CorrECC ");
|
||||
break;
|
||||
case 1:
|
||||
len += snprintf(msg+len, size-len, "err=UnCorrECC ");
|
||||
len += scnprintf(msg+len, size-len, "err=UnCorrECC ");
|
||||
break;
|
||||
case 2:
|
||||
len += snprintf(msg+len, size-len, "err=TagParity ");
|
||||
len += scnprintf(msg+len, size-len, "err=TagParity ");
|
||||
break;
|
||||
}
|
||||
|
||||
len += snprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
|
||||
len += snprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
|
||||
len += snprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
|
||||
len += scnprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
|
||||
len += scnprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
|
||||
len += scnprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
|
||||
|
||||
/* clear error capture registers */
|
||||
writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
|
||||
|
656
drivers/edac/dmc520_edac.c
Normal file
656
drivers/edac/dmc520_edac.c
Normal file
@ -0,0 +1,656 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* EDAC driver for DMC-520 memory controller.
|
||||
*
|
||||
* The driver supports 10 interrupt lines,
|
||||
* though only dram_ecc_errc and dram_ecc_errd are currently handled.
|
||||
*
|
||||
* Authors: Rui Zhao <ruizhao@microsoft.com>
|
||||
* Lei Wang <lewan@microsoft.com>
|
||||
* Shiping Ji <shji@microsoft.com>
|
||||
*/
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/edac.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include "edac_mc.h"
|
||||
|
||||
/* DMC-520 registers */
|
||||
#define REG_OFFSET_FEATURE_CONFIG 0x130
|
||||
#define REG_OFFSET_ECC_ERRC_COUNT_31_00 0x158
|
||||
#define REG_OFFSET_ECC_ERRC_COUNT_63_32 0x15C
|
||||
#define REG_OFFSET_ECC_ERRD_COUNT_31_00 0x160
|
||||
#define REG_OFFSET_ECC_ERRD_COUNT_63_32 0x164
|
||||
#define REG_OFFSET_INTERRUPT_CONTROL 0x500
|
||||
#define REG_OFFSET_INTERRUPT_CLR 0x508
|
||||
#define REG_OFFSET_INTERRUPT_STATUS 0x510
|
||||
#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 0x528
|
||||
#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 0x52C
|
||||
#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00 0x530
|
||||
#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32 0x534
|
||||
#define REG_OFFSET_ADDRESS_CONTROL_NOW 0x1010
|
||||
#define REG_OFFSET_MEMORY_TYPE_NOW 0x1128
|
||||
#define REG_OFFSET_SCRUB_CONTROL0_NOW 0x1170
|
||||
#define REG_OFFSET_FORMAT_CONTROL 0x18
|
||||
|
||||
/* DMC-520 types, masks and bitfields */
|
||||
#define RAM_ECC_INT_CE_BIT BIT(0)
|
||||
#define RAM_ECC_INT_UE_BIT BIT(1)
|
||||
#define DRAM_ECC_INT_CE_BIT BIT(2)
|
||||
#define DRAM_ECC_INT_UE_BIT BIT(3)
|
||||
#define FAILED_ACCESS_INT_BIT BIT(4)
|
||||
#define FAILED_PROG_INT_BIT BIT(5)
|
||||
#define LINK_ERR_INT_BIT BIT(6)
|
||||
#define TEMPERATURE_EVENT_INT_BIT BIT(7)
|
||||
#define ARCH_FSM_INT_BIT BIT(8)
|
||||
#define PHY_REQUEST_INT_BIT BIT(9)
|
||||
#define MEMORY_WIDTH_MASK GENMASK(1, 0)
|
||||
#define SCRUB_TRIGGER0_NEXT_MASK GENMASK(1, 0)
|
||||
#define REG_FIELD_DRAM_ECC_ENABLED GENMASK(1, 0)
|
||||
#define REG_FIELD_MEMORY_TYPE GENMASK(2, 0)
|
||||
#define REG_FIELD_DEVICE_WIDTH GENMASK(9, 8)
|
||||
#define REG_FIELD_ADDRESS_CONTROL_COL GENMASK(2, 0)
|
||||
#define REG_FIELD_ADDRESS_CONTROL_ROW GENMASK(10, 8)
|
||||
#define REG_FIELD_ADDRESS_CONTROL_BANK GENMASK(18, 16)
|
||||
#define REG_FIELD_ADDRESS_CONTROL_RANK GENMASK(25, 24)
|
||||
#define REG_FIELD_ERR_INFO_LOW_VALID BIT(0)
|
||||
#define REG_FIELD_ERR_INFO_LOW_COL GENMASK(10, 1)
|
||||
#define REG_FIELD_ERR_INFO_LOW_ROW GENMASK(28, 11)
|
||||
#define REG_FIELD_ERR_INFO_LOW_RANK GENMASK(31, 29)
|
||||
#define REG_FIELD_ERR_INFO_HIGH_BANK GENMASK(3, 0)
|
||||
#define REG_FIELD_ERR_INFO_HIGH_VALID BIT(31)
|
||||
|
||||
#define DRAM_ADDRESS_CONTROL_MIN_COL_BITS 8
|
||||
#define DRAM_ADDRESS_CONTROL_MIN_ROW_BITS 11
|
||||
|
||||
#define DMC520_SCRUB_TRIGGER_ERR_DETECT 2
|
||||
#define DMC520_SCRUB_TRIGGER_IDLE 3
|
||||
|
||||
/* Driver settings */
|
||||
/*
|
||||
* The max-length message would be: "rank:7 bank:15 row:262143 col:1023".
|
||||
* Max length is 34. Using a 40-size buffer is enough.
|
||||
*/
|
||||
#define DMC520_MSG_BUF_SIZE 40
|
||||
#define EDAC_MOD_NAME "dmc520-edac"
|
||||
#define EDAC_CTL_NAME "dmc520"
|
||||
|
||||
/* the data bus width for the attached memory chips. */
|
||||
enum dmc520_mem_width {
	/* Field value mapped to a 4-byte wide data bus by the driver. */
	MEM_WIDTH_X32 = 2,
	/* Field value mapped to an 8-byte wide data bus by the driver. */
	MEM_WIDTH_X64 = 3,
};
|
||||
|
||||
/* memory type */
|
||||
enum dmc520_mem_type {
	/* Hardware encoding reported to EDAC as MEM_DDR3. */
	MEM_TYPE_DDR3 = 1,
	/* Hardware encoding reported to EDAC as MEM_DDR4. */
	MEM_TYPE_DDR4 = 2,
};
|
||||
|
||||
/* memory device width */
|
||||
enum dmc520_dev_width {
	/* Hardware encoding reported to EDAC as DEV_X4. */
	DEV_WIDTH_X4 = 0,
	/* Hardware encoding reported to EDAC as DEV_X8. */
	DEV_WIDTH_X8 = 1,
	/* Hardware encoding reported to EDAC as DEV_X16. */
	DEV_WIDTH_X16 = 2,
};
|
||||
|
||||
/*
 * DRAM location of an ECC error, as decoded from the
 * DRAM_ECC_ERR{C,D}_INT_INFO register pair.
 */
struct ecc_error_info {
	u32 col;	/* column, from REG_FIELD_ERR_INFO_LOW_COL */
	u32 row;	/* row, from REG_FIELD_ERR_INFO_LOW_ROW */
	u32 bank;	/* bank, from REG_FIELD_ERR_INFO_HIGH_BANK */
	u32 rank;	/* rank, from REG_FIELD_ERR_INFO_LOW_RANK */
};
|
||||
|
||||
/* The interrupt config */
|
||||
struct dmc520_irq_config {
	/* Name used to look the interrupt line up on the platform device. */
	char *name;
	/* Bit of this interrupt in the interrupt control/status/clear registers. */
	int mask;
};
|
||||
|
||||
/* The interrupt mappings */
|
||||
static struct dmc520_irq_config dmc520_irq_configs[] = {
|
||||
{
|
||||
.name = "ram_ecc_errc",
|
||||
.mask = RAM_ECC_INT_CE_BIT
|
||||
},
|
||||
{
|
||||
.name = "ram_ecc_errd",
|
||||
.mask = RAM_ECC_INT_UE_BIT
|
||||
},
|
||||
{
|
||||
.name = "dram_ecc_errc",
|
||||
.mask = DRAM_ECC_INT_CE_BIT
|
||||
},
|
||||
{
|
||||
.name = "dram_ecc_errd",
|
||||
.mask = DRAM_ECC_INT_UE_BIT
|
||||
},
|
||||
{
|
||||
.name = "failed_access",
|
||||
.mask = FAILED_ACCESS_INT_BIT
|
||||
},
|
||||
{
|
||||
.name = "failed_prog",
|
||||
.mask = FAILED_PROG_INT_BIT
|
||||
},
|
||||
{
|
||||
.name = "link_err",
|
||||
.mask = LINK_ERR_INT_BIT
|
||||
},
|
||||
{
|
||||
.name = "temperature_event",
|
||||
.mask = TEMPERATURE_EVENT_INT_BIT
|
||||
},
|
||||
{
|
||||
.name = "arch_fsm",
|
||||
.mask = ARCH_FSM_INT_BIT
|
||||
},
|
||||
{
|
||||
.name = "phy_request",
|
||||
.mask = PHY_REQUEST_INT_BIT
|
||||
}
|
||||
};
|
||||
|
||||
#define NUMBER_OF_IRQS ARRAY_SIZE(dmc520_irq_configs)
|
||||
|
||||
/*
|
||||
* The EDAC driver private data.
|
||||
* error_lock is to protect concurrent writes to the mci->error_desc through
|
||||
* edac_mc_handle_error().
|
||||
*/
|
||||
struct dmc520_edac {
|
||||
void __iomem *reg_base;
|
||||
spinlock_t error_lock;
|
||||
u32 mem_width_in_bytes;
|
||||
int irqs[NUMBER_OF_IRQS];
|
||||
int masks[NUMBER_OF_IRQS];
|
||||
};
|
||||
|
||||
static int dmc520_mc_idx;
|
||||
|
||||
/* Read the controller register located @offset bytes from the register base. */
static u32 dmc520_read_reg(struct dmc520_edac *pvt, u32 offset)
{
	void __iomem *addr = pvt->reg_base + offset;

	return readl(addr);
}
|
||||
|
||||
/* Write @val to the controller register located @offset bytes from the register base. */
static void dmc520_write_reg(struct dmc520_edac *pvt, u32 val, u32 offset)
{
	void __iomem *addr = pvt->reg_base + offset;

	writel(val, addr);
}
|
||||
|
||||
/*
 * Sum the four byte-wide counters packed into @value.
 *
 * Each byte of the register holds one rank's error counter; the return
 * value is the total across the four bytes.
 */
static u32 dmc520_calc_dram_ecc_error(u32 value)
{
	unsigned int shift;
	u32 sum = 0;

	for (shift = 0; shift < 32; shift += 8)
		sum += (value >> shift) & 0xFF;

	return sum;
}
|
||||
|
||||
static u32 dmc520_get_dram_ecc_error_count(struct dmc520_edac *pvt,
|
||||
bool is_ce)
|
||||
{
|
||||
u32 reg_offset_low, reg_offset_high;
|
||||
u32 err_low, err_high;
|
||||
u32 err_count;
|
||||
|
||||
reg_offset_low = is_ce ? REG_OFFSET_ECC_ERRC_COUNT_31_00 :
|
||||
REG_OFFSET_ECC_ERRD_COUNT_31_00;
|
||||
reg_offset_high = is_ce ? REG_OFFSET_ECC_ERRC_COUNT_63_32 :
|
||||
REG_OFFSET_ECC_ERRD_COUNT_63_32;
|
||||
|
||||
err_low = dmc520_read_reg(pvt, reg_offset_low);
|
||||
err_high = dmc520_read_reg(pvt, reg_offset_high);
|
||||
/* Reset error counters */
|
||||
dmc520_write_reg(pvt, 0, reg_offset_low);
|
||||
dmc520_write_reg(pvt, 0, reg_offset_high);
|
||||
|
||||
err_count = dmc520_calc_dram_ecc_error(err_low) +
|
||||
dmc520_calc_dram_ecc_error(err_high);
|
||||
|
||||
return err_count;
|
||||
}
|
||||
|
||||
static void dmc520_get_dram_ecc_error_info(struct dmc520_edac *pvt,
|
||||
bool is_ce,
|
||||
struct ecc_error_info *info)
|
||||
{
|
||||
u32 reg_offset_low, reg_offset_high;
|
||||
u32 reg_val_low, reg_val_high;
|
||||
bool valid;
|
||||
|
||||
reg_offset_low = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 :
|
||||
REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00;
|
||||
reg_offset_high = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 :
|
||||
REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32;
|
||||
|
||||
reg_val_low = dmc520_read_reg(pvt, reg_offset_low);
|
||||
reg_val_high = dmc520_read_reg(pvt, reg_offset_high);
|
||||
|
||||
valid = (FIELD_GET(REG_FIELD_ERR_INFO_LOW_VALID, reg_val_low) != 0) &&
|
||||
(FIELD_GET(REG_FIELD_ERR_INFO_HIGH_VALID, reg_val_high) != 0);
|
||||
|
||||
if (valid) {
|
||||
info->col = FIELD_GET(REG_FIELD_ERR_INFO_LOW_COL, reg_val_low);
|
||||
info->row = FIELD_GET(REG_FIELD_ERR_INFO_LOW_ROW, reg_val_low);
|
||||
info->rank = FIELD_GET(REG_FIELD_ERR_INFO_LOW_RANK, reg_val_low);
|
||||
info->bank = FIELD_GET(REG_FIELD_ERR_INFO_HIGH_BANK, reg_val_high);
|
||||
} else {
|
||||
memset(info, 0, sizeof(*info));
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Report whether the DRAM ECC field of the feature configuration register
 * is non-zero.  Takes the raw register base because it is called before
 * the driver's private data exists.
 */
static bool dmc520_is_ecc_enabled(void __iomem *reg_base)
{
	u32 feature_cfg;

	feature_cfg = readl(reg_base + REG_OFFSET_FEATURE_CONFIG);

	return FIELD_GET(REG_FIELD_DRAM_ECC_ENABLED, feature_cfg);
}
|
||||
|
||||
static enum scrub_type dmc520_get_scrub_type(struct dmc520_edac *pvt)
|
||||
{
|
||||
enum scrub_type type = SCRUB_NONE;
|
||||
u32 reg_val, scrub_cfg;
|
||||
|
||||
reg_val = dmc520_read_reg(pvt, REG_OFFSET_SCRUB_CONTROL0_NOW);
|
||||
scrub_cfg = FIELD_GET(SCRUB_TRIGGER0_NEXT_MASK, reg_val);
|
||||
|
||||
if (scrub_cfg == DMC520_SCRUB_TRIGGER_ERR_DETECT ||
|
||||
scrub_cfg == DMC520_SCRUB_TRIGGER_IDLE)
|
||||
type = SCRUB_HW_PROG;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
/* Get the memory data bus width, in number of bytes. */
|
||||
static u32 dmc520_get_memory_width(struct dmc520_edac *pvt)
|
||||
{
|
||||
enum dmc520_mem_width mem_width_field;
|
||||
u32 mem_width_in_bytes = 0;
|
||||
u32 reg_val;
|
||||
|
||||
reg_val = dmc520_read_reg(pvt, REG_OFFSET_FORMAT_CONTROL);
|
||||
mem_width_field = FIELD_GET(MEMORY_WIDTH_MASK, reg_val);
|
||||
|
||||
if (mem_width_field == MEM_WIDTH_X32)
|
||||
mem_width_in_bytes = 4;
|
||||
else if (mem_width_field == MEM_WIDTH_X64)
|
||||
mem_width_in_bytes = 8;
|
||||
return mem_width_in_bytes;
|
||||
}
|
||||
|
||||
static enum mem_type dmc520_get_mtype(struct dmc520_edac *pvt)
|
||||
{
|
||||
enum mem_type mt = MEM_UNKNOWN;
|
||||
enum dmc520_mem_type type;
|
||||
u32 reg_val;
|
||||
|
||||
reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
|
||||
type = FIELD_GET(REG_FIELD_MEMORY_TYPE, reg_val);
|
||||
|
||||
switch (type) {
|
||||
case MEM_TYPE_DDR3:
|
||||
mt = MEM_DDR3;
|
||||
break;
|
||||
|
||||
case MEM_TYPE_DDR4:
|
||||
mt = MEM_DDR4;
|
||||
break;
|
||||
}
|
||||
|
||||
return mt;
|
||||
}
|
||||
|
||||
static enum dev_type dmc520_get_dtype(struct dmc520_edac *pvt)
|
||||
{
|
||||
enum dmc520_dev_width device_width;
|
||||
enum dev_type dt = DEV_UNKNOWN;
|
||||
u32 reg_val;
|
||||
|
||||
reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
|
||||
device_width = FIELD_GET(REG_FIELD_DEVICE_WIDTH, reg_val);
|
||||
|
||||
switch (device_width) {
|
||||
case DEV_WIDTH_X4:
|
||||
dt = DEV_X4;
|
||||
break;
|
||||
|
||||
case DEV_WIDTH_X8:
|
||||
dt = DEV_X8;
|
||||
break;
|
||||
|
||||
case DEV_WIDTH_X16:
|
||||
dt = DEV_X16;
|
||||
break;
|
||||
}
|
||||
|
||||
return dt;
|
||||
}
|
||||
|
||||
/*
 * Number of ranks, computed as two to the power of the rank field of the
 * address control register.  Takes the raw register base because it is
 * called before the driver's private data exists.
 */
static u32 dmc520_get_rank_count(void __iomem *reg_base)
{
	u32 addr_ctrl = readl(reg_base + REG_OFFSET_ADDRESS_CONTROL_NOW);
	u32 rank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_RANK, addr_ctrl);

	return BIT(rank_bits);
}
|
||||
|
||||
static u64 dmc520_get_rank_size(struct dmc520_edac *pvt)
|
||||
{
|
||||
u32 reg_val, col_bits, row_bits, bank_bits;
|
||||
|
||||
reg_val = dmc520_read_reg(pvt, REG_OFFSET_ADDRESS_CONTROL_NOW);
|
||||
|
||||
col_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_COL, reg_val) +
|
||||
DRAM_ADDRESS_CONTROL_MIN_COL_BITS;
|
||||
row_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_ROW, reg_val) +
|
||||
DRAM_ADDRESS_CONTROL_MIN_ROW_BITS;
|
||||
bank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_BANK, reg_val);
|
||||
|
||||
return (u64)pvt->mem_width_in_bytes << (col_bits + row_bits + bank_bits);
|
||||
}
|
||||
|
||||
static void dmc520_handle_dram_ecc_errors(struct mem_ctl_info *mci,
|
||||
bool is_ce)
|
||||
{
|
||||
struct dmc520_edac *pvt = mci->pvt_info;
|
||||
char message[DMC520_MSG_BUF_SIZE];
|
||||
struct ecc_error_info info;
|
||||
u32 cnt;
|
||||
|
||||
dmc520_get_dram_ecc_error_info(pvt, is_ce, &info);
|
||||
|
||||
cnt = dmc520_get_dram_ecc_error_count(pvt, is_ce);
|
||||
if (!cnt)
|
||||
return;
|
||||
|
||||
snprintf(message, ARRAY_SIZE(message),
|
||||
"rank:%d bank:%d row:%d col:%d",
|
||||
info.rank, info.bank,
|
||||
info.row, info.col);
|
||||
|
||||
spin_lock(&pvt->error_lock);
|
||||
edac_mc_handle_error((is_ce ? HW_EVENT_ERR_CORRECTED :
|
||||
HW_EVENT_ERR_UNCORRECTED),
|
||||
mci, cnt, 0, 0, 0, info.rank, -1, -1,
|
||||
message, "");
|
||||
spin_unlock(&pvt->error_lock);
|
||||
}
|
||||
|
||||
static irqreturn_t dmc520_edac_dram_ecc_isr(int irq, struct mem_ctl_info *mci,
|
||||
bool is_ce)
|
||||
{
|
||||
struct dmc520_edac *pvt = mci->pvt_info;
|
||||
u32 i_mask;
|
||||
|
||||
i_mask = is_ce ? DRAM_ECC_INT_CE_BIT : DRAM_ECC_INT_UE_BIT;
|
||||
|
||||
dmc520_handle_dram_ecc_errors(mci, is_ce);
|
||||
|
||||
dmc520_write_reg(pvt, i_mask, REG_OFFSET_INTERRUPT_CLR);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static irqreturn_t dmc520_edac_dram_all_isr(int irq, struct mem_ctl_info *mci,
|
||||
u32 irq_mask)
|
||||
{
|
||||
struct dmc520_edac *pvt = mci->pvt_info;
|
||||
irqreturn_t irq_ret = IRQ_NONE;
|
||||
u32 status;
|
||||
|
||||
status = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_STATUS);
|
||||
|
||||
if ((irq_mask & DRAM_ECC_INT_CE_BIT) &&
|
||||
(status & DRAM_ECC_INT_CE_BIT))
|
||||
irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, true);
|
||||
|
||||
if ((irq_mask & DRAM_ECC_INT_UE_BIT) &&
|
||||
(status & DRAM_ECC_INT_UE_BIT))
|
||||
irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, false);
|
||||
|
||||
return irq_ret;
|
||||
}
|
||||
|
||||
static irqreturn_t dmc520_isr(int irq, void *data)
|
||||
{
|
||||
struct mem_ctl_info *mci = data;
|
||||
struct dmc520_edac *pvt = mci->pvt_info;
|
||||
u32 mask = 0;
|
||||
int idx;
|
||||
|
||||
for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
|
||||
if (pvt->irqs[idx] == irq) {
|
||||
mask = pvt->masks[idx];
|
||||
break;
|
||||
}
|
||||
}
|
||||
return dmc520_edac_dram_all_isr(irq, mci, mask);
|
||||
}
|
||||
|
||||
static void dmc520_init_csrow(struct mem_ctl_info *mci)
|
||||
{
|
||||
struct dmc520_edac *pvt = mci->pvt_info;
|
||||
struct csrow_info *csi;
|
||||
struct dimm_info *dimm;
|
||||
u32 pages_per_rank;
|
||||
enum dev_type dt;
|
||||
enum mem_type mt;
|
||||
int row, ch;
|
||||
u64 rs;
|
||||
|
||||
dt = dmc520_get_dtype(pvt);
|
||||
mt = dmc520_get_mtype(pvt);
|
||||
rs = dmc520_get_rank_size(pvt);
|
||||
pages_per_rank = rs >> PAGE_SHIFT;
|
||||
|
||||
for (row = 0; row < mci->nr_csrows; row++) {
|
||||
csi = mci->csrows[row];
|
||||
|
||||
for (ch = 0; ch < csi->nr_channels; ch++) {
|
||||
dimm = csi->channels[ch]->dimm;
|
||||
dimm->grain = pvt->mem_width_in_bytes;
|
||||
dimm->dtype = dt;
|
||||
dimm->mtype = mt;
|
||||
dimm->edac_mode = EDAC_FLAG_SECDED;
|
||||
dimm->nr_pages = pages_per_rank / csi->nr_channels;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int dmc520_edac_probe(struct platform_device *pdev)
|
||||
{
|
||||
bool registered[NUMBER_OF_IRQS] = { false };
|
||||
int irqs[NUMBER_OF_IRQS] = { -ENXIO };
|
||||
int masks[NUMBER_OF_IRQS] = { 0 };
|
||||
struct edac_mc_layer layers[1];
|
||||
struct dmc520_edac *pvt = NULL;
|
||||
struct mem_ctl_info *mci;
|
||||
void __iomem *reg_base;
|
||||
u32 irq_mask_all = 0;
|
||||
struct resource *res;
|
||||
struct device *dev;
|
||||
int ret, idx, irq;
|
||||
u32 reg_val;
|
||||
|
||||
/* Parse the device node */
|
||||
dev = &pdev->dev;
|
||||
|
||||
for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
|
||||
irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name);
|
||||
irqs[idx] = irq;
|
||||
masks[idx] = dmc520_irq_configs[idx].mask;
|
||||
if (irq >= 0) {
|
||||
irq_mask_all |= dmc520_irq_configs[idx].mask;
|
||||
edac_dbg(0, "Discovered %s, irq: %d.\n", dmc520_irq_configs[idx].name, irq);
|
||||
}
|
||||
}
|
||||
|
||||
if (!irq_mask_all) {
|
||||
edac_printk(KERN_ERR, EDAC_MOD_NAME,
|
||||
"At least one valid interrupt line is expected.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Initialize dmc520 edac */
|
||||
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
||||
reg_base = devm_ioremap_resource(dev, res);
|
||||
if (IS_ERR(reg_base))
|
||||
return PTR_ERR(reg_base);
|
||||
|
||||
if (!dmc520_is_ecc_enabled(reg_base))
|
||||
return -ENXIO;
|
||||
|
||||
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
|
||||
layers[0].size = dmc520_get_rank_count(reg_base);
|
||||
layers[0].is_virt_csrow = true;
|
||||
|
||||
mci = edac_mc_alloc(dmc520_mc_idx++, ARRAY_SIZE(layers), layers, sizeof(*pvt));
|
||||
if (!mci) {
|
||||
edac_printk(KERN_ERR, EDAC_MOD_NAME,
|
||||
"Failed to allocate memory for mc instance\n");
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
pvt = mci->pvt_info;
|
||||
|
||||
pvt->reg_base = reg_base;
|
||||
spin_lock_init(&pvt->error_lock);
|
||||
memcpy(pvt->irqs, irqs, sizeof(irqs));
|
||||
memcpy(pvt->masks, masks, sizeof(masks));
|
||||
|
||||
platform_set_drvdata(pdev, mci);
|
||||
|
||||
mci->pdev = dev;
|
||||
mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
|
||||
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
|
||||
mci->edac_cap = EDAC_FLAG_SECDED;
|
||||
mci->scrub_cap = SCRUB_FLAG_HW_SRC;
|
||||
mci->scrub_mode = dmc520_get_scrub_type(pvt);
|
||||
mci->ctl_name = EDAC_CTL_NAME;
|
||||
mci->dev_name = dev_name(mci->pdev);
|
||||
mci->mod_name = EDAC_MOD_NAME;
|
||||
|
||||
edac_op_state = EDAC_OPSTATE_INT;
|
||||
|
||||
pvt->mem_width_in_bytes = dmc520_get_memory_width(pvt);
|
||||
|
||||
dmc520_init_csrow(mci);
|
||||
|
||||
/* Clear interrupts, not affecting other unrelated interrupts */
|
||||
reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
|
||||
dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
|
||||
REG_OFFSET_INTERRUPT_CONTROL);
|
||||
dmc520_write_reg(pvt, irq_mask_all, REG_OFFSET_INTERRUPT_CLR);
|
||||
|
||||
for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
|
||||
irq = irqs[idx];
|
||||
if (irq >= 0) {
|
||||
ret = devm_request_irq(&pdev->dev, irq,
|
||||
dmc520_isr, IRQF_SHARED,
|
||||
dev_name(&pdev->dev), mci);
|
||||
if (ret < 0) {
|
||||
edac_printk(KERN_ERR, EDAC_MC,
|
||||
"Failed to request irq %d\n", irq);
|
||||
goto err;
|
||||
}
|
||||
registered[idx] = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Reset DRAM CE/UE counters */
|
||||
if (irq_mask_all & DRAM_ECC_INT_CE_BIT)
|
||||
dmc520_get_dram_ecc_error_count(pvt, true);
|
||||
|
||||
if (irq_mask_all & DRAM_ECC_INT_UE_BIT)
|
||||
dmc520_get_dram_ecc_error_count(pvt, false);
|
||||
|
||||
ret = edac_mc_add_mc(mci);
|
||||
if (ret) {
|
||||
edac_printk(KERN_ERR, EDAC_MOD_NAME,
|
||||
"Failed to register with EDAC core\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Enable interrupts, not affecting other unrelated interrupts */
|
||||
dmc520_write_reg(pvt, reg_val | irq_mask_all,
|
||||
REG_OFFSET_INTERRUPT_CONTROL);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
|
||||
if (registered[idx])
|
||||
devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
|
||||
}
|
||||
if (mci)
|
||||
edac_mc_free(mci);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int dmc520_edac_remove(struct platform_device *pdev)
|
||||
{
|
||||
u32 reg_val, idx, irq_mask_all = 0;
|
||||
struct mem_ctl_info *mci;
|
||||
struct dmc520_edac *pvt;
|
||||
|
||||
mci = platform_get_drvdata(pdev);
|
||||
pvt = mci->pvt_info;
|
||||
|
||||
/* Disable interrupts */
|
||||
reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
|
||||
dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
|
||||
REG_OFFSET_INTERRUPT_CONTROL);
|
||||
|
||||
/* free irq's */
|
||||
for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
|
||||
if (pvt->irqs[idx] >= 0) {
|
||||
irq_mask_all |= pvt->masks[idx];
|
||||
devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
|
||||
}
|
||||
}
|
||||
|
||||
edac_mc_del_mc(&pdev->dev);
|
||||
edac_mc_free(mci);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Device-tree compatible strings this driver binds to. */
static const struct of_device_id dmc520_edac_driver_id[] = {
	{ .compatible = "arm,dmc-520" },
	{ }	/* sentinel */
};
|
||||
|
||||
MODULE_DEVICE_TABLE(of, dmc520_edac_driver_id);
|
||||
|
||||
static struct platform_driver dmc520_edac_driver = {
|
||||
.driver = {
|
||||
.name = "dmc520",
|
||||
.of_match_table = dmc520_edac_driver_id,
|
||||
},
|
||||
|
||||
.probe = dmc520_edac_probe,
|
||||
.remove = dmc520_edac_remove
|
||||
};
|
||||
|
||||
module_platform_driver(dmc520_edac_driver);
|
||||
|
||||
MODULE_AUTHOR("Rui Zhao <ruizhao@microsoft.com>");
|
||||
MODULE_AUTHOR("Lei Wang <lewan@microsoft.com>");
|
||||
MODULE_AUTHOR("Shiping Ji <shji@microsoft.com>");
|
||||
MODULE_DESCRIPTION("DMC-520 ECC driver");
|
||||
MODULE_LICENSE("GPL v2");
|
@ -55,6 +55,11 @@ static LIST_HEAD(mc_devices);
|
||||
*/
|
||||
static const char *edac_mc_owner;
|
||||
|
||||
static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
|
||||
{
|
||||
return container_of(e, struct mem_ctl_info, error_desc);
|
||||
}
|
||||
|
||||
int edac_get_report_status(void)
|
||||
{
|
||||
return edac_report;
|
||||
@ -278,6 +283,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
|
||||
|
||||
static void _edac_mc_free(struct mem_ctl_info *mci)
|
||||
{
|
||||
put_device(&mci->dev);
|
||||
}
|
||||
|
||||
static void mci_release(struct device *dev)
|
||||
{
|
||||
struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
|
||||
struct csrow_info *csr;
|
||||
int i, chn, row;
|
||||
|
||||
@ -305,6 +316,134 @@ static void _edac_mc_free(struct mem_ctl_info *mci)
|
||||
kfree(mci);
|
||||
}
|
||||
|
||||
static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
|
||||
{
|
||||
unsigned int tot_channels = mci->num_cschannel;
|
||||
unsigned int tot_csrows = mci->nr_csrows;
|
||||
unsigned int row, chn;
|
||||
|
||||
/*
|
||||
* Alocate and fill the csrow/channels structs
|
||||
*/
|
||||
mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
|
||||
if (!mci->csrows)
|
||||
return -ENOMEM;
|
||||
|
||||
for (row = 0; row < tot_csrows; row++) {
|
||||
struct csrow_info *csr;
|
||||
|
||||
csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
|
||||
if (!csr)
|
||||
return -ENOMEM;
|
||||
|
||||
mci->csrows[row] = csr;
|
||||
csr->csrow_idx = row;
|
||||
csr->mci = mci;
|
||||
csr->nr_channels = tot_channels;
|
||||
csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
|
||||
GFP_KERNEL);
|
||||
if (!csr->channels)
|
||||
return -ENOMEM;
|
||||
|
||||
for (chn = 0; chn < tot_channels; chn++) {
|
||||
struct rank_info *chan;
|
||||
|
||||
chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
|
||||
if (!chan)
|
||||
return -ENOMEM;
|
||||
|
||||
csr->channels[chn] = chan;
|
||||
chan->chan_idx = chn;
|
||||
chan->csrow = csr;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
|
||||
{
|
||||
unsigned int pos[EDAC_MAX_LAYERS];
|
||||
unsigned int row, chn, idx;
|
||||
int layer;
|
||||
void *p;
|
||||
|
||||
/*
|
||||
* Allocate and fill the dimm structs
|
||||
*/
|
||||
mci->dimms = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
|
||||
if (!mci->dimms)
|
||||
return -ENOMEM;
|
||||
|
||||
memset(&pos, 0, sizeof(pos));
|
||||
row = 0;
|
||||
chn = 0;
|
||||
for (idx = 0; idx < mci->tot_dimms; idx++) {
|
||||
struct dimm_info *dimm;
|
||||
struct rank_info *chan;
|
||||
int n, len;
|
||||
|
||||
chan = mci->csrows[row]->channels[chn];
|
||||
|
||||
dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
|
||||
if (!dimm)
|
||||
return -ENOMEM;
|
||||
mci->dimms[idx] = dimm;
|
||||
dimm->mci = mci;
|
||||
dimm->idx = idx;
|
||||
|
||||
/*
|
||||
* Copy DIMM location and initialize it.
|
||||
*/
|
||||
len = sizeof(dimm->label);
|
||||
p = dimm->label;
|
||||
n = snprintf(p, len, "mc#%u", mci->mc_idx);
|
||||
p += n;
|
||||
len -= n;
|
||||
for (layer = 0; layer < mci->n_layers; layer++) {
|
||||
n = snprintf(p, len, "%s#%u",
|
||||
edac_layer_name[mci->layers[layer].type],
|
||||
pos[layer]);
|
||||
p += n;
|
||||
len -= n;
|
||||
dimm->location[layer] = pos[layer];
|
||||
|
||||
if (len <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Link it to the csrows old API data */
|
||||
chan->dimm = dimm;
|
||||
dimm->csrow = row;
|
||||
dimm->cschannel = chn;
|
||||
|
||||
/* Increment csrow location */
|
||||
if (mci->layers[0].is_virt_csrow) {
|
||||
chn++;
|
||||
if (chn == mci->num_cschannel) {
|
||||
chn = 0;
|
||||
row++;
|
||||
}
|
||||
} else {
|
||||
row++;
|
||||
if (row == mci->nr_csrows) {
|
||||
row = 0;
|
||||
chn++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Increment dimm location */
|
||||
for (layer = mci->n_layers - 1; layer >= 0; layer--) {
|
||||
pos[layer]++;
|
||||
if (pos[layer] < mci->layers[layer].size)
|
||||
break;
|
||||
pos[layer] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
|
||||
unsigned int n_layers,
|
||||
struct edac_mc_layer *layers,
|
||||
@ -312,15 +451,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
|
||||
{
|
||||
struct mem_ctl_info *mci;
|
||||
struct edac_mc_layer *layer;
|
||||
struct csrow_info *csr;
|
||||
struct rank_info *chan;
|
||||
struct dimm_info *dimm;
|
||||
u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
|
||||
unsigned int pos[EDAC_MAX_LAYERS];
|
||||
unsigned int idx, size, tot_dimms = 1, count = 1;
|
||||
unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
|
||||
void *pvt, *p, *ptr = NULL;
|
||||
int i, j, row, chn, n, len;
|
||||
unsigned int idx, size, tot_dimms = 1;
|
||||
unsigned int tot_csrows = 1, tot_channels = 1;
|
||||
void *pvt, *ptr = NULL;
|
||||
bool per_rank = false;
|
||||
|
||||
if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
|
||||
@ -347,19 +480,10 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
|
||||
* stringent as what the compiler would provide if we could simply
|
||||
* hardcode everything into a single struct.
|
||||
*/
|
||||
mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
|
||||
layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
|
||||
for (i = 0; i < n_layers; i++) {
|
||||
count *= layers[i].size;
|
||||
edac_dbg(4, "errcount layer %d size %d\n", i, count);
|
||||
ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
|
||||
ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
|
||||
tot_errcount += 2 * count;
|
||||
}
|
||||
|
||||
edac_dbg(4, "allocating %d error counters\n", tot_errcount);
|
||||
pvt = edac_align_ptr(&ptr, sz_pvt, 1);
|
||||
size = ((unsigned long)pvt) + sz_pvt;
|
||||
mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
|
||||
layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
|
||||
pvt = edac_align_ptr(&ptr, sz_pvt, 1);
|
||||
size = ((unsigned long)pvt) + sz_pvt;
|
||||
|
||||
edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
|
||||
size,
|
||||
@ -371,14 +495,13 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
|
||||
if (mci == NULL)
|
||||
return NULL;
|
||||
|
||||
mci->dev.release = mci_release;
|
||||
device_initialize(&mci->dev);
|
||||
|
||||
/* Adjust pointers so they point within the memory we just allocated
|
||||
* rather than an imaginary chunk of memory located at address 0.
|
||||
*/
|
||||
layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
|
||||
for (i = 0; i < n_layers; i++) {
|
||||
mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
|
||||
mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
|
||||
}
|
||||
pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
|
||||
|
||||
/* setup index and various internal pointers */
|
||||
@ -392,103 +515,11 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
|
||||
mci->num_cschannel = tot_channels;
|
||||
mci->csbased = per_rank;
|
||||
|
||||
/*
|
||||
* Alocate and fill the csrow/channels structs
|
||||
*/
|
||||
mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
|
||||
if (!mci->csrows)
|
||||
goto error;
|
||||
for (row = 0; row < tot_csrows; row++) {
|
||||
csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
|
||||
if (!csr)
|
||||
goto error;
|
||||
mci->csrows[row] = csr;
|
||||
csr->csrow_idx = row;
|
||||
csr->mci = mci;
|
||||
csr->nr_channels = tot_channels;
|
||||
csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
|
||||
GFP_KERNEL);
|
||||
if (!csr->channels)
|
||||
goto error;
|
||||
|
||||
for (chn = 0; chn < tot_channels; chn++) {
|
||||
chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
|
||||
if (!chan)
|
||||
goto error;
|
||||
csr->channels[chn] = chan;
|
||||
chan->chan_idx = chn;
|
||||
chan->csrow = csr;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and fill the dimm structs
|
||||
*/
|
||||
mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
|
||||
if (!mci->dimms)
|
||||
if (edac_mc_alloc_csrows(mci))
|
||||
goto error;
|
||||
|
||||
memset(&pos, 0, sizeof(pos));
|
||||
row = 0;
|
||||
chn = 0;
|
||||
for (idx = 0; idx < tot_dimms; idx++) {
|
||||
chan = mci->csrows[row]->channels[chn];
|
||||
|
||||
dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
|
||||
if (!dimm)
|
||||
goto error;
|
||||
mci->dimms[idx] = dimm;
|
||||
dimm->mci = mci;
|
||||
dimm->idx = idx;
|
||||
|
||||
/*
|
||||
* Copy DIMM location and initialize it.
|
||||
*/
|
||||
len = sizeof(dimm->label);
|
||||
p = dimm->label;
|
||||
n = snprintf(p, len, "mc#%u", mc_num);
|
||||
p += n;
|
||||
len -= n;
|
||||
for (j = 0; j < n_layers; j++) {
|
||||
n = snprintf(p, len, "%s#%u",
|
||||
edac_layer_name[layers[j].type],
|
||||
pos[j]);
|
||||
p += n;
|
||||
len -= n;
|
||||
dimm->location[j] = pos[j];
|
||||
|
||||
if (len <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Link it to the csrows old API data */
|
||||
chan->dimm = dimm;
|
||||
dimm->csrow = row;
|
||||
dimm->cschannel = chn;
|
||||
|
||||
/* Increment csrow location */
|
||||
if (layers[0].is_virt_csrow) {
|
||||
chn++;
|
||||
if (chn == tot_channels) {
|
||||
chn = 0;
|
||||
row++;
|
||||
}
|
||||
} else {
|
||||
row++;
|
||||
if (row == tot_csrows) {
|
||||
row = 0;
|
||||
chn++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Increment dimm location */
|
||||
for (j = n_layers - 1; j >= 0; j--) {
|
||||
pos[j]++;
|
||||
if (pos[j] < layers[j].size)
|
||||
break;
|
||||
pos[j] = 0;
|
||||
}
|
||||
}
|
||||
if (edac_mc_alloc_dimms(mci))
|
||||
goto error;
|
||||
|
||||
mci->op_state = OP_ALLOC;
|
||||
|
||||
@ -505,9 +536,6 @@ void edac_mc_free(struct mem_ctl_info *mci)
|
||||
{
|
||||
edac_dbg(1, "\n");
|
||||
|
||||
if (device_is_registered(&mci->dev))
|
||||
edac_unregister_sysfs(mci);
|
||||
|
||||
_edac_mc_free(mci);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(edac_mc_free);
|
||||
@ -902,88 +930,51 @@ const char *edac_layer_name[] = {
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(edac_layer_name);
|
||||
|
||||
static void edac_inc_ce_error(struct mem_ctl_info *mci,
|
||||
bool enable_per_layer_report,
|
||||
const int pos[EDAC_MAX_LAYERS],
|
||||
const u16 count)
|
||||
static void edac_inc_ce_error(struct edac_raw_error_desc *e)
|
||||
{
|
||||
int i, index = 0;
|
||||
int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
|
||||
struct mem_ctl_info *mci = error_desc_to_mci(e);
|
||||
struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
|
||||
|
||||
mci->ce_mc += count;
|
||||
mci->ce_mc += e->error_count;
|
||||
|
||||
if (!enable_per_layer_report) {
|
||||
mci->ce_noinfo_count += count;
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < mci->n_layers; i++) {
|
||||
if (pos[i] < 0)
|
||||
break;
|
||||
index += pos[i];
|
||||
mci->ce_per_layer[i][index] += count;
|
||||
|
||||
if (i < mci->n_layers - 1)
|
||||
index *= mci->layers[i + 1].size;
|
||||
}
|
||||
if (dimm)
|
||||
dimm->ce_count += e->error_count;
|
||||
else
|
||||
mci->ce_noinfo_count += e->error_count;
|
||||
}
|
||||
|
||||
static void edac_inc_ue_error(struct mem_ctl_info *mci,
|
||||
bool enable_per_layer_report,
|
||||
const int pos[EDAC_MAX_LAYERS],
|
||||
const u16 count)
|
||||
static void edac_inc_ue_error(struct edac_raw_error_desc *e)
|
||||
{
|
||||
int i, index = 0;
|
||||
int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
|
||||
struct mem_ctl_info *mci = error_desc_to_mci(e);
|
||||
struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
|
||||
|
||||
mci->ue_mc += count;
|
||||
mci->ue_mc += e->error_count;
|
||||
|
||||
if (!enable_per_layer_report) {
|
||||
mci->ue_noinfo_count += count;
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < mci->n_layers; i++) {
|
||||
if (pos[i] < 0)
|
||||
break;
|
||||
index += pos[i];
|
||||
mci->ue_per_layer[i][index] += count;
|
||||
|
||||
if (i < mci->n_layers - 1)
|
||||
index *= mci->layers[i + 1].size;
|
||||
}
|
||||
if (dimm)
|
||||
dimm->ue_count += e->error_count;
|
||||
else
|
||||
mci->ue_noinfo_count += e->error_count;
|
||||
}
|
||||
|
||||
static void edac_ce_error(struct mem_ctl_info *mci,
|
||||
const u16 error_count,
|
||||
const int pos[EDAC_MAX_LAYERS],
|
||||
const char *msg,
|
||||
const char *location,
|
||||
const char *label,
|
||||
const char *detail,
|
||||
const char *other_detail,
|
||||
const bool enable_per_layer_report,
|
||||
const unsigned long page_frame_number,
|
||||
const unsigned long offset_in_page,
|
||||
long grain)
|
||||
static void edac_ce_error(struct edac_raw_error_desc *e)
|
||||
{
|
||||
struct mem_ctl_info *mci = error_desc_to_mci(e);
|
||||
unsigned long remapped_page;
|
||||
char *msg_aux = "";
|
||||
|
||||
if (*msg)
|
||||
msg_aux = " ";
|
||||
|
||||
if (edac_mc_get_log_ce()) {
|
||||
if (other_detail && *other_detail)
|
||||
edac_mc_printk(mci, KERN_WARNING,
|
||||
"%d CE %s%son %s (%s %s - %s)\n",
|
||||
error_count, msg, msg_aux, label,
|
||||
location, detail, other_detail);
|
||||
else
|
||||
edac_mc_printk(mci, KERN_WARNING,
|
||||
"%d CE %s%son %s (%s %s)\n",
|
||||
error_count, msg, msg_aux, label,
|
||||
location, detail);
|
||||
edac_mc_printk(mci, KERN_WARNING,
|
||||
"%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
|
||||
e->error_count, e->msg,
|
||||
*e->msg ? " " : "",
|
||||
e->label, e->location, e->page_frame_number, e->offset_in_page,
|
||||
e->grain, e->syndrome,
|
||||
*e->other_detail ? " - " : "",
|
||||
e->other_detail);
|
||||
}
|
||||
edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
|
||||
|
||||
edac_inc_ce_error(e);
|
||||
|
||||
if (mci->scrub_mode == SCRUB_SW_SRC) {
|
||||
/*
|
||||
@ -998,60 +989,64 @@ static void edac_ce_error(struct mem_ctl_info *mci,
|
||||
* be scrubbed.
|
||||
*/
|
||||
remapped_page = mci->ctl_page_to_phys ?
|
||||
mci->ctl_page_to_phys(mci, page_frame_number) :
|
||||
page_frame_number;
|
||||
mci->ctl_page_to_phys(mci, e->page_frame_number) :
|
||||
e->page_frame_number;
|
||||
|
||||
edac_mc_scrub_block(remapped_page,
|
||||
offset_in_page, grain);
|
||||
edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
|
||||
}
|
||||
}
|
||||
|
||||
static void edac_ue_error(struct mem_ctl_info *mci,
|
||||
const u16 error_count,
|
||||
const int pos[EDAC_MAX_LAYERS],
|
||||
const char *msg,
|
||||
const char *location,
|
||||
const char *label,
|
||||
const char *detail,
|
||||
const char *other_detail,
|
||||
const bool enable_per_layer_report)
|
||||
static void edac_ue_error(struct edac_raw_error_desc *e)
|
||||
{
|
||||
char *msg_aux = "";
|
||||
|
||||
if (*msg)
|
||||
msg_aux = " ";
|
||||
struct mem_ctl_info *mci = error_desc_to_mci(e);
|
||||
|
||||
if (edac_mc_get_log_ue()) {
|
||||
if (other_detail && *other_detail)
|
||||
edac_mc_printk(mci, KERN_WARNING,
|
||||
"%d UE %s%son %s (%s %s - %s)\n",
|
||||
error_count, msg, msg_aux, label,
|
||||
location, detail, other_detail);
|
||||
else
|
||||
edac_mc_printk(mci, KERN_WARNING,
|
||||
"%d UE %s%son %s (%s %s)\n",
|
||||
error_count, msg, msg_aux, label,
|
||||
location, detail);
|
||||
edac_mc_printk(mci, KERN_WARNING,
|
||||
"%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
|
||||
e->error_count, e->msg,
|
||||
*e->msg ? " " : "",
|
||||
e->label, e->location, e->page_frame_number, e->offset_in_page,
|
||||
e->grain,
|
||||
*e->other_detail ? " - " : "",
|
||||
e->other_detail);
|
||||
}
|
||||
|
||||
if (edac_mc_get_panic_on_ue()) {
|
||||
if (other_detail && *other_detail)
|
||||
panic("UE %s%son %s (%s%s - %s)\n",
|
||||
msg, msg_aux, label, location, detail, other_detail);
|
||||
else
|
||||
panic("UE %s%son %s (%s%s)\n",
|
||||
msg, msg_aux, label, location, detail);
|
||||
panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
|
||||
e->msg,
|
||||
*e->msg ? " " : "",
|
||||
e->label, e->location, e->page_frame_number, e->offset_in_page,
|
||||
e->grain,
|
||||
*e->other_detail ? " - " : "",
|
||||
e->other_detail);
|
||||
}
|
||||
|
||||
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
|
||||
edac_inc_ue_error(e);
|
||||
}
|
||||
|
||||
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
struct mem_ctl_info *mci,
|
||||
struct edac_raw_error_desc *e)
|
||||
static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
|
||||
{
|
||||
char detail[80];
|
||||
int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
|
||||
struct mem_ctl_info *mci = error_desc_to_mci(e);
|
||||
enum hw_event_mc_err_type type = e->type;
|
||||
u16 count = e->error_count;
|
||||
|
||||
if (row < 0)
|
||||
return;
|
||||
|
||||
edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
|
||||
|
||||
if (type == HW_EVENT_ERR_CORRECTED) {
|
||||
mci->csrows[row]->ce_count += count;
|
||||
if (chan >= 0)
|
||||
mci->csrows[row]->channels[chan]->ce_count += count;
|
||||
} else {
|
||||
mci->csrows[row]->ue_count += count;
|
||||
}
|
||||
}
|
||||
|
||||
void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
|
||||
{
|
||||
struct mem_ctl_info *mci = error_desc_to_mci(e);
|
||||
u8 grain_bits;
|
||||
|
||||
/* Sanity-check driver-supplied grain value. */
|
||||
@ -1062,31 +1057,16 @@ void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
|
||||
/* Report the error via the trace interface */
|
||||
if (IS_ENABLED(CONFIG_RAS))
|
||||
trace_mc_event(type, e->msg, e->label, e->error_count,
|
||||
trace_mc_event(e->type, e->msg, e->label, e->error_count,
|
||||
mci->mc_idx, e->top_layer, e->mid_layer,
|
||||
e->low_layer,
|
||||
(e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
|
||||
grain_bits, e->syndrome, e->other_detail);
|
||||
|
||||
/* Memory type dependent details about the error */
|
||||
if (type == HW_EVENT_ERR_CORRECTED) {
|
||||
snprintf(detail, sizeof(detail),
|
||||
"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
|
||||
e->page_frame_number, e->offset_in_page,
|
||||
e->grain, e->syndrome);
|
||||
edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
|
||||
detail, e->other_detail, e->enable_per_layer_report,
|
||||
e->page_frame_number, e->offset_in_page, e->grain);
|
||||
} else {
|
||||
snprintf(detail, sizeof(detail),
|
||||
"page:0x%lx offset:0x%lx grain:%ld",
|
||||
e->page_frame_number, e->offset_in_page, e->grain);
|
||||
|
||||
edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
|
||||
detail, e->other_detail, e->enable_per_layer_report);
|
||||
}
|
||||
|
||||
|
||||
if (e->type == HW_EVENT_ERR_CORRECTED)
|
||||
edac_ce_error(e);
|
||||
else
|
||||
edac_ue_error(e);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
|
||||
|
||||
@ -1108,25 +1088,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
|
||||
int i, n_labels = 0;
|
||||
struct edac_raw_error_desc *e = &mci->error_desc;
|
||||
bool any_memory = true;
|
||||
|
||||
edac_dbg(3, "MC%d\n", mci->mc_idx);
|
||||
|
||||
/* Fills the error report buffer */
|
||||
memset(e, 0, sizeof (*e));
|
||||
e->error_count = error_count;
|
||||
e->type = type;
|
||||
e->top_layer = top_layer;
|
||||
e->mid_layer = mid_layer;
|
||||
e->low_layer = low_layer;
|
||||
e->page_frame_number = page_frame_number;
|
||||
e->offset_in_page = offset_in_page;
|
||||
e->syndrome = syndrome;
|
||||
e->msg = msg;
|
||||
e->other_detail = other_detail;
|
||||
/* need valid strings here for both: */
|
||||
e->msg = msg ?: "";
|
||||
e->other_detail = other_detail ?: "";
|
||||
|
||||
/*
|
||||
* Check if the event report is consistent and if the memory
|
||||
* location is known. If it is known, enable_per_layer_report will be
|
||||
* true, the DIMM(s) label info will be filled and the per-layer
|
||||
* Check if the event report is consistent and if the memory location is
|
||||
* known. If it is, the DIMM(s) label info will be filled and the DIMM's
|
||||
* error counters will be incremented.
|
||||
*/
|
||||
for (i = 0; i < mci->n_layers; i++) {
|
||||
@ -1145,7 +1127,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
pos[i] = -1;
|
||||
}
|
||||
if (pos[i] >= 0)
|
||||
e->enable_per_layer_report = true;
|
||||
any_memory = false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1176,24 +1158,25 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
|
||||
/*
|
||||
* If the error is memory-controller wide, there's no need to
|
||||
* seek for the affected DIMMs because the whole
|
||||
* channel/memory controller/... may be affected.
|
||||
* Also, don't show errors for empty DIMM slots.
|
||||
* seek for the affected DIMMs because the whole channel/memory
|
||||
* controller/... may be affected. Also, don't show errors for
|
||||
* empty DIMM slots.
|
||||
*/
|
||||
if (!e->enable_per_layer_report || !dimm->nr_pages)
|
||||
if (!dimm->nr_pages)
|
||||
continue;
|
||||
|
||||
if (n_labels >= EDAC_MAX_LABELS) {
|
||||
e->enable_per_layer_report = false;
|
||||
break;
|
||||
}
|
||||
n_labels++;
|
||||
if (p != e->label) {
|
||||
strcpy(p, OTHER_LABEL);
|
||||
p += strlen(OTHER_LABEL);
|
||||
if (n_labels > EDAC_MAX_LABELS) {
|
||||
p = e->label;
|
||||
*p = '\0';
|
||||
} else {
|
||||
if (p != e->label) {
|
||||
strcpy(p, OTHER_LABEL);
|
||||
p += strlen(OTHER_LABEL);
|
||||
}
|
||||
strcpy(p, dimm->label);
|
||||
p += strlen(p);
|
||||
}
|
||||
strcpy(p, dimm->label);
|
||||
p += strlen(p);
|
||||
|
||||
/*
|
||||
* get csrow/channel of the DIMM, in order to allow
|
||||
@ -1213,22 +1196,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
chan = -2;
|
||||
}
|
||||
|
||||
if (!e->enable_per_layer_report) {
|
||||
if (any_memory)
|
||||
strcpy(e->label, "any memory");
|
||||
} else {
|
||||
edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
|
||||
if (p == e->label)
|
||||
strcpy(e->label, "unknown memory");
|
||||
if (type == HW_EVENT_ERR_CORRECTED) {
|
||||
if (row >= 0) {
|
||||
mci->csrows[row]->ce_count += error_count;
|
||||
if (chan >= 0)
|
||||
mci->csrows[row]->channels[chan]->ce_count += error_count;
|
||||
}
|
||||
} else
|
||||
if (row >= 0)
|
||||
mci->csrows[row]->ue_count += error_count;
|
||||
}
|
||||
else if (!*e->label)
|
||||
strcpy(e->label, "unknown memory");
|
||||
|
||||
edac_inc_csrow(e, row, chan);
|
||||
|
||||
/* Fill the RAM location data */
|
||||
p = e->location;
|
||||
@ -1244,6 +1217,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
if (p > e->location)
|
||||
*(p - 1) = '\0';
|
||||
|
||||
edac_raw_mc_handle_error(type, mci, e);
|
||||
edac_raw_mc_handle_error(e);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
|
||||
|
@ -212,17 +212,13 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
|
||||
* edac_raw_mc_handle_error() - Reports a memory event to userspace without
|
||||
* doing anything to discover the error location.
|
||||
*
|
||||
* @type: severity of the error (CE/UE/Fatal)
|
||||
* @mci: a struct mem_ctl_info pointer
|
||||
* @e: error description
|
||||
*
|
||||
* This raw function is used internally by edac_mc_handle_error(). It should
|
||||
* only be called directly when the hardware error comes directly from BIOS,
|
||||
* like in the case of APEI GHES driver.
|
||||
*/
|
||||
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
|
||||
struct mem_ctl_info *mci,
|
||||
struct edac_raw_error_desc *e);
|
||||
void edac_raw_mc_handle_error(struct edac_raw_error_desc *e);
|
||||
|
||||
/**
|
||||
* edac_mc_handle_error() - Reports a memory event to userspace.
|
||||
|
@ -274,14 +274,8 @@ static const struct attribute_group *csrow_attr_groups[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static void csrow_attr_release(struct device *dev)
|
||||
{
|
||||
/* release device with _edac_mc_free() */
|
||||
}
|
||||
|
||||
static const struct device_type csrow_attr_type = {
|
||||
.groups = csrow_attr_groups,
|
||||
.release = csrow_attr_release,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -387,6 +381,14 @@ static const struct attribute_group *csrow_dev_groups[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
 * Release callback for a csrow device.
 *
 * Deliberately empty: only the sysfs representation goes away with the
 * csrow device. The data itself is owned by the mci device, which also
 * releases it.
 */
static void csrow_release(struct device *dev)
{
}
|
||||
|
||||
static inline int nr_pages_per_csrow(struct csrow_info *csrow)
|
||||
{
|
||||
int chan, nr_pages = 0;
|
||||
@ -405,6 +407,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
|
||||
|
||||
csrow->dev.type = &csrow_attr_type;
|
||||
csrow->dev.groups = csrow_dev_groups;
|
||||
csrow->dev.release = csrow_release;
|
||||
device_initialize(&csrow->dev);
|
||||
csrow->dev.parent = &mci->dev;
|
||||
csrow->mci = mci;
|
||||
@ -441,10 +444,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
|
||||
|
||||
error:
|
||||
for (--i; i >= 0; i--) {
|
||||
csrow = mci->csrows[i];
|
||||
if (!nr_pages_per_csrow(csrow))
|
||||
continue;
|
||||
device_unregister(&mci->csrows[i]->dev);
|
||||
if (device_is_registered(&mci->csrows[i]->dev))
|
||||
device_unregister(&mci->csrows[i]->dev);
|
||||
}
|
||||
|
||||
return err;
|
||||
@ -453,15 +454,13 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
|
||||
static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
|
||||
{
|
||||
int i;
|
||||
struct csrow_info *csrow;
|
||||
|
||||
for (i = mci->nr_csrows - 1; i >= 0; i--) {
|
||||
csrow = mci->csrows[i];
|
||||
if (!nr_pages_per_csrow(csrow))
|
||||
continue;
|
||||
device_unregister(&mci->csrows[i]->dev);
|
||||
for (i = 0; i < mci->nr_csrows; i++) {
|
||||
if (device_is_registered(&mci->csrows[i]->dev))
|
||||
device_unregister(&mci->csrows[i]->dev);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -552,10 +551,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
|
||||
char *data)
|
||||
{
|
||||
struct dimm_info *dimm = to_dimm(dev);
|
||||
u32 count;
|
||||
|
||||
count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
|
||||
return sprintf(data, "%u\n", count);
|
||||
return sprintf(data, "%u\n", dimm->ce_count);
|
||||
}
|
||||
|
||||
static ssize_t dimmdev_ue_count_show(struct device *dev,
|
||||
@ -563,10 +560,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
|
||||
char *data)
|
||||
{
|
||||
struct dimm_info *dimm = to_dimm(dev);
|
||||
u32 count;
|
||||
|
||||
count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx];
|
||||
return sprintf(data, "%u\n", count);
|
||||
return sprintf(data, "%u\n", dimm->ue_count);
|
||||
}
|
||||
|
||||
/* dimm/rank attribute files */
|
||||
@ -602,16 +597,18 @@ static const struct attribute_group *dimm_attr_groups[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static void dimm_attr_release(struct device *dev)
|
||||
{
|
||||
/* release device with _edac_mc_free() */
|
||||
}
|
||||
|
||||
static const struct device_type dimm_attr_type = {
|
||||
.groups = dimm_attr_groups,
|
||||
.release = dimm_attr_release,
|
||||
};
|
||||
|
||||
/*
 * Release callback for a dimm device.
 *
 * Deliberately empty: only the sysfs representation goes away with the
 * dimm device. The data itself is owned by the mci device, which also
 * releases it.
 */
static void dimm_release(struct device *dev)
{
}
|
||||
|
||||
/* Create a DIMM object under specified memory controller device */
|
||||
static int edac_create_dimm_object(struct mem_ctl_info *mci,
|
||||
struct dimm_info *dimm)
|
||||
@ -620,6 +617,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
|
||||
dimm->mci = mci;
|
||||
|
||||
dimm->dev.type = &dimm_attr_type;
|
||||
dimm->dev.release = dimm_release;
|
||||
device_initialize(&dimm->dev);
|
||||
|
||||
dimm->dev.parent = &mci->dev;
|
||||
@ -659,7 +657,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
|
||||
const char *data, size_t count)
|
||||
{
|
||||
struct mem_ctl_info *mci = to_mci(dev);
|
||||
int cnt, row, chan, i;
|
||||
struct dimm_info *dimm;
|
||||
int row, chan;
|
||||
|
||||
mci->ue_mc = 0;
|
||||
mci->ce_mc = 0;
|
||||
mci->ue_noinfo_count = 0;
|
||||
@ -675,11 +675,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
|
||||
ri->channels[chan]->ce_count = 0;
|
||||
}
|
||||
|
||||
cnt = 1;
|
||||
for (i = 0; i < mci->n_layers; i++) {
|
||||
cnt *= mci->layers[i].size;
|
||||
memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32));
|
||||
memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32));
|
||||
mci_for_each_dimm(mci, dimm) {
|
||||
dimm->ue_count = 0;
|
||||
dimm->ce_count = 0;
|
||||
}
|
||||
|
||||
mci->start_time = jiffies;
|
||||
@ -884,14 +882,8 @@ static const struct attribute_group *mci_attr_groups[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static void mci_attr_release(struct device *dev)
|
||||
{
|
||||
/* release device with _edac_mc_free() */
|
||||
}
|
||||
|
||||
static const struct device_type mci_attr_type = {
|
||||
.groups = mci_attr_groups,
|
||||
.release = mci_attr_release,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -910,8 +902,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
|
||||
|
||||
/* get the /sys/devices/system/edac subsys reference */
|
||||
mci->dev.type = &mci_attr_type;
|
||||
device_initialize(&mci->dev);
|
||||
|
||||
mci->dev.parent = mci_pdev;
|
||||
mci->dev.groups = groups;
|
||||
dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
|
||||
@ -921,7 +911,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
|
||||
err = device_add(&mci->dev);
|
||||
if (err < 0) {
|
||||
edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
|
||||
put_device(&mci->dev);
|
||||
/* no put_device() here, free mci with _edac_mc_free() */
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -937,24 +927,20 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
|
||||
|
||||
err = edac_create_dimm_object(mci, dimm);
|
||||
if (err)
|
||||
goto fail_unregister_dimm;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_EDAC_LEGACY_SYSFS
|
||||
err = edac_create_csrow_objects(mci);
|
||||
if (err < 0)
|
||||
goto fail_unregister_dimm;
|
||||
goto fail;
|
||||
#endif
|
||||
|
||||
edac_create_debugfs_nodes(mci);
|
||||
return 0;
|
||||
|
||||
fail_unregister_dimm:
|
||||
mci_for_each_dimm(mci, dimm) {
|
||||
if (device_is_registered(&dimm->dev))
|
||||
device_unregister(&dimm->dev);
|
||||
}
|
||||
device_unregister(&mci->dev);
|
||||
fail:
|
||||
edac_remove_sysfs_mci_device(mci);
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -966,6 +952,9 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
|
||||
{
|
||||
struct dimm_info *dimm;
|
||||
|
||||
if (!device_is_registered(&mci->dev))
|
||||
return;
|
||||
|
||||
edac_dbg(0, "\n");
|
||||
|
||||
#ifdef CONFIG_EDAC_DEBUG
|
||||
@ -976,17 +965,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
|
||||
#endif
|
||||
|
||||
mci_for_each_dimm(mci, dimm) {
|
||||
if (dimm->nr_pages == 0)
|
||||
if (!device_is_registered(&dimm->dev))
|
||||
continue;
|
||||
edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
|
||||
device_unregister(&dimm->dev);
|
||||
}
|
||||
}
|
||||
|
||||
void edac_unregister_sysfs(struct mem_ctl_info *mci)
|
||||
{
|
||||
edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev));
|
||||
device_unregister(&mci->dev);
|
||||
/* only remove the device, but keep mci */
|
||||
device_del(&mci->dev);
|
||||
}
|
||||
|
||||
static void mc_attr_release(struct device *dev)
|
||||
@ -1000,9 +986,6 @@ static void mc_attr_release(struct device *dev)
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
static const struct device_type mc_attr_type = {
|
||||
.release = mc_attr_release,
|
||||
};
|
||||
/*
|
||||
* Init/exit code for the module. Basically, creates/removes /sys/class/rc
|
||||
*/
|
||||
@ -1015,11 +998,10 @@ int __init edac_mc_sysfs_init(void)
|
||||
return -ENOMEM;
|
||||
|
||||
mci_pdev->bus = edac_get_sysfs_subsys();
|
||||
mci_pdev->type = &mc_attr_type;
|
||||
device_initialize(mci_pdev);
|
||||
dev_set_name(mci_pdev, "mc");
|
||||
mci_pdev->release = mc_attr_release;
|
||||
mci_pdev->init_name = "mc";
|
||||
|
||||
err = device_add(mci_pdev);
|
||||
err = device_register(mci_pdev);
|
||||
if (err < 0) {
|
||||
edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev));
|
||||
put_device(mci_pdev);
|
||||
|
@ -28,7 +28,6 @@ void edac_mc_sysfs_exit(void);
|
||||
extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
|
||||
const struct attribute_group **groups);
|
||||
extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
|
||||
void edac_unregister_sysfs(struct mem_ctl_info *mci);
|
||||
extern int edac_get_log_ue(void);
|
||||
extern int edac_get_log_ce(void);
|
||||
extern int edac_get_panic_on_ue(void);
|
||||
|
@ -201,7 +201,6 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
|
||||
|
||||
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
{
|
||||
enum hw_event_mc_err_type type;
|
||||
struct edac_raw_error_desc *e;
|
||||
struct mem_ctl_info *mci;
|
||||
struct ghes_edac_pvt *pvt;
|
||||
@ -240,17 +239,17 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
|
||||
switch (sev) {
|
||||
case GHES_SEV_CORRECTED:
|
||||
type = HW_EVENT_ERR_CORRECTED;
|
||||
e->type = HW_EVENT_ERR_CORRECTED;
|
||||
break;
|
||||
case GHES_SEV_RECOVERABLE:
|
||||
type = HW_EVENT_ERR_UNCORRECTED;
|
||||
e->type = HW_EVENT_ERR_UNCORRECTED;
|
||||
break;
|
||||
case GHES_SEV_PANIC:
|
||||
type = HW_EVENT_ERR_FATAL;
|
||||
e->type = HW_EVENT_ERR_FATAL;
|
||||
break;
|
||||
default:
|
||||
case GHES_SEV_NO:
|
||||
type = HW_EVENT_ERR_INFO;
|
||||
e->type = HW_EVENT_ERR_INFO;
|
||||
}
|
||||
|
||||
edac_dbg(1, "error validation_bits: 0x%08llx\n",
|
||||
@ -356,11 +355,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
mem_err->mem_dev_handle);
|
||||
|
||||
index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
|
||||
if (index >= 0) {
|
||||
if (index >= 0)
|
||||
e->top_layer = index;
|
||||
e->enable_per_layer_report = true;
|
||||
}
|
||||
|
||||
}
|
||||
if (p > e->location)
|
||||
*(p - 1) = '\0';
|
||||
@ -442,7 +438,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
|
||||
if (p > pvt->other_detail)
|
||||
*(p - 1) = '\0';
|
||||
|
||||
edac_raw_mc_handle_error(type, mci, e);
|
||||
edac_raw_mc_handle_error(e);
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&ghes_lock, flags);
|
||||
|
@ -1239,7 +1239,7 @@ static int __init mce_amd_init(void)
|
||||
|
||||
case 0x17:
|
||||
case 0x18:
|
||||
pr_warn("Decoding supported only on Scalable MCA processors.\n");
|
||||
pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
|
||||
return -EINVAL;
|
||||
|
||||
default:
|
||||
|
@ -477,16 +477,16 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
|
||||
|
||||
if (p->ce_cnt) {
|
||||
pinf = &p->ceinfo;
|
||||
if (!priv->p_data->quirks) {
|
||||
if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
|
||||
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
|
||||
"DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x",
|
||||
"CE", pinf->row, pinf->bank, pinf->col,
|
||||
"DDR ECC error type:%s Row %d Bank %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x",
|
||||
"CE", pinf->row, pinf->bank,
|
||||
pinf->bankgrpnr, pinf->blknr,
|
||||
pinf->bitpos, pinf->data);
|
||||
} else {
|
||||
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
|
||||
"DDR ECC error type:%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x",
|
||||
"DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x",
|
||||
"CE", pinf->row, pinf->bank, pinf->col,
|
||||
pinf->bankgrpnr, pinf->blknr,
|
||||
pinf->bitpos, pinf->data);
|
||||
}
|
||||
|
||||
@ -497,15 +497,15 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
|
||||
|
||||
if (p->ue_cnt) {
|
||||
pinf = &p->ueinfo;
|
||||
if (!priv->p_data->quirks) {
|
||||
if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) {
|
||||
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
|
||||
"DDR ECC error type :%s Row %d Bank %d Col %d ",
|
||||
"UE", pinf->row, pinf->bank, pinf->col);
|
||||
"DDR ECC error type :%s Row %d Bank %d BankGroup Number %d Block Number %d",
|
||||
"UE", pinf->row, pinf->bank,
|
||||
pinf->bankgrpnr, pinf->blknr);
|
||||
} else {
|
||||
snprintf(priv->message, SYNPS_EDAC_MSG_SIZE,
|
||||
"DDR ECC error type :%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d",
|
||||
"UE", pinf->row, pinf->bank, pinf->col,
|
||||
pinf->bankgrpnr, pinf->blknr);
|
||||
"DDR ECC error type :%s Row %d Bank %d Col %d ",
|
||||
"UE", pinf->row, pinf->bank, pinf->col);
|
||||
}
|
||||
|
||||
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
|
||||
|
@ -383,6 +383,9 @@ struct dimm_info {
|
||||
unsigned int csrow, cschannel; /* Points to the old API data */
|
||||
|
||||
u16 smbios_handle; /* Handle for SMBIOS type 17 */
|
||||
|
||||
u32 ce_count;
|
||||
u32 ue_count;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -442,6 +445,7 @@ struct errcount_attribute_data {
|
||||
* struct edac_raw_error_desc - Raw error report structure
|
||||
* @grain: minimum granularity for an error report, in bytes
|
||||
* @error_count: number of errors of the same type
|
||||
* @type: severity of the error (CE/UE/Fatal)
|
||||
* @top_layer: top layer of the error (layer[0])
|
||||
* @mid_layer: middle layer of the error (layer[1])
|
||||
* @low_layer: low layer of the error (layer[2])
|
||||
@ -453,8 +457,6 @@ struct errcount_attribute_data {
|
||||
* @location: location of the error
|
||||
* @label: label of the affected DIMM(s)
|
||||
* @other_detail: other driver-specific detail about the error
|
||||
* @enable_per_layer_report: if false, the error affects all layers
|
||||
* (typically, a memory controller error)
|
||||
*/
|
||||
struct edac_raw_error_desc {
|
||||
char location[LOCATION_SIZE];
|
||||
@ -462,6 +464,7 @@ struct edac_raw_error_desc {
|
||||
long grain;
|
||||
|
||||
u16 error_count;
|
||||
enum hw_event_mc_err_type type;
|
||||
int top_layer;
|
||||
int mid_layer;
|
||||
int low_layer;
|
||||
@ -470,7 +473,6 @@ struct edac_raw_error_desc {
|
||||
unsigned long syndrome;
|
||||
const char *msg;
|
||||
const char *other_detail;
|
||||
bool enable_per_layer_report;
|
||||
};
|
||||
|
||||
/* MEMORY controller information structure
|
||||
@ -560,7 +562,6 @@ struct mem_ctl_info {
|
||||
*/
|
||||
u32 ce_noinfo_count, ue_noinfo_count;
|
||||
u32 ue_mc, ce_mc;
|
||||
u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
|
||||
|
||||
struct completion complete;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user