mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-23 09:39:50 +07:00
x86/MCE: Extend table to report action optional errors through CMCI too
According to the Intel SDM Volume 3B (253669-063US, July 2017), action
optional (SRAO) errors can be reported either via MCE or CMC:

  In cases when SRAO is signaled via CMCI the error signature is
  indicated via UC=1, PCC=0, S=0.

  Type(*1)  UC  EN     PCC  S      AR  Signaling
  ---------------------------------------------------------------
  UC        1   1      1    x      x   MCE
  SRAR      1   1      0    1      1   MCE
  SRAO      1   x(*2)  0    x(*2)  0   MCE/CMC
  UCNA      1   x      0    0      0   CMC
  CE        0   x      x    x      x   CMC

  NOTES:
  1. SRAR, SRAO and UCNA errors are supported by the processor only
     when IA32_MCG_CAP[24] (MCG_SER_P) is set.
  2. EN=1, S=1 when signaled via MCE.
     EN=x, S=0 when signaled via CMC.

And there is a description in 15.6.2 UCR Error Reporting and Logging, for
bit S:

  S (Signaling) flag, bit 56 - Indicates (when set) that a machine check
  exception was generated for the UCR error reported in this MC bank...
  When the S flag in the IA32_MCi_STATUS register is clear, this UCR error
  was not signaled via a machine check exception and instead was reported
  as a corrected machine check (CMC).

So merge the two cases and just remove the S=0 check for SRAO in
mce_severity().

[ Borislav: Massage commit message.]

Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Tested-by: Chen Wei <chenwei68@huawei.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1511575548-41992-1-git-send-email-xiexiuqi@huawei.com
This commit is contained in:
parent
b29c6ef7bb
commit
e085ac7a6d
@ -59,6 +59,7 @@ static struct severity {
|
||||
/*
 * Expands to two designated initializers, .mcgmask = x and .mcgres = y,
 * for use inside an initializer list.
 * NOTE(review): presumably a mask / expected-value pair for a rule; the
 * code that reads these fields is not visible here, so confirm against
 * mce_severity().
 */
#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
|
||||
/*
 * Expands to two designated initializers, .mask = x and .result = y.
 * The MCESEV() entries in this file pass status bits to examine as x and
 * the value expected for them as y.
 * NOTE(review): the exact comparison is done by code not visible here;
 * confirm against mce_severity().
 */
#define MASK(x, y) .mask = x, .result = y
|
||||
/* The UC and S status bits OR-ed together. */
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
|
||||
/*
 * The UC and AR status bits OR-ed together. Used as the mask of the
 * "Action optional" rules so that bit S is left out of the comparison.
 */
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
|
||||
/* The UC, S and AR status bits OR-ed together. */
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
|
||||
/* The ADDRV and MISCV status bits OR-ed together. */
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
|
||||
|
||||
@ -101,6 +102,22 @@ static struct severity {
|
||||
NOSER, BITCLR(MCI_STATUS_UC)
|
||||
),
|
||||
|
||||
/*
|
||||
* known AO MCACODs reported via MCE or CMC:
|
||||
*
|
||||
* SRAO could be signaled either via a machine check exception or
|
||||
* CMCI, with bit S set to 1 or 0 respectively. So we don't need to
|
||||
* check bit S for SRAO.
|
||||
*/
|
||||
MCESEV(
|
||||
AO, "Action optional: memory scrubbing error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
|
||||
),
|
||||
MCESEV(
|
||||
AO, "Action optional: last level cache writeback error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
|
||||
),
|
||||
|
||||
/* ignore OVER for UCNA */
|
||||
MCESEV(
|
||||
UCNA, "Uncorrected no action required",
|
||||
@ -149,15 +166,6 @@ static struct severity {
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
|
||||
),
|
||||
|
||||
/* known AO MCACODs: */
|
||||
MCESEV(
|
||||
AO, "Action optional: memory scrubbing error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
|
||||
),
|
||||
MCESEV(
|
||||
AO, "Action optional: last level cache writeback error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
|
||||
),
|
||||
MCESEV(
|
||||
SOME, "Action optional: unknown MCACOD",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
|
||||
|
Loading…
Reference in New Issue
Block a user