x86/mce: Add infrastructure to support Local MCE

Initialize and prepare for handling LMCEs. Add a boot-time
option to disable LMCEs.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
[ Simplify stuff, align statements for better readability, reflow comments; kill
  unused lmce_clear(); save us an MSR write if LMCE is already enabled. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1433436928-31903-16-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ashok Raj 2015-06-04 18:55:23 +02:00 committed by Ingo Molnar
parent bc12edb873
commit 88d538672e
4 changed files with 54 additions and 0 deletions

View File

@ -31,6 +31,9 @@ Machine check
(e.g. BIOS or hardware monitoring applications), conflicting (e.g. BIOS or hardware monitoring applications), conflicting
with OS's error handling, and you cannot deactivate the agent, with OS's error handling, and you cannot deactivate the agent,
then this option will be a help. then this option will be a help.
mce=no_lmce
Do not opt in to Local MCE delivery. Use the legacy method
to broadcast MCEs.
mce=bootlog mce=bootlog
Enable logging of machine checks left over from booting. Enable logging of machine checks left over from booting.
Disabled by default on AMD because some BIOS leave bogus ones. Disabled by default on AMD because some BIOS leave bogus ones.

View File

@ -109,6 +109,7 @@ struct mce_log {
struct mca_config { struct mca_config {
bool dont_log_ce; bool dont_log_ce;
bool cmci_disabled; bool cmci_disabled;
bool lmce_disabled;
bool ignore_ce; bool ignore_ce;
bool disabled; bool disabled;
bool ser; bool ser;
@ -184,12 +185,16 @@ void cmci_clear(void);
void cmci_reenable(void); void cmci_reenable(void);
void cmci_rediscover(void); void cmci_rediscover(void);
void cmci_recheck(void); void cmci_recheck(void);
void lmce_clear(void);
void lmce_enable(void);
#else #else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {} static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {} static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(void) {} static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {} static inline void cmci_recheck(void) {}
/*
 * Empty LMCE stubs for the #else branch of this conditional block, so that
 * callers still compile when the real implementations are not built.
 * NOTE(review): the commit message says the unused lmce_clear() was removed;
 * confirm whether these declarations are still needed.
 */
static inline void lmce_clear(void) {}
static inline void lmce_enable(void) {}
#endif #endif
#ifdef CONFIG_X86_MCE_AMD #ifdef CONFIG_X86_MCE_AMD

View File

@ -1982,6 +1982,7 @@ void mce_disable_bank(int bank)
/* /*
* mce=off Disables machine check * mce=off Disables machine check
* mce=no_cmci Disables CMCI * mce=no_cmci Disables CMCI
* mce=no_lmce Disables LMCE
* mce=dont_log_ce Clears corrected events silently, no log created for CEs. * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
* mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
* mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@ -2005,6 +2006,8 @@ static int __init mcheck_enable(char *str)
cfg->disabled = true; cfg->disabled = true;
else if (!strcmp(str, "no_cmci")) else if (!strcmp(str, "no_cmci"))
cfg->cmci_disabled = true; cfg->cmci_disabled = true;
else if (!strcmp(str, "no_lmce"))
cfg->lmce_disabled = true;
else if (!strcmp(str, "dont_log_ce")) else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true; cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce")) else if (!strcmp(str, "ignore_ce"))

View File

@ -91,6 +91,36 @@ static int cmci_supported(int *banks)
return !!(cap & MCG_CMCI_P); return !!(cap & MCG_CMCI_P);
} }
/*
 * Decide whether Local MCE (LMCE) can be used on the current CPU.
 *
 * Returns false when LMCE was turned off through mca_cfg.lmce_disabled,
 * when MCG_CAP lacks either of the required capability bits, or when
 * IA32_FEATURE_CONTROL does not have both the lock and the LMCE bit set.
 * Returns true only if all of these checks pass.
 */
static bool lmce_supported(void)
{
	const u64 cap_required = MCG_SER_P | MCG_LMCE_P;
	const u64 fc_required  = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE;
	u64 msr;

	if (mca_cfg.lmce_disabled)
		return false;

	/*
	 * LMCE builds on the processor's recovery support, so MCG_CAP has
	 * to advertise MCG_SER_P together with MCG_LMCE_P.
	 */
	rdmsrl(MSR_IA32_MCG_CAP, msr);
	if ((msr & cap_required) != cap_required)
		return false;

	/*
	 * The BIOS signals LMCE support by setting bit 20 in
	 * IA32_FEATURE_CONTROL; without it, touching MCG_EXT_CTL raises
	 * a #GP fault.
	 */
	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);

	return (msr & fc_required) == fc_required;
}
bool mce_intel_cmci_poll(void) bool mce_intel_cmci_poll(void)
{ {
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@ -405,6 +435,19 @@ static void intel_init_cmci(void)
cmci_recheck(); cmci_recheck();
} }
/*
 * Turn on Local MCE delivery if lmce_supported() allows it.
 *
 * Reads MSR_IA32_MCG_EXT_CTL and sets MCG_EXT_CTL_LMCE_EN, leaving the
 * other bits as read. The MSR write is skipped when the enable bit is
 * already set.
 */
void intel_init_lmce(void)
{
	u64 ext_ctl;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, ext_ctl);

	/* Already enabled: nothing to write. */
	if (ext_ctl & MCG_EXT_CTL_LMCE_EN)
		return;

	wrmsrl(MSR_IA32_MCG_EXT_CTL, ext_ctl | MCG_EXT_CTL_LMCE_EN);
}
void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_init(struct cpuinfo_x86 *c)
{ {
intel_init_thermal(c); intel_init_thermal(c);