Merge branch 'x86/ras' into x86/core, to fix conflicts

Conflicts:
	arch/x86/include/asm/irq_vectors.h

Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit c2f9b0af8b
Author: Ingo Molnar <mingo@kernel.org>
Date:   2015-06-07 15:35:27 +02:00
16 changed files with 271 additions and 52 deletions

Documentation/x86/x86_64/boot-options.txt

@@ -31,6 +31,9 @@ Machine check
   (e.g. BIOS or hardware monitoring applications), conflicting
   with OS's error handling, and you cannot deactivate the agent,
   then this option will be a help.
+mce=no_lmce
+  Do not opt-in to Local MCE delivery. Use legacy method
+  to broadcast MCEs.
mce=bootlog
   Enable logging of machine checks left over from booting.
   Disabled by default on AMD because some BIOS leave bogus ones.
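Usage note (illustrative, not part of the diff): like the other mce= switches documented above, the new option is passed on the kernel command line; since the parser matches each mce= parameter with strcmp(), options are combined as separate parameters, e.g.:

    mce=no_lmce mce=dont_log_ce

With mce=no_lmce, machine checks keep being broadcast to all CPUs even on LMCE-capable hardware.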

arch/x86/include/asm/entry_arch.h

@@ -52,4 +52,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
#endif
+#ifdef CONFIG_X86_MCE_AMD
+BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
+#endif
#endif

arch/x86/include/asm/hardirq.h

@@ -34,6 +34,9 @@ typedef struct {
#ifdef CONFIG_X86_MCE_THRESHOLD
    unsigned int irq_threshold_count;
#endif
+#ifdef CONFIG_X86_MCE_AMD
+    unsigned int irq_deferred_error_count;
+#endif
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
    unsigned int irq_hv_callback_count;
#endif

arch/x86/include/asm/hw_irq.h

@@ -40,6 +40,7 @@ extern asmlinkage void reschedule_interrupt(void);
extern asmlinkage void irq_move_cleanup_interrupt(void);
extern asmlinkage void reboot_interrupt(void);
extern asmlinkage void threshold_interrupt(void);
+extern asmlinkage void deferred_error_interrupt(void);
extern asmlinkage void call_function_interrupt(void);
extern asmlinkage void call_function_single_interrupt(void);
@@ -54,6 +55,7 @@ extern void trace_spurious_interrupt(void);
extern void trace_thermal_interrupt(void);
extern void trace_reschedule_interrupt(void);
extern void trace_threshold_interrupt(void);
+extern void trace_deferred_error_interrupt(void);
extern void trace_call_function_interrupt(void);
extern void trace_call_function_single_interrupt(void);
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt

arch/x86/include/asm/irq_vectors.h

@@ -83,22 +83,23 @@
 */
#define X86_PLATFORM_IPI_VECTOR 0xf7

-/* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
-#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
-#endif

/*
 * IRQ work vector:
 */
#define IRQ_WORK_VECTOR 0xf6

#define UV_BAU_MESSAGE 0xf5
+#define DEFERRED_ERROR_VECTOR 0xf4

/* Vector on which hypervisor callbacks will be delivered */
#define HYPERVISOR_CALLBACK_VECTOR 0xf3

+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR 0xf2
+#endif
+
/*
 * Local APIC timer IRQ vector is on a different priority level,
 * to work around the 'lost local interrupt if more than 2 IRQ
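For reference, the system-vector layout in this range after the merge, as reconstructed from the hunk above (highest priority first):

    0xf7  X86_PLATFORM_IPI_VECTOR
    0xf6  IRQ_WORK_VECTOR
    0xf5  UV_BAU_MESSAGE
    0xf4  DEFERRED_ERROR_VECTOR        (new)
    0xf3  HYPERVISOR_CALLBACK_VECTOR
    0xf2  POSTED_INTR_VECTOR           (CONFIG_HAVE_KVM only)
    0xf1  POSTED_INTR_WAKEUP_VECTOR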

arch/x86/include/asm/mce.h

@@ -17,11 +17,16 @@
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */
+#define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */

/* MCG_STATUS register defines */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+#define MCG_STATUS_LMCES (1ULL<<3) /* LMCE signaled */
+
+/* MCG_EXT_CTL register defines */
+#define MCG_EXT_CTL_LMCE_EN (1ULL<<0) /* Enable LMCE */

/* MCi_STATUS register defines */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
@@ -104,6 +109,7 @@ struct mce_log {
struct mca_config {
    bool dont_log_ce;
    bool cmci_disabled;
+   bool lmce_disabled;
    bool ignore_ce;
    bool disabled;
    bool ser;
@@ -117,8 +123,19 @@
};

struct mce_vendor_flags {
-   __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */
-         __reserved_0 : 63;
+   /*
+    * overflow recovery cpuid bit indicates that overflow
+    * conditions are not fatal
+    */
+   __u64 overflow_recov : 1,
+
+   /*
+    * SUCCOR stands for S/W UnCorrectable error COntainment
+    * and Recovery. It indicates support for data poisoning
+    * in HW and deferred error interrupts.
+    */
+         succor : 1,
+
+         __reserved_0 : 62;
};

extern struct mce_vendor_flags mce_flags;
@@ -168,12 +185,16 @@ void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(void);
void cmci_recheck(void);
+void lmce_clear(void);
+void lmce_enable(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
+static inline void lmce_clear(void) {}
+static inline void lmce_enable(void) {}
#endif

#ifdef CONFIG_X86_MCE_AMD
@@ -223,6 +244,9 @@ void do_machine_check(struct pt_regs *, long);
extern void (*mce_threshold_vector)(void);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

+/* Deferred error interrupt handler */
+extern void (*deferred_error_int_vector)(void);
+
/*
 * Thermal handler
 */
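A condensed sketch of how the new MCG_* bits fit together; this mirrors the lmce_supported()/intel_init_lmce() hunks further down rather than adding anything new:

    u64 cap, ctl;

    rdmsrl(MSR_IA32_MCG_CAP, cap);
    /* the CPU advertises LMCE only together with recovery support */
    if ((cap & (MCG_SER_P | MCG_LMCE_P)) == (MCG_SER_P | MCG_LMCE_P)) {
        /*
         * BIOS must also have set FEATURE_CONTROL_LMCE while locking
         * IA32_FEATURE_CONTROL; otherwise writing MCG_EXT_CTL #GPs.
         */
        rdmsrl(MSR_IA32_MCG_EXT_CTL, ctl);
        wrmsrl(MSR_IA32_MCG_EXT_CTL, ctl | MCG_EXT_CTL_LMCE_EN);
    }

At MCE time, MCG_STATUS_LMCES then tells do_machine_check() that the event was delivered only to this logical CPU, so the global rendezvous can be skipped.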

arch/x86/include/asm/trace/irq_vectors.h

@@ -100,6 +100,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single);
 */
DEFINE_IRQ_VECTOR_EVENT(threshold_apic);

+/*
+ * deferred_error_apic - called when entering/exiting a deferred apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
+
/*
 * thermal_apic - called when entering/exiting a thermal apic interrupt
 * vector handler

arch/x86/include/asm/traps.h

@@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
#ifndef CONFIG_X86_32
asmlinkage void smp_thermal_interrupt(void);
-asmlinkage void mce_threshold_interrupt(void);
+asmlinkage void smp_threshold_interrupt(void);
+asmlinkage void smp_deferred_error_interrupt(void);
#endif

extern enum ctx_state ist_enter(struct pt_regs *regs);

arch/x86/include/.../msr-index.h

@@ -56,6 +56,7 @@
#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_IA32_MCG_EXT_CTL 0x000004d0

#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
@@ -380,6 +381,7 @@
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+#define FEATURE_CONTROL_LMCE (1<<20)

#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)

arch/x86/kernel/cpu/mcheck/mce.c

@@ -1050,6 +1050,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
    char *msg = "Unknown";
    u64 recover_paddr = ~0ull;
    int flags = MF_ACTION_REQUIRED;
+   int lmce = 0;

    prev_state = ist_enter(regs);
@@ -1077,11 +1078,20 @@ void do_machine_check(struct pt_regs *regs, long error_code)
        kill_it = 1;

    /*
-    * Go through all the banks in exclusion of the other CPUs.
-    * This way we don't report duplicated events on shared banks
-    * because the first one to see it will clear it.
+    * Check if this MCE is signaled to only this logical processor
     */
-   order = mce_start(&no_way_out);
+   if (m.mcgstatus & MCG_STATUS_LMCES)
+       lmce = 1;
+   else {
+       /*
+        * Go through all the banks in exclusion of the other CPUs.
+        * This way we don't report duplicated events on shared banks
+        * because the first one to see it will clear it.
+        * If this is a Local MCE, then no need to perform rendezvous.
+        */
+       order = mce_start(&no_way_out);
+   }

    for (i = 0; i < cfg->banks; i++) {
        __clear_bit(i, toclear);
        if (!test_bit(i, valid_banks))
@@ -1158,8 +1168,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
     * Do most of the synchronization with other CPUs.
     * When there's any problem use only local no_way_out state.
     */
-   if (mce_end(order) < 0)
-       no_way_out = worst >= MCE_PANIC_SEVERITY;
+   if (!lmce) {
+       if (mce_end(order) < 0)
+           no_way_out = worst >= MCE_PANIC_SEVERITY;
+   } else {
+       /*
+        * Local MCE skipped calling mce_reign()
+        * If we found a fatal error, we need to panic here.
+        */
+       if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+           mce_panic("Machine check from unknown source",
+               NULL, NULL);
+   }

    /*
     * At insane "tolerant" levels we take no action. Otherwise
@@ -1640,10 +1660,16 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
        mce_intel_feature_init(c);
        mce_adjust_timer = cmci_intel_adjust_timer;
        break;
-   case X86_VENDOR_AMD:
+
+   case X86_VENDOR_AMD: {
+       u32 ebx = cpuid_ebx(0x80000007);
+
        mce_amd_feature_init(c);
-       mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
+       mce_flags.overflow_recov = !!(ebx & BIT(0));
+       mce_flags.succor = !!(ebx & BIT(1));
        break;
+   }
+
    default:
        break;
    }
@@ -1979,6 +2005,7 @@ void mce_disable_bank(int bank)
/*
 * mce=off Disables machine check
 * mce=no_cmci Disables CMCI
+ * mce=no_lmce Disables LMCE
 * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
 * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -2002,6 +2029,8 @@ static int __init mcheck_enable(char *str)
        cfg->disabled = true;
    else if (!strcmp(str, "no_cmci"))
        cfg->cmci_disabled = true;
+   else if (!strcmp(str, "no_lmce"))
+       cfg->lmce_disabled = true;
    else if (!strcmp(str, "dont_log_ce"))
        cfg->dont_log_ce = true;
    else if (!strcmp(str, "ignore_ce"))
@@ -2011,11 +2040,8 @@
    else if (!strcmp(str, "bios_cmci_threshold"))
        cfg->bios_cmci_threshold = true;
    else if (isdigit(str[0])) {
-       get_option(&str, &(cfg->tolerant));
-       if (*str == ',') {
-           ++str;
+       if (get_option(&str, &cfg->tolerant) == 2)
            get_option(&str, &(cfg->monarch_timeout));
-       }
    } else {
        pr_info("mce argument %s ignored. Please use /sys\n", str);
        return 0;

arch/x86/kernel/cpu/mcheck/mce_amd.c

@@ -1,19 +1,13 @@
/*
- * (c) 2005-2012 Advanced Micro Devices, Inc.
+ * (c) 2005-2015 Advanced Micro Devices, Inc.
 * Your use of this code is subject to the terms and conditions of the
 * GNU general public license version 2. See "COPYING" or
 * http://www.gnu.org/licenses/gpl.html
 *
 * Written by Jacob Shin - AMD, Inc.
- *
 * Maintained by: Borislav Petkov <bp@alien8.de>
 *
- * April 2006
- * - added support for AMD Family 0x10 processors
- * May 2012
- * - major scrubbing
- *
- * All MC4_MISCi registers are shared between multi-cores
+ * All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
@@ -32,6 +26,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS 9
#define THRESHOLD_MAX 0xFFF
@@ -47,6 +42,13 @@
#define MASK_BLKPTR_LO 0xFF000000
#define MCG_XBLK_ADDR 0xC0000400

+/* Deferred error settings */
+#define MSR_CU_DEF_ERR 0xC0000410
+#define MASK_DEF_LVTOFF 0x000000F0
+#define MASK_DEF_INT_TYPE 0x00000006
+#define DEF_LVT_OFF 0x2
+#define DEF_INT_TYPE_APIC 0x2
+
static const char * const th_names[] = {
    "load_store",
    "insn_fetch",
@@ -60,6 +62,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

static void amd_threshold_interrupt(void);
+static void amd_deferred_error_interrupt(void);
+
+static void default_deferred_error_interrupt(void)
+{
+    pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
+}
+void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
@@ -196,7 +205,7 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
    threshold_restart_bank(&tr);
};

-static int setup_APIC_mce(int reserved, int new)
+static int setup_APIC_mce_threshold(int reserved, int new)
{
    if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
                                          APIC_EILVT_MSG_FIX, 0))
@@ -205,6 +214,39 @@ static int setup_APIC_mce(int reserved, int new)
    return reserved;
}

+static int setup_APIC_deferred_error(int reserved, int new)
+{
+    if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
+                                          APIC_EILVT_MSG_FIX, 0))
+        return new;
+
+    return reserved;
+}
+
+static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
+{
+    u32 low = 0, high = 0;
+    int def_offset = -1, def_new;
+
+    if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
+        return;
+
+    def_new = (low & MASK_DEF_LVTOFF) >> 4;
+    if (!(low & MASK_DEF_LVTOFF)) {
+        pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
+        def_new = DEF_LVT_OFF;
+        low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
+    }
+
+    def_offset = setup_APIC_deferred_error(def_offset, def_new);
+    if ((def_offset == def_new) &&
+        (deferred_error_int_vector != amd_deferred_error_interrupt))
+        deferred_error_int_vector = amd_deferred_error_interrupt;
+
+    low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+    wrmsr(MSR_CU_DEF_ERR, low, high);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -252,7 +294,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
        b.interrupt_enable = 1;
        new = (high & MASK_LVTOFF_HI) >> 20;
-       offset = setup_APIC_mce(offset, new);
+       offset = setup_APIC_mce_threshold(offset, new);

        if ((offset == new) &&
            (mce_threshold_vector != amd_threshold_interrupt))
@@ -262,6 +304,73 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
            mce_threshold_block_init(&b, offset);
        }
    }
+
+    if (mce_flags.succor)
+        deferred_error_interrupt_enable(c);
+}
+
+static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+{
+    struct mce m;
+    u64 status;
+
+    rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+    if (!(status & MCI_STATUS_VAL))
+        return;
+
+    mce_setup(&m);
+
+    m.status = status;
+    m.bank = bank;
+    if (threshold_err)
+        m.misc = misc;
+
+    if (m.status & MCI_STATUS_ADDRV)
+        rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+
+    mce_log(&m);
+    wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+}
+
+static inline void __smp_deferred_error_interrupt(void)
+{
+    inc_irq_stat(irq_deferred_error_count);
+    deferred_error_int_vector();
+}
+
+asmlinkage __visible void smp_deferred_error_interrupt(void)
+{
+    entering_irq();
+    __smp_deferred_error_interrupt();
+    exiting_ack_irq();
+}
+
+asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+{
+    entering_irq();
+    trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
+    __smp_deferred_error_interrupt();
+    trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
+    exiting_ack_irq();
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
+{
+    u64 status;
+    unsigned int bank;
+
+    for (bank = 0; bank < mca_cfg.banks; ++bank) {
+        rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+
+        if (!(status & MCI_STATUS_VAL) ||
+            !(status & MCI_STATUS_DEFERRED))
+            continue;
+
+        __log_error(bank, false, 0);
+        break;
+    }
}

/*
@@ -273,12 +382,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */

static void amd_threshold_interrupt(void)
{
    u32 low = 0, high = 0, address = 0;
    int cpu = smp_processor_id();
    unsigned int bank, block;
-   struct mce m;

    /* assume first bank caused it */
    for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -321,15 +430,7 @@ static void amd_threshold_interrupt(void)
        return;

log:
-   mce_setup(&m);
-   rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
-   if (!(m.status & MCI_STATUS_VAL))
-       return;
-   m.misc = ((u64)high << 32) | low;
-   m.bank = bank;
-   mce_log(&m);
-   wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+   __log_error(bank, true, ((u64)high << 32) | low);
}

/*
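A worked decode of the MSR_CU_DEF_ERR fields used by deferred_error_interrupt_enable() above, with a hypothetical register value:

    low = 0x22
    (low & MASK_DEF_LVTOFF) >> 4 == 0x2    /* LVT offset 2, i.e. DEF_LVT_OFF */
    low & MASK_DEF_INT_TYPE      == 0x2    /* DEF_INT_TYPE_APIC: deliver via APIC */

The enable path forces both fields: LVT offset 0x2 (fixed up with a FW_BUG warning when the BIOS left it zero) and APIC delivery, so the new DEFERRED_ERROR_VECTOR can actually fire.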

arch/x86/kernel/cpu/mcheck/mce_intel.c

@@ -91,6 +91,36 @@ static int cmci_supported(int *banks)
    return !!(cap & MCG_CMCI_P);
}

+static bool lmce_supported(void)
+{
+    u64 tmp;
+
+    if (mca_cfg.lmce_disabled)
+        return false;
+
+    rdmsrl(MSR_IA32_MCG_CAP, tmp);
+
+    /*
+     * LMCE depends on recovery support in the processor. Hence both
+     * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
+     */
+    if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
+               (MCG_SER_P | MCG_LMCE_P))
+        return false;
+
+    /*
+     * BIOS should indicate support for LMCE by setting bit 20 in
+     * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
+     * generate a #GP fault.
+     */
+    rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
+    if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
+               (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
+        return true;
+
+    return false;
+}
+
bool mce_intel_cmci_poll(void)
{
    if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,8 +435,22 @@ static void intel_init_cmci(void)
    cmci_recheck();
}

+void intel_init_lmce(void)
+{
+    u64 val;
+
+    if (!lmce_supported())
+        return;
+
+    rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+
+    if (!(val & MCG_EXT_CTL_LMCE_EN))
+        wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
    intel_init_thermal(c);
    intel_init_cmci();
+   intel_init_lmce();
}

arch/x86/.../entry_64.S

@@ -925,6 +925,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
    threshold_interrupt smp_threshold_interrupt
#endif

+#ifdef CONFIG_X86_MCE_AMD
+apicinterrupt DEFERRED_ERROR_VECTOR \
+    deferred_error_interrupt smp_deferred_error_interrupt
+#endif
+
#ifdef CONFIG_X86_THERMAL_VECTOR
apicinterrupt THERMAL_APIC_VECTOR \
    thermal_interrupt smp_thermal_interrupt

arch/x86/kernel/irq.c

@@ -122,6 +122,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
        seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
    seq_puts(p, " Threshold APIC interrupts\n");
#endif
+#ifdef CONFIG_X86_MCE_AMD
+   seq_printf(p, "%*s: ", prec, "DFR");
+   for_each_online_cpu(j)
+       seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
+   seq_puts(p, " Deferred Error APIC interrupts\n");
+#endif
#ifdef CONFIG_X86_MCE
    seq_printf(p, "%*s: ", prec, "MCE");
    for_each_online_cpu(j)
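With this wired up, /proc/interrupts grows a DFR row on AMD systems with SUCCOR; hypothetical output for a two-CPU box:

               CPU0       CPU1
    ...
    DFR:          0          0   Deferred Error APIC interrupts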

arch/x86/kernel/irqinit.c

@@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
    alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif

+#ifdef CONFIG_X86_MCE_AMD
+   alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
+#endif
+
#ifdef CONFIG_X86_LOCAL_APIC
    /* self generated IPI for local APIC timer */
    alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);

arch/x86/kernel/traps.c

@@ -813,18 +813,6 @@ dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
    conditional_sti(regs);
-#if 0
-   /* No need to warn about this any longer. */
-   pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
-#endif
-}
-
-asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void)
-{
-}
-
-asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
-{
}
/* /*