mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-19 11:46:05 +07:00
[PATCH] x86_64: Don't run NMI watchdog during machine checks
Machine checks can stall the machine for a long time, and the NMI watchdog should not be triggered while one is being handled. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
be56db6186
commit
553f265fe8
@ -29,6 +29,8 @@
|
||||
#define MISC_MCELOG_MINOR 227
|
||||
#define NR_BANKS 6
|
||||
|
||||
atomic_t mce_entry;
|
||||
|
||||
static int mce_dont_init;
|
||||
|
||||
/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
|
||||
@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
|
||||
int i;
|
||||
int panicm_found = 0;
|
||||
|
||||
atomic_inc(&mce_entry);
|
||||
|
||||
if (regs)
|
||||
notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
|
||||
if (!banks)
|
||||
return;
|
||||
goto out2;
|
||||
|
||||
memset(&m, 0, sizeof(struct mce));
|
||||
m.cpu = safe_smp_processor_id();
|
||||
@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
|
||||
out:
|
||||
/* Last thing done in the machine check exception to clear state. */
|
||||
wrmsrl(MSR_IA32_MCG_STATUS, 0);
|
||||
out2:
|
||||
atomic_dec(&mce_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <asm/proto.h>
|
||||
#include <asm/kdebug.h>
|
||||
#include <asm/local.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
/*
|
||||
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
|
||||
@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
|
||||
__get_cpu_var(nmi_touch) = 0;
|
||||
touched = 1;
|
||||
}
|
||||
#ifdef CONFIG_X86_MCE
|
||||
/* Could check oops_in_progress here too, but it's safer
|
||||
not to */
|
||||
if (atomic_read(&mce_entry) > 0)
|
||||
touched = 1;
|
||||
#endif
|
||||
if (!touched && __get_cpu_var(last_irq_sum) == sum) {
|
||||
/*
|
||||
* Ayiee, looks like this CPU is stuck ...
|
||||
|
@ -70,6 +70,9 @@ struct mce_log {
|
||||
#define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */
|
||||
#define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <asm/atomic.h>
|
||||
|
||||
void mce_log(struct mce *m);
|
||||
#ifdef CONFIG_X86_MCE_INTEL
|
||||
void mce_intel_feature_init(struct cpuinfo_x86 *c);
|
||||
@ -87,4 +90,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
}
|
||||
#endif
|
||||
|
||||
extern atomic_t mce_entry;
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user