x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries

Extend the severity checking code to add a new context, IN_KERNEL_RECOV,
which indicates that the machine check was triggered by kernel code
tagged with _ASM_EXTABLE_FAULT(), so that the ex_handler_fault()
handler will provide the fixup code with the trap number.

Major re-work of the tail code in do_machine_check() to make all this
readable/maintainable. One functional change is that tolerant=3 no longer
stops recovery actions: it reverts to only skipping the SIGBUS that would
otherwise be sent to the current process.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/89d243d05a7943bb187d1074bb30d9c4f482d5f5.1455732970.git.tony.luck@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Tony Luck 2016-02-17 10:20:13 -08:00 committed by Ingo Molnar
parent 548acf1923
commit b2f9d678e2
2 changed files with 56 additions and 36 deletions

View File

@ -14,6 +14,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <asm/mce.h> #include <asm/mce.h>
#include <asm/uaccess.h>
#include "mce-internal.h" #include "mce-internal.h"
@ -29,7 +30,7 @@
* panic situations) * panic situations)
*/ */
enum context { IN_KERNEL = 1, IN_USER = 2 }; enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 }; enum ser { SER_REQUIRED = 1, NO_SER = 2 };
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
@ -48,6 +49,7 @@ static struct severity {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define KERNEL .context = IN_KERNEL #define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER #define USER .context = IN_USER
#define KERNEL_RECOV .context = IN_KERNEL_RECOV
#define SER .ser = SER_REQUIRED #define SER .ser = SER_REQUIRED
#define NOSER .ser = NO_SER #define NOSER .ser = NO_SER
#define EXCP .excp = EXCP_CONTEXT #define EXCP .excp = EXCP_CONTEXT
@ -86,6 +88,10 @@ static struct severity {
PANIC, "In kernel and no restart IP", PANIC, "In kernel and no restart IP",
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
), ),
MCESEV(
PANIC, "In kernel and no restart IP",
EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV( MCESEV(
DEFERRED, "Deferred error", DEFERRED, "Deferred error",
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED) NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
@ -122,6 +128,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR), SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV) MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
), ),
MCESEV(
AR, "Action required: data load in error recoverable area of kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL_RECOV
),
MCESEV( MCESEV(
AR, "Action required: data load error in a user process", AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
@ -170,6 +181,9 @@ static struct severity {
) /* always matches. keep at end */ ) /* always matches. keep at end */
}; };
/*
 * Evaluates to non-zero only when both MCG_STATUS_RIPV and MCG_STATUS_EIPV
 * are set in the given MCG status value, i.e. when no bit of that pair is
 * missing from (mcg).
 */
#define mc_recoverable(mcg) \
	(!(~(mcg) & (MCG_STATUS_RIPV | MCG_STATUS_EIPV)))
/* /*
* If mcgstatus indicated that ip/cs on the stack were * If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have * no good, then "m->cs" will be zero and we will have
@ -183,7 +197,11 @@ static struct severity {
*/ */
static int error_context(struct mce *m) static int error_context(struct mce *m)
{ {
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; if ((m->cs & 3) == 3)
return IN_USER;
if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
return IN_KERNEL_RECOV;
return IN_KERNEL;
} }
/* /*

View File

@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
} }
} }
/*
 * Report an uncorrected memory error and pass the affected page frame
 * (m->addr >> PAGE_SHIFT) to memory_failure().
 *
 * MF_ACTION_REQUIRED is always requested; MF_MUST_KILL is added when
 * MCG_STATUS_RIPV is clear in m->mcgstatus.
 *
 * Returns whatever memory_failure() returned; a non-zero result is also
 * logged as "not recovered".
 */
static int do_memory_failure(struct mce *m)
{
	int mf_flags;
	int rc;

	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);

	/* Without a valid restart IP the MF_MUST_KILL flag is requested too. */
	mf_flags = (m->mcgstatus & MCG_STATUS_RIPV) ?
			MF_ACTION_REQUIRED :
			(MF_ACTION_REQUIRED | MF_MUST_KILL);

	rc = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, mf_flags);
	if (rc != 0)
		pr_err("Memory error not recovered");

	return rc;
}
/* /*
* The actual machine check handler. This only handles real * The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18. * exceptions when something got corrupted coming in through int 18.
@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown"; char *msg = "Unknown";
u64 recover_paddr = ~0ull;
int flags = MF_ACTION_REQUIRED;
int lmce = 0; int lmce = 0;
/* If this CPU is offline, just bail out. */ /* If this CPU is offline, just bail out. */
@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
} }
/* /*
* At insane "tolerant" levels we take no action. Otherwise * If tolerant is at an insane level we drop requests to kill
* we only die if we have no other choice. For less serious * processes and continue even when there is no way out.
* issues we try to recover, or limit damage to the current
* process.
*/ */
if (cfg->tolerant < 3) { if (cfg->tolerant == 3)
if (no_way_out) kill_it = 0;
else if (no_way_out)
mce_panic("Fatal machine check on current CPU", &m, msg); mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
recover_paddr = m.addr;
if (!(m.mcgstatus & MCG_STATUS_RIPV))
flags |= MF_MUST_KILL;
} else if (kill_it) {
force_sig(SIGBUS, current);
}
}
if (worst > 0) if (worst > 0)
mce_report_event(regs); mce_report_event(regs);
@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
out: out:
sync_core(); sync_core();
if (recover_paddr == ~0ull) if (worst != MCE_AR_SEVERITY && !kill_it)
goto done; goto out_ist;
pr_err("Uncorrected hardware memory error in user-access at %llx", /* Fault was in user mode and we need to take some action */
recover_paddr); if ((m.cs & 3) == 3) {
/*
* We must call memory_failure() here even if the current process is
* doomed. We still need to mark the page as poisoned and alert any
* other users of the page.
*/
ist_begin_non_atomic(regs); ist_begin_non_atomic(regs);
local_irq_enable(); local_irq_enable();
if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
pr_err("Memory error not recovered"); if (kill_it || do_memory_failure(&m))
force_sig(SIGBUS, current); force_sig(SIGBUS, current);
}
local_irq_disable(); local_irq_disable();
ist_end_non_atomic(); ist_end_non_atomic();
done: } else {
if (!fixup_exception(regs, X86_TRAP_MC))
mce_panic("Failed kernel mode recovery", &m, NULL);
}
out_ist:
ist_exit(regs); ist_exit(regs);
} }
EXPORT_SYMBOL_GPL(do_machine_check); EXPORT_SYMBOL_GPL(do_machine_check);