mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-16 00:06:46 +07:00
powerpc/64s/pseries: machine check convert to use common event code
The common machine_check_event data structures and queues are mostly platform independent, with powernv decoding SRR1/DSISR/etc., into machine_check_event objects. This patch converts pseries to use this infrastructure by decoding fwnmi/rtas data into machine_check_event objects. This allows queueing to be used by a subsequent change to delay the virtual mode handling of machine checks that occur in kernel space where it is unsafe to switch immediately to virtual mode, similarly to powernv. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> [mpe: Fix implicit fallthrough warnings in mce_handle_error()] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20190802105709.27696-10-npiggin@gmail.com
This commit is contained in:
parent
7290f3b3d3
commit
9ca766f989
@ -30,6 +30,10 @@ enum MCE_Disposition {
|
|||||||
enum MCE_Initiator {
|
enum MCE_Initiator {
|
||||||
MCE_INITIATOR_UNKNOWN = 0,
|
MCE_INITIATOR_UNKNOWN = 0,
|
||||||
MCE_INITIATOR_CPU = 1,
|
MCE_INITIATOR_CPU = 1,
|
||||||
|
MCE_INITIATOR_PCI = 2,
|
||||||
|
MCE_INITIATOR_ISA = 3,
|
||||||
|
MCE_INITIATOR_MEMORY= 4,
|
||||||
|
MCE_INITIATOR_POWERMGM = 5,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum MCE_ErrorType {
|
enum MCE_ErrorType {
|
||||||
@ -41,6 +45,8 @@ enum MCE_ErrorType {
|
|||||||
MCE_ERROR_TYPE_USER = 5,
|
MCE_ERROR_TYPE_USER = 5,
|
||||||
MCE_ERROR_TYPE_RA = 6,
|
MCE_ERROR_TYPE_RA = 6,
|
||||||
MCE_ERROR_TYPE_LINK = 7,
|
MCE_ERROR_TYPE_LINK = 7,
|
||||||
|
MCE_ERROR_TYPE_DCACHE = 8,
|
||||||
|
MCE_ERROR_TYPE_ICACHE = 9,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum MCE_ErrorClass {
|
enum MCE_ErrorClass {
|
||||||
|
@ -325,7 +325,7 @@ static void machine_check_process_queued_event(struct irq_work *work)
|
|||||||
void machine_check_print_event_info(struct machine_check_event *evt,
|
void machine_check_print_event_info(struct machine_check_event *evt,
|
||||||
bool user_mode, bool in_guest)
|
bool user_mode, bool in_guest)
|
||||||
{
|
{
|
||||||
const char *level, *sevstr, *subtype, *err_type;
|
const char *level, *sevstr, *subtype, *err_type, *initiator;
|
||||||
uint64_t ea = 0, pa = 0;
|
uint64_t ea = 0, pa = 0;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
char dar_str[50];
|
char dar_str[50];
|
||||||
@ -410,6 +410,28 @@ void machine_check_print_event_info(struct machine_check_event *evt,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch(evt->initiator) {
|
||||||
|
case MCE_INITIATOR_CPU:
|
||||||
|
initiator = "CPU";
|
||||||
|
break;
|
||||||
|
case MCE_INITIATOR_PCI:
|
||||||
|
initiator = "PCI";
|
||||||
|
break;
|
||||||
|
case MCE_INITIATOR_ISA:
|
||||||
|
initiator = "ISA";
|
||||||
|
break;
|
||||||
|
case MCE_INITIATOR_MEMORY:
|
||||||
|
initiator = "Memory";
|
||||||
|
break;
|
||||||
|
case MCE_INITIATOR_POWERMGM:
|
||||||
|
initiator = "Power Management";
|
||||||
|
break;
|
||||||
|
case MCE_INITIATOR_UNKNOWN:
|
||||||
|
default:
|
||||||
|
initiator = "Unknown";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
switch (evt->error_type) {
|
switch (evt->error_type) {
|
||||||
case MCE_ERROR_TYPE_UE:
|
case MCE_ERROR_TYPE_UE:
|
||||||
err_type = "UE";
|
err_type = "UE";
|
||||||
@ -476,6 +498,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
|
|||||||
if (evt->u.link_error.effective_address_provided)
|
if (evt->u.link_error.effective_address_provided)
|
||||||
ea = evt->u.link_error.effective_address;
|
ea = evt->u.link_error.effective_address;
|
||||||
break;
|
break;
|
||||||
|
case MCE_ERROR_TYPE_DCACHE:
|
||||||
|
err_type = "D-Cache";
|
||||||
|
subtype = "Unknown";
|
||||||
|
break;
|
||||||
|
case MCE_ERROR_TYPE_ICACHE:
|
||||||
|
err_type = "I-Cache";
|
||||||
|
subtype = "Unknown";
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
case MCE_ERROR_TYPE_UNKNOWN:
|
case MCE_ERROR_TYPE_UNKNOWN:
|
||||||
err_type = "Unknown";
|
err_type = "Unknown";
|
||||||
@ -508,6 +538,8 @@ void machine_check_print_event_info(struct machine_check_event *evt,
|
|||||||
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
|
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
|
||||||
|
|
||||||
subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
|
subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
|
||||||
mc_error_class[evt->error_class] : "Unknown";
|
mc_error_class[evt->error_class] : "Unknown";
|
||||||
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
|
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
|
||||||
|
@ -76,6 +76,7 @@ struct pseries_mc_errorlog {
|
|||||||
#define MC_ERROR_TYPE_UE 0x00
|
#define MC_ERROR_TYPE_UE 0x00
|
||||||
#define MC_ERROR_TYPE_SLB 0x01
|
#define MC_ERROR_TYPE_SLB 0x01
|
||||||
#define MC_ERROR_TYPE_ERAT 0x02
|
#define MC_ERROR_TYPE_ERAT 0x02
|
||||||
|
#define MC_ERROR_TYPE_UNKNOWN 0x03
|
||||||
#define MC_ERROR_TYPE_TLB 0x04
|
#define MC_ERROR_TYPE_TLB 0x04
|
||||||
#define MC_ERROR_TYPE_D_CACHE 0x05
|
#define MC_ERROR_TYPE_D_CACHE 0x05
|
||||||
#define MC_ERROR_TYPE_I_CACHE 0x07
|
#define MC_ERROR_TYPE_I_CACHE 0x07
|
||||||
@ -87,6 +88,9 @@ struct pseries_mc_errorlog {
|
|||||||
#define MC_ERROR_UE_LOAD_STORE 3
|
#define MC_ERROR_UE_LOAD_STORE 3
|
||||||
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
|
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
|
||||||
|
|
||||||
|
#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
|
||||||
|
#define UE_LOGICAL_ADDR_PROVIDED 0x20
|
||||||
|
|
||||||
#define MC_ERROR_SLB_PARITY 0
|
#define MC_ERROR_SLB_PARITY 0
|
||||||
#define MC_ERROR_SLB_MULTIHIT 1
|
#define MC_ERROR_SLB_MULTIHIT 1
|
||||||
#define MC_ERROR_SLB_INDETERMINATE 2
|
#define MC_ERROR_SLB_INDETERMINATE 2
|
||||||
@ -113,27 +117,6 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
|
|
||||||
{
|
|
||||||
__be64 addr = 0;
|
|
||||||
|
|
||||||
switch (mlog->error_type) {
|
|
||||||
case MC_ERROR_TYPE_UE:
|
|
||||||
if (mlog->sub_err_type & 0x40)
|
|
||||||
addr = mlog->effective_address;
|
|
||||||
break;
|
|
||||||
case MC_ERROR_TYPE_SLB:
|
|
||||||
case MC_ERROR_TYPE_ERAT:
|
|
||||||
case MC_ERROR_TYPE_TLB:
|
|
||||||
if (mlog->sub_err_type & 0x80)
|
|
||||||
addr = mlog->effective_address;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return be64_to_cpu(addr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Enable the hotplug interrupt late because processing them may touch other
|
* Enable the hotplug interrupt late because processing them may touch other
|
||||||
* devices or systems (e.g. hugepages) that have not been initialized at the
|
* devices or systems (e.g. hugepages) that have not been initialized at the
|
||||||
@ -511,149 +494,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
|
|||||||
return 0; /* need to perform reset */
|
return 0; /* need to perform reset */
|
||||||
}
|
}
|
||||||
|
|
||||||
#define VAL_TO_STRING(ar, val) \
|
|
||||||
(((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
|
|
||||||
|
|
||||||
static void pseries_print_mce_info(struct pt_regs *regs,
|
static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
|
||||||
struct rtas_error_log *errp)
|
|
||||||
{
|
{
|
||||||
const char *level, *sevstr;
|
struct mce_error_info mce_err = { 0 };
|
||||||
|
unsigned long eaddr = 0, paddr = 0;
|
||||||
struct pseries_errorlog *pseries_log;
|
struct pseries_errorlog *pseries_log;
|
||||||
struct pseries_mc_errorlog *mce_log;
|
struct pseries_mc_errorlog *mce_log;
|
||||||
|
int disposition = rtas_error_disposition(errp);
|
||||||
|
int initiator = rtas_error_initiator(errp);
|
||||||
|
int severity = rtas_error_severity(errp);
|
||||||
u8 error_type, err_sub_type;
|
u8 error_type, err_sub_type;
|
||||||
u64 addr;
|
|
||||||
u8 initiator = rtas_error_initiator(errp);
|
|
||||||
int disposition = rtas_error_disposition(errp);
|
|
||||||
|
|
||||||
static const char * const initiators[] = {
|
if (initiator == RTAS_INITIATOR_UNKNOWN)
|
||||||
[0] = "Unknown",
|
mce_err.initiator = MCE_INITIATOR_UNKNOWN;
|
||||||
[1] = "CPU",
|
else if (initiator == RTAS_INITIATOR_CPU)
|
||||||
[2] = "PCI",
|
mce_err.initiator = MCE_INITIATOR_CPU;
|
||||||
[3] = "ISA",
|
else if (initiator == RTAS_INITIATOR_PCI)
|
||||||
[4] = "Memory",
|
mce_err.initiator = MCE_INITIATOR_PCI;
|
||||||
[5] = "Power Mgmt",
|
else if (initiator == RTAS_INITIATOR_ISA)
|
||||||
};
|
mce_err.initiator = MCE_INITIATOR_ISA;
|
||||||
static const char * const mc_err_types[] = {
|
else if (initiator == RTAS_INITIATOR_MEMORY)
|
||||||
[0] = "UE",
|
mce_err.initiator = MCE_INITIATOR_MEMORY;
|
||||||
[1] = "SLB",
|
else if (initiator == RTAS_INITIATOR_POWERMGM)
|
||||||
[2] = "ERAT",
|
mce_err.initiator = MCE_INITIATOR_POWERMGM;
|
||||||
[3] = "Unknown",
|
else
|
||||||
[4] = "TLB",
|
mce_err.initiator = MCE_INITIATOR_UNKNOWN;
|
||||||
[5] = "D-Cache",
|
|
||||||
[6] = "Unknown",
|
|
||||||
[7] = "I-Cache",
|
|
||||||
};
|
|
||||||
static const char * const mc_ue_types[] = {
|
|
||||||
[0] = "Indeterminate",
|
|
||||||
[1] = "Instruction fetch",
|
|
||||||
[2] = "Page table walk ifetch",
|
|
||||||
[3] = "Load/Store",
|
|
||||||
[4] = "Page table walk Load/Store",
|
|
||||||
};
|
|
||||||
|
|
||||||
/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
|
if (severity == RTAS_SEVERITY_NO_ERROR)
|
||||||
static const char * const mc_slb_types[] = {
|
mce_err.severity = MCE_SEV_NO_ERROR;
|
||||||
[0] = "Parity",
|
else if (severity == RTAS_SEVERITY_EVENT)
|
||||||
[1] = "Multihit",
|
mce_err.severity = MCE_SEV_WARNING;
|
||||||
[2] = "Indeterminate",
|
else if (severity == RTAS_SEVERITY_WARNING)
|
||||||
};
|
mce_err.severity = MCE_SEV_WARNING;
|
||||||
|
else if (severity == RTAS_SEVERITY_ERROR_SYNC)
|
||||||
|
mce_err.severity = MCE_SEV_SEVERE;
|
||||||
|
else if (severity == RTAS_SEVERITY_ERROR)
|
||||||
|
mce_err.severity = MCE_SEV_SEVERE;
|
||||||
|
else if (severity == RTAS_SEVERITY_FATAL)
|
||||||
|
mce_err.severity = MCE_SEV_FATAL;
|
||||||
|
else
|
||||||
|
mce_err.severity = MCE_SEV_FATAL;
|
||||||
|
|
||||||
/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
|
if (severity <= RTAS_SEVERITY_ERROR_SYNC)
|
||||||
static const char * const mc_soft_types[] = {
|
mce_err.sync_error = true;
|
||||||
[0] = "Unknown",
|
else
|
||||||
[1] = "Parity",
|
mce_err.sync_error = false;
|
||||||
[2] = "Multihit",
|
|
||||||
[3] = "Indeterminate",
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!rtas_error_extended(errp)) {
|
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
|
||||||
pr_err("Machine check interrupt: Missing extended error log\n");
|
mce_err.error_class = MCE_ECLASS_UNKNOWN;
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
|
|
||||||
if (pseries_log == NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
|
|
||||||
|
|
||||||
error_type = mce_log->error_type;
|
|
||||||
err_sub_type = rtas_mc_error_sub_type(mce_log);
|
|
||||||
|
|
||||||
switch (rtas_error_severity(errp)) {
|
|
||||||
case RTAS_SEVERITY_NO_ERROR:
|
|
||||||
level = KERN_INFO;
|
|
||||||
sevstr = "Harmless";
|
|
||||||
break;
|
|
||||||
case RTAS_SEVERITY_WARNING:
|
|
||||||
level = KERN_WARNING;
|
|
||||||
sevstr = "";
|
|
||||||
break;
|
|
||||||
case RTAS_SEVERITY_ERROR:
|
|
||||||
case RTAS_SEVERITY_ERROR_SYNC:
|
|
||||||
level = KERN_ERR;
|
|
||||||
sevstr = "Severe";
|
|
||||||
break;
|
|
||||||
case RTAS_SEVERITY_FATAL:
|
|
||||||
default:
|
|
||||||
level = KERN_ERR;
|
|
||||||
sevstr = "Fatal";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_PPC_BOOK3S_64
|
|
||||||
/* Display faulty slb contents for SLB errors. */
|
|
||||||
if (error_type == MC_ERROR_TYPE_SLB)
|
|
||||||
slb_dump_contents(local_paca->mce_faulty_slbs);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
|
|
||||||
disposition == RTAS_DISP_FULLY_RECOVERED ?
|
|
||||||
"Recovered" : "Not recovered");
|
|
||||||
if (user_mode(regs)) {
|
|
||||||
printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
|
|
||||||
regs->nip, current->pid, current->comm);
|
|
||||||
} else {
|
|
||||||
printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
|
|
||||||
(void *)regs->nip);
|
|
||||||
}
|
|
||||||
printk("%s Initiator: %s\n", level,
|
|
||||||
VAL_TO_STRING(initiators, initiator));
|
|
||||||
|
|
||||||
switch (error_type) {
|
|
||||||
case MC_ERROR_TYPE_UE:
|
|
||||||
printk("%s Error type: %s [%s]\n", level,
|
|
||||||
VAL_TO_STRING(mc_err_types, error_type),
|
|
||||||
VAL_TO_STRING(mc_ue_types, err_sub_type));
|
|
||||||
break;
|
|
||||||
case MC_ERROR_TYPE_SLB:
|
|
||||||
printk("%s Error type: %s [%s]\n", level,
|
|
||||||
VAL_TO_STRING(mc_err_types, error_type),
|
|
||||||
VAL_TO_STRING(mc_slb_types, err_sub_type));
|
|
||||||
break;
|
|
||||||
case MC_ERROR_TYPE_ERAT:
|
|
||||||
case MC_ERROR_TYPE_TLB:
|
|
||||||
printk("%s Error type: %s [%s]\n", level,
|
|
||||||
VAL_TO_STRING(mc_err_types, error_type),
|
|
||||||
VAL_TO_STRING(mc_soft_types, err_sub_type));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
printk("%s Error type: %s\n", level,
|
|
||||||
VAL_TO_STRING(mc_err_types, error_type));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
addr = rtas_mc_get_effective_addr(mce_log);
|
|
||||||
if (addr)
|
|
||||||
printk("%s Effective address: %016llx\n", level, addr);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int mce_handle_error(struct rtas_error_log *errp)
|
|
||||||
{
|
|
||||||
struct pseries_errorlog *pseries_log;
|
|
||||||
struct pseries_mc_errorlog *mce_log;
|
|
||||||
int disposition = rtas_error_disposition(errp);
|
|
||||||
u8 error_type;
|
|
||||||
|
|
||||||
if (!rtas_error_extended(errp))
|
if (!rtas_error_extended(errp))
|
||||||
goto out;
|
goto out;
|
||||||
@ -664,6 +553,105 @@ static int mce_handle_error(struct rtas_error_log *errp)
|
|||||||
|
|
||||||
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
|
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
|
||||||
error_type = mce_log->error_type;
|
error_type = mce_log->error_type;
|
||||||
|
err_sub_type = rtas_mc_error_sub_type(mce_log);
|
||||||
|
|
||||||
|
switch (mce_log->error_type) {
|
||||||
|
case MC_ERROR_TYPE_UE:
|
||||||
|
mce_err.error_type = MCE_ERROR_TYPE_UE;
|
||||||
|
switch (err_sub_type) {
|
||||||
|
case MC_ERROR_UE_IFETCH:
|
||||||
|
mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
|
||||||
|
mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_UE_LOAD_STORE:
|
||||||
|
mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
|
||||||
|
mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_UE_INDETERMINATE:
|
||||||
|
default:
|
||||||
|
mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
|
||||||
|
eaddr = be64_to_cpu(mce_log->effective_address);
|
||||||
|
|
||||||
|
if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
|
||||||
|
paddr = be64_to_cpu(mce_log->logical_address);
|
||||||
|
} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
|
||||||
|
unsigned long pfn;
|
||||||
|
|
||||||
|
pfn = addr_to_pfn(regs, eaddr);
|
||||||
|
if (pfn != ULONG_MAX)
|
||||||
|
paddr = pfn << PAGE_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TYPE_SLB:
|
||||||
|
mce_err.error_type = MCE_ERROR_TYPE_SLB;
|
||||||
|
switch (err_sub_type) {
|
||||||
|
case MC_ERROR_SLB_PARITY:
|
||||||
|
mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_SLB_MULTIHIT:
|
||||||
|
mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_SLB_INDETERMINATE:
|
||||||
|
default:
|
||||||
|
mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (mce_log->sub_err_type & 0x80)
|
||||||
|
eaddr = be64_to_cpu(mce_log->effective_address);
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TYPE_ERAT:
|
||||||
|
mce_err.error_type = MCE_ERROR_TYPE_ERAT;
|
||||||
|
switch (err_sub_type) {
|
||||||
|
case MC_ERROR_ERAT_PARITY:
|
||||||
|
mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_ERAT_MULTIHIT:
|
||||||
|
mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_ERAT_INDETERMINATE:
|
||||||
|
default:
|
||||||
|
mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (mce_log->sub_err_type & 0x80)
|
||||||
|
eaddr = be64_to_cpu(mce_log->effective_address);
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TYPE_TLB:
|
||||||
|
mce_err.error_type = MCE_ERROR_TYPE_TLB;
|
||||||
|
switch (err_sub_type) {
|
||||||
|
case MC_ERROR_TLB_PARITY:
|
||||||
|
mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TLB_MULTIHIT:
|
||||||
|
mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TLB_INDETERMINATE:
|
||||||
|
default:
|
||||||
|
mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (mce_log->sub_err_type & 0x80)
|
||||||
|
eaddr = be64_to_cpu(mce_log->effective_address);
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TYPE_D_CACHE:
|
||||||
|
mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TYPE_I_CACHE:
|
||||||
|
mce_err.error_type = MCE_ERROR_TYPE_ICACHE; /* I-cache, not D-cache: reported as "I-Cache" */
|
||||||
|
break;
|
||||||
|
case MC_ERROR_TYPE_UNKNOWN:
|
||||||
|
default:
|
||||||
|
mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_PPC_BOOK3S_64
|
#ifdef CONFIG_PPC_BOOK3S_64
|
||||||
if (disposition == RTAS_DISP_NOT_RECOVERED) {
|
if (disposition == RTAS_DISP_NOT_RECOVERED) {
|
||||||
@ -682,99 +670,25 @@ static int mce_handle_error(struct rtas_error_log *errp)
|
|||||||
slb_save_contents(local_paca->mce_faulty_slbs);
|
slb_save_contents(local_paca->mce_faulty_slbs);
|
||||||
flush_and_reload_slb();
|
flush_and_reload_slb();
|
||||||
disposition = RTAS_DISP_FULLY_RECOVERED;
|
disposition = RTAS_DISP_FULLY_RECOVERED;
|
||||||
rtas_set_disposition_recovered(errp);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
|
||||||
|
/* Platform corrected itself but could be degraded */
|
||||||
|
printk(KERN_ERR "MCE: limited recovery, system may "
|
||||||
|
"be degraded\n");
|
||||||
|
disposition = RTAS_DISP_FULLY_RECOVERED;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
|
||||||
|
&mce_err, regs->nip, eaddr, paddr);
|
||||||
|
|
||||||
return disposition;
|
return disposition;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_FAILURE
|
|
||||||
|
|
||||||
static DEFINE_PER_CPU(int, rtas_ue_count);
|
|
||||||
static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
|
|
||||||
|
|
||||||
#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
|
|
||||||
#define UE_LOGICAL_ADDR_PROVIDED 0x20
|
|
||||||
|
|
||||||
|
|
||||||
static void pseries_hwpoison_work_fn(struct work_struct *work)
|
|
||||||
{
|
|
||||||
unsigned long paddr;
|
|
||||||
int index;
|
|
||||||
|
|
||||||
while (__this_cpu_read(rtas_ue_count) > 0) {
|
|
||||||
index = __this_cpu_read(rtas_ue_count) - 1;
|
|
||||||
paddr = __this_cpu_read(rtas_ue_paddr[index]);
|
|
||||||
memory_failure(paddr >> PAGE_SHIFT, 0);
|
|
||||||
__this_cpu_dec(rtas_ue_count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
|
|
||||||
|
|
||||||
static void queue_ue_paddr(unsigned long paddr)
|
|
||||||
{
|
|
||||||
int index;
|
|
||||||
|
|
||||||
index = __this_cpu_inc_return(rtas_ue_count) - 1;
|
|
||||||
if (index >= MAX_MC_EVT) {
|
|
||||||
__this_cpu_dec(rtas_ue_count);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
this_cpu_write(rtas_ue_paddr[index], paddr);
|
|
||||||
schedule_work(&hwpoison_work);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void pseries_do_memory_failure(struct pt_regs *regs,
|
|
||||||
struct pseries_mc_errorlog *mce_log)
|
|
||||||
{
|
|
||||||
unsigned long paddr;
|
|
||||||
|
|
||||||
if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
|
|
||||||
paddr = be64_to_cpu(mce_log->logical_address);
|
|
||||||
} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
|
|
||||||
unsigned long pfn;
|
|
||||||
|
|
||||||
pfn = addr_to_pfn(regs,
|
|
||||||
be64_to_cpu(mce_log->effective_address));
|
|
||||||
if (pfn == ULONG_MAX)
|
|
||||||
return;
|
|
||||||
paddr = pfn << PAGE_SHIFT;
|
|
||||||
} else {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
queue_ue_paddr(paddr);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void pseries_process_ue(struct pt_regs *regs,
|
|
||||||
struct rtas_error_log *errp)
|
|
||||||
{
|
|
||||||
struct pseries_errorlog *pseries_log;
|
|
||||||
struct pseries_mc_errorlog *mce_log;
|
|
||||||
|
|
||||||
if (!rtas_error_extended(errp))
|
|
||||||
return;
|
|
||||||
|
|
||||||
pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
|
|
||||||
if (!pseries_log)
|
|
||||||
return;
|
|
||||||
|
|
||||||
mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
|
|
||||||
|
|
||||||
if (mce_log->error_type == MC_ERROR_TYPE_UE)
|
|
||||||
pseries_do_memory_failure(regs, mce_log);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline void pseries_process_ue(struct pt_regs *regs,
|
|
||||||
struct rtas_error_log *errp) { }
|
|
||||||
#endif /*CONFIG_MEMORY_FAILURE */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Process MCE rtas errlog event.
|
* Process MCE rtas errlog event.
|
||||||
*/
|
*/
|
||||||
@ -795,48 +709,50 @@ static void mce_process_errlog_event(struct irq_work *work)
|
|||||||
* Return 1 if corrected (or delivered a signal).
|
* Return 1 if corrected (or delivered a signal).
|
||||||
* Return 0 if there is nothing we can do.
|
* Return 0 if there is nothing we can do.
|
||||||
*/
|
*/
|
||||||
static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
|
static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
|
||||||
{
|
{
|
||||||
int recovered = 0;
|
int recovered = 0;
|
||||||
int disposition = rtas_error_disposition(err);
|
|
||||||
|
|
||||||
pseries_print_mce_info(regs, err);
|
|
||||||
|
|
||||||
if (!(regs->msr & MSR_RI)) {
|
if (!(regs->msr & MSR_RI)) {
|
||||||
/* If MSR_RI isn't set, we cannot recover */
|
/* If MSR_RI isn't set, we cannot recover */
|
||||||
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
|
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
|
||||||
recovered = 0;
|
recovered = 0;
|
||||||
|
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
|
||||||
} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
|
|
||||||
/* Platform corrected itself */
|
/* Platform corrected itself */
|
||||||
recovered = 1;
|
recovered = 1;
|
||||||
|
} else if (evt->severity == MCE_SEV_FATAL) {
|
||||||
} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
|
/* Fatal machine check */
|
||||||
/* Platform corrected itself but could be degraded */
|
pr_err("Machine check interrupt is fatal\n");
|
||||||
printk(KERN_ERR "MCE: limited recovery, system may "
|
recovered = 0;
|
||||||
"be degraded\n");
|
|
||||||
recovered = 1;
|
|
||||||
|
|
||||||
} else if (user_mode(regs) && !is_global_init(current) &&
|
|
||||||
rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we received a synchronous error when in userspace
|
|
||||||
* kill the task. Firmware may report details of the fail
|
|
||||||
* asynchronously, so we can't rely on the target and type
|
|
||||||
* fields being valid here.
|
|
||||||
*/
|
|
||||||
printk(KERN_ERR "MCE: uncorrectable error, killing task "
|
|
||||||
"%s:%d\n", current->comm, current->pid);
|
|
||||||
|
|
||||||
_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
|
|
||||||
recovered = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pseries_process_ue(regs, err);
|
if (!recovered && evt->sync_error) {
|
||||||
|
/*
|
||||||
/* Queue irq work to log this rtas event later. */
|
* Try to kill processes if we get a synchronous machine check
|
||||||
irq_work_queue(&mce_errlog_process_work);
|
* (e.g., one caused by execution of this instruction). This
|
||||||
|
* will devolve into a panic if we try to kill init or are in
|
||||||
|
* an interrupt etc.
|
||||||
|
*
|
||||||
|
* TODO: Queue up this address for hwpoisoning later.
|
||||||
|
* TODO: This is not quite right for d-side machine
|
||||||
|
* checks ->nip is not necessarily the important
|
||||||
|
* address.
|
||||||
|
*/
|
||||||
|
if ((user_mode(regs))) {
|
||||||
|
_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
|
||||||
|
recovered = 1;
|
||||||
|
} else if (die_will_crash()) {
|
||||||
|
/*
|
||||||
|
* die() would kill the kernel, so better to go via
|
||||||
|
* the platform reboot code that will log the
|
||||||
|
* machine check.
|
||||||
|
*/
|
||||||
|
recovered = 0;
|
||||||
|
} else {
|
||||||
|
die("Machine check", regs, SIGBUS);
|
||||||
|
recovered = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return recovered;
|
return recovered;
|
||||||
}
|
}
|
||||||
@ -853,14 +769,21 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
|
|||||||
*/
|
*/
|
||||||
int pSeries_machine_check_exception(struct pt_regs *regs)
|
int pSeries_machine_check_exception(struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
struct rtas_error_log *errp;
|
struct machine_check_event evt;
|
||||||
|
|
||||||
if (fwnmi_active) {
|
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
|
||||||
fwnmi_release_errinfo();
|
return 0;
|
||||||
errp = fwnmi_get_errlog();
|
|
||||||
if (errp && recover_mce(regs, errp))
|
/* Print things out */
|
||||||
return 1;
|
if (evt.version != MCE_V1) {
|
||||||
|
pr_err("Machine Check Exception, Unknown event version %d !\n",
|
||||||
|
evt.version);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
machine_check_print_event_info(&evt, user_mode(regs), false);
|
||||||
|
|
||||||
|
if (recover_mce(regs, &evt))
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -877,7 +800,12 @@ long pseries_machine_check_realmode(struct pt_regs *regs)
|
|||||||
* to panic. Hence we will call it as soon as we go into
|
* to panic. Hence we will call it as soon as we go into
|
||||||
* virtual mode.
|
* virtual mode.
|
||||||
*/
|
*/
|
||||||
disposition = mce_handle_error(errp);
|
disposition = mce_handle_error(regs, errp);
|
||||||
|
fwnmi_release_errinfo();
|
||||||
|
|
||||||
|
/* Queue irq work to log this rtas event later. */
|
||||||
|
irq_work_queue(&mce_errlog_process_work);
|
||||||
|
|
||||||
if (disposition == RTAS_DISP_FULLY_RECOVERED)
|
if (disposition == RTAS_DISP_FULLY_RECOVERED)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user