mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-14 23:56:06 +07:00
50dbabe06a
Print more information about an MCE error, indicating whether it is a hardware or software error. Some MCE errors can be easily categorized as hardware or software errors, e.g. UEs are due to hardware errors, whereas an error triggered by invalid usage of tlbie is a pure software bug. But not all MCE errors can be easily categorized as either software or hardware. There are errors like multihit errors which are usually the result of a software bug, but in some rare cases a hardware failure can cause a multihit error. In the past, we have seen cases where, after replacing a faulty chip, multihit errors stopped occurring. The same goes for parity errors, which are usually due to faulty hardware, but there are chances that a multihit can also cause a parity error. For such errors it is difficult to determine what really caused them. Hence this patch classifies MCE errors into the following four categories: 1. Hardware error: UE and Link timeout failure errors. 2. Probable hardware error (some chance of software cause): SLB/ERAT/TLB Parity errors. 3. Software error: Invalid tlbie form. 4. Probable software error (some chance of hardware cause): SLB/ERAT/TLB Multihit errors. Sample output: MCE: CPU80: machine check (Warning) Guest SLB Multihit DAR: 000001001b6e0320 [Recovered] MCE: CPU80: PID: 24765 Comm: qemu-system-ppc Guest NIP: [00007fffa309dc60] MCE: CPU80: Probable Software error (some chance of hardware cause) Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
230 lines
5.7 KiB
C
230 lines
5.7 KiB
C
/*
 * Machine check exception header file.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
|
|
|
|
#ifndef __ASM_PPC64_MCE_H__
|
|
#define __ASM_PPC64_MCE_H__
|
|
|
|
#include <linux/bitops.h>
|
|
|
|
/*
 * Version tag stored in the 8-bit `version` field of
 * struct machine_check_event.
 */
enum MCE_Version {
	MCE_V1 = 1,
};
|
|
|
|
/*
 * Severity of a machine check event, numbered in increasing order from
 * "no error" up to "fatal".  Stored in 8-bit bitfields, so values must
 * stay below 256.
 */
enum MCE_Severity {
	MCE_SEV_NO_ERROR = 0,
	MCE_SEV_WARNING = 1,
	MCE_SEV_SEVERE = 2,
	MCE_SEV_FATAL = 3,
};
|
|
|
|
/*
 * Outcome of handling a machine check event: recovered or not.
 * Stored in the 8-bit `disposition` field of struct machine_check_event.
 */
enum MCE_Disposition {
	MCE_DISPOSITION_RECOVERED = 0,
	MCE_DISPOSITION_NOT_RECOVERED = 1,
};
|
|
|
|
/*
 * Initiator of a machine check event: either unknown or the CPU.
 * Stored in 8-bit `initiator` bitfields.
 */
enum MCE_Initiator {
	MCE_INITIATOR_UNKNOWN = 0,
	MCE_INITIATOR_CPU = 1,
};
|
|
|
|
/*
 * Top-level machine check error type.
 *
 * Every type other than MCE_ERROR_TYPE_UNKNOWN has a correspondingly
 * named detail member in the `u` union of struct machine_check_event
 * (ue_error, slb_error, erat_error, tlb_error, user_error, ra_error,
 * link_error) and a sub-type enum (MCE_UeErrorType, ...).
 */
enum MCE_ErrorType {
	MCE_ERROR_TYPE_UNKNOWN = 0,
	MCE_ERROR_TYPE_UE = 1,
	MCE_ERROR_TYPE_SLB = 2,
	MCE_ERROR_TYPE_ERAT = 3,
	MCE_ERROR_TYPE_TLB = 4,
	MCE_ERROR_TYPE_USER = 5,
	MCE_ERROR_TYPE_RA = 6,
	MCE_ERROR_TYPE_LINK = 7,
};
|
|
|
|
/*
 * Classification of a machine check error by its likely cause:
 *
 *   MCE_ECLASS_HARDWARE           - hardware error
 *   MCE_ECLASS_HARD_INDETERMINATE - probable hardware error
 *                                   (some chance of software cause)
 *   MCE_ECLASS_SOFTWARE           - software error
 *   MCE_ECLASS_SOFT_INDETERMINATE - probable software error
 *                                   (some chance of hardware cause)
 *
 * Values are spelled out explicitly, like the other MCE enums in this
 * header; they are identical to the implicit numbering.
 */
enum MCE_ErrorClass {
	MCE_ECLASS_UNKNOWN = 0,
	MCE_ECLASS_HARDWARE = 1,
	MCE_ECLASS_HARD_INDETERMINATE = 2,
	MCE_ECLASS_SOFTWARE = 3,
	MCE_ECLASS_SOFT_INDETERMINATE = 4,
};
|
|
|
|
/*
 * Sub-type of an MCE_ERROR_TYPE_UE error, distinguishing instruction
 * fetch, load/store and the page-table-walk variants of each.
 */
enum MCE_UeErrorType {
	MCE_UE_ERROR_INDETERMINATE = 0,
	MCE_UE_ERROR_IFETCH = 1,
	MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
	MCE_UE_ERROR_LOAD_STORE = 3,
	MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4,
};
|
|
|
|
/* Sub-type of an MCE_ERROR_TYPE_SLB error: parity or multihit. */
enum MCE_SlbErrorType {
	MCE_SLB_ERROR_INDETERMINATE = 0,
	MCE_SLB_ERROR_PARITY = 1,
	MCE_SLB_ERROR_MULTIHIT = 2,
};
|
|
|
|
/* Sub-type of an MCE_ERROR_TYPE_ERAT error: parity or multihit. */
enum MCE_EratErrorType {
	MCE_ERAT_ERROR_INDETERMINATE = 0,
	MCE_ERAT_ERROR_PARITY = 1,
	MCE_ERAT_ERROR_MULTIHIT = 2,
};
|
|
|
|
/* Sub-type of an MCE_ERROR_TYPE_TLB error: parity or multihit. */
enum MCE_TlbErrorType {
	MCE_TLB_ERROR_INDETERMINATE = 0,
	MCE_TLB_ERROR_PARITY = 1,
	MCE_TLB_ERROR_MULTIHIT = 2,
};
|
|
|
|
/*
 * Sub-type of an MCE_ERROR_TYPE_USER error.  The only specific cause
 * listed is MCE_USER_ERROR_TLBIE (an error related to tlbie usage).
 */
enum MCE_UserErrorType {
	MCE_USER_ERROR_INDETERMINATE = 0,
	MCE_USER_ERROR_TLBIE = 1,
};
|
|
|
|
/*
 * Sub-type of an MCE_ERROR_TYPE_RA error, distinguishing instruction
 * fetch, load, store and page-table-walk accesses, each with a
 * "foreign" variant where one is listed.
 *
 * Note that the numbering does not follow a strict pattern:
 * MCE_RA_ERROR_LOAD_STORE_FOREIGN is the last value (9).
 */
enum MCE_RaErrorType {
	MCE_RA_ERROR_INDETERMINATE = 0,
	MCE_RA_ERROR_IFETCH = 1,
	MCE_RA_ERROR_IFETCH_FOREIGN = 2,
	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 3,
	MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 4,
	MCE_RA_ERROR_LOAD = 5,
	MCE_RA_ERROR_STORE = 6,
	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 7,
	MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 8,
	MCE_RA_ERROR_LOAD_STORE_FOREIGN = 9,
};
|
|
|
|
/*
 * Sub-type of an MCE_ERROR_TYPE_LINK error: which kind of access
 * (instruction fetch, load, store or page table walk) timed out.
 */
enum MCE_LinkErrorType {
	MCE_LINK_ERROR_INDETERMINATE = 0,
	MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
	MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
	MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
	MCE_LINK_ERROR_STORE_TIMEOUT = 4,
	MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
};
|
|
|
|
struct machine_check_event {
|
|
enum MCE_Version version:8;
|
|
u8 in_use;
|
|
enum MCE_Severity severity:8;
|
|
enum MCE_Initiator initiator:8;
|
|
enum MCE_ErrorType error_type:8;
|
|
enum MCE_ErrorClass error_class:8;
|
|
enum MCE_Disposition disposition:8;
|
|
bool sync_error;
|
|
u16 cpu;
|
|
u64 gpr3;
|
|
u64 srr0;
|
|
u64 srr1;
|
|
union {
|
|
struct {
|
|
enum MCE_UeErrorType ue_error_type:8;
|
|
u8 effective_address_provided;
|
|
u8 physical_address_provided;
|
|
u8 reserved_1[5];
|
|
u64 effective_address;
|
|
u64 physical_address;
|
|
u8 reserved_2[8];
|
|
} ue_error;
|
|
|
|
struct {
|
|
enum MCE_SlbErrorType slb_error_type:8;
|
|
u8 effective_address_provided;
|
|
u8 reserved_1[6];
|
|
u64 effective_address;
|
|
u8 reserved_2[16];
|
|
} slb_error;
|
|
|
|
struct {
|
|
enum MCE_EratErrorType erat_error_type:8;
|
|
u8 effective_address_provided;
|
|
u8 reserved_1[6];
|
|
u64 effective_address;
|
|
u8 reserved_2[16];
|
|
} erat_error;
|
|
|
|
struct {
|
|
enum MCE_TlbErrorType tlb_error_type:8;
|
|
u8 effective_address_provided;
|
|
u8 reserved_1[6];
|
|
u64 effective_address;
|
|
u8 reserved_2[16];
|
|
} tlb_error;
|
|
|
|
struct {
|
|
enum MCE_UserErrorType user_error_type:8;
|
|
u8 effective_address_provided;
|
|
u8 reserved_1[6];
|
|
u64 effective_address;
|
|
u8 reserved_2[16];
|
|
} user_error;
|
|
|
|
struct {
|
|
enum MCE_RaErrorType ra_error_type:8;
|
|
u8 effective_address_provided;
|
|
u8 reserved_1[6];
|
|
u64 effective_address;
|
|
u8 reserved_2[16];
|
|
} ra_error;
|
|
|
|
struct {
|
|
enum MCE_LinkErrorType link_error_type:8;
|
|
u8 effective_address_provided;
|
|
u8 reserved_1[6];
|
|
u64 effective_address;
|
|
u8 reserved_2[16];
|
|
} link_error;
|
|
} u;
|
|
};
|
|
|
|
struct mce_error_info {
|
|
enum MCE_ErrorType error_type:8;
|
|
union {
|
|
enum MCE_UeErrorType ue_error_type:8;
|
|
enum MCE_SlbErrorType slb_error_type:8;
|
|
enum MCE_EratErrorType erat_error_type:8;
|
|
enum MCE_TlbErrorType tlb_error_type:8;
|
|
enum MCE_UserErrorType user_error_type:8;
|
|
enum MCE_RaErrorType ra_error_type:8;
|
|
enum MCE_LinkErrorType link_error_type:8;
|
|
} u;
|
|
enum MCE_Severity severity:8;
|
|
enum MCE_Initiator initiator:8;
|
|
enum MCE_ErrorClass error_class:8;
|
|
bool sync_error;
|
|
};
|
|
|
|
/*
 * Upper bound on machine check events.
 * NOTE(review): presumably the capacity of the event queue -- confirm
 * against the users of this macro.
 */
#define MAX_MC_EVT	100

/* Release flags for get_mce_event(): values for its `release` argument. */
#define MCE_EVENT_RELEASE	true
#define MCE_EVENT_DONTRELEASE	false
|
|
|
|
/*
 * Forward declaration so that the prototypes below do not introduce
 * struct pt_regs inside a parameter list when this header is included
 * before the header that defines it.
 */
struct pt_regs;

/*
 * Machine check event API.  Only the prototypes are declared here; the
 * implementations live elsewhere.
 *
 * save_mce_event() takes the register state, a `handled` status, the
 * decoded error information and the NIP / effective / physical
 * addresses associated with the error.
 *
 * get_mce_event() fills in *mce; pass MCE_EVENT_RELEASE or
 * MCE_EVENT_DONTRELEASE as `release`.
 */
extern void save_mce_event(struct pt_regs *regs, long handled,
			   struct mce_error_info *mce_err, uint64_t nip,
			   uint64_t addr, uint64_t phys_addr);
extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
					   bool user_mode, bool in_guest);
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
#ifdef CONFIG_PPC_BOOK3S_64
/* Only declared when CONFIG_PPC_BOOK3S_64 is set. */
void flush_and_reload_slb(void);
#endif /* CONFIG_PPC_BOOK3S_64 */
|
|
#endif /* __ASM_PPC64_MCE_H__ */
|