mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-13 01:36:48 +07:00
6fcd6baa90
There are quite a few machine check exceptions that can be caused by kernel bugs. To make debugging easier, use the kernel crash path in cases of synchronous machine checks that occur in kernel mode, if that would not result in the machine going straight to panic or crash dump. There is a downside here that die()ing the process in kernel mode can still leave the system unstable. panic_on_oops will always force the system to fail-stop, so systems where that behaviour is important will still do the right thing. As a test, when triggering an i-side 0111b error (ifetch from foreign address) in kernel mode process context on POWER9, the kernel currently dies quickly like this: Severe Machine check interrupt [Not recovered] NIP [ffff000000000000]: 0xffff000000000000 Initiator: CPU Error type: Real address [Instruction fetch (foreign)] [ 127.426651616,0] OPAL: Reboot requested due to Platform error. Effective[ 127.426693712,3] OPAL: Reboot requested due to Platform error. address: ffff000000000000 opal: Reboot type 1 not supported Kernel panic - not syncing: PowerNV Unrecovered Machine Check CPU: 56 PID: 4425 Comm: syscall Tainted: G M 4.12.0-rc1-13857-ga4700a261072-dirty #35 Call Trace: [ 128.017988928,4] IPMI: BUG: Dropping ESEL on the floor due to buggy/mising code in OPAL for this BMC Rebooting in 10 seconds.. Trying to free IRQ 496 from IRQ context! After this patch, the process is killed and the kernel continues with this message, which gives enough information to identify the offending branch (i.e., with CFAR): Severe Machine check interrupt [Not recovered] NIP [ffff000000000000]: 0xffff000000000000 Initiator: CPU Error type: Real address [Instruction fetch (foreign)] Effective address: ffff000000000000 Oops: Machine check, sig: 7 [#1] SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 ... 
CPU: 22 PID: 4436 Comm: syscall Tainted: G M 4.12.0-rc1-13857-ga4700a261072-dirty #36 task: c000000932300000 task.stack: c000000932380000 NIP: ffff000000000000 LR: 00000000217706a4 CTR: ffff000000000000 REGS: c00000000fc8fd80 TRAP: 0200 Tainted: G M (4.12.0-rc1-13857-ga4700a261072-dirty) MSR: 90000000001c1003 <SF,HV,ME,RI,LE> CR: 24000484 XER: 20000000 CFAR: c000000000004c80 DAR: 0000000021770a90 DSISR: 0a000000 SOFTE: 1 GPR00: 0000000000001ebe 00007fffce4818b0 0000000021797f00 0000000000000000 GPR04: 00007fff8007ac24 0000000044000484 0000000000004000 00007fff801405e8 GPR08: 900000000280f033 0000000024000484 0000000000000000 0000000000000030 GPR12: 9000000000001003 00007fff801bc370 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR28: 00007fff801b0000 0000000000000000 00000000217707a0 00007fffce481918 NIP [ffff000000000000] 0xffff000000000000 LR [00000000217706a4] 0x217706a4 Call Trace: Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
142 lines
3.5 KiB
C
142 lines
3.5 KiB
C
#ifndef _ASM_POWERPC_BUG_H
|
|
#define _ASM_POWERPC_BUG_H
|
|
#ifdef __KERNEL__
|
|
|
|
#include <asm/asm-compat.h>
|
|
|
|
/*
|
|
* Define an illegal instr to trap on the bug.
|
|
* We don't use 0 because that marks the end of a function
|
|
* in the ELF ABI. That's "Boo Boo" in case you wonder...
|
|
*/
|
|
#define BUG_OPCODE .long 0x00b00b00 /* For asm */
|
|
#define BUG_ILLEGAL_INSTR "0x00b00b00" /* For BUG macro */
|
|
|
|
#ifdef CONFIG_BUG
|
|
|
|
#ifdef __ASSEMBLY__
|
|
#include <asm/asm-offsets.h>
|
|
#ifdef CONFIG_DEBUG_BUGVERBOSE
|
|
/*
 * EMIT_BUG_ENTRY (assembler, CONFIG_DEBUG_BUGVERBOSE variant).
 *
 * Emits one record into the __bug_table section:
 *   - \addr followed by the address of the file-name string (label 5002),
 *     both emitted with PPC_LONG;
 *   - two .short values: \line and \flags.
 * The .org directive then advances the location counter to
 * BUG_ENTRY_SIZE bytes past the start of the record (label 5001).
 * BUG_ENTRY_SIZE is not defined in this file; presumably it comes from
 * <asm/asm-offsets.h>, included above - confirm.
 *
 * The file name itself is emitted as a NUL-terminated string in .rodata.
 * Each .section is paired with a .previous, so the caller's section is
 * restored when the macro ends.
 */
.macro EMIT_BUG_ENTRY addr,file,line,flags
|
|
.section __bug_table,"aw"
|
|
5001: PPC_LONG \addr, 5002f
|
|
.short \line, \flags
|
|
.org 5001b+BUG_ENTRY_SIZE
|
|
.previous
|
|
.section .rodata,"a"
|
|
5002: .asciz "\file"
|
|
.previous
|
|
.endm
|
|
#else
|
|
/*
 * EMIT_BUG_ENTRY (assembler, variant without CONFIG_DEBUG_BUGVERBOSE).
 *
 * Takes the same four arguments as the verbose variant so call sites do
 * not change, but only \addr and \flags are written to __bug_table;
 * \file and \line are accepted and ignored. The .org directive advances
 * the location counter to BUG_ENTRY_SIZE bytes past the start of the
 * record (label 5001), and .previous restores the caller's section.
 */
.macro EMIT_BUG_ENTRY addr,file,line,flags
|
|
.section __bug_table,"aw"
|
|
5001: PPC_LONG \addr
|
|
.short \flags
|
|
.org 5001b+BUG_ENTRY_SIZE
|
|
.previous
|
|
.endm
|
|
#endif /* verbose */
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
/* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and
|
|
sizeof(struct bug_entry), respectively */
|
|
#ifdef CONFIG_DEBUG_BUGVERBOSE
|
|
/*
 * Inline-asm string fragment (CONFIG_DEBUG_BUGVERBOSE variant) that
 * appends one record to __bug_table.
 *
 * The fragment refers back to local label "1", which the enclosing asm
 * statement must place on the trapping instruction immediately before
 * using this macro. Label "2" marks the start of the record, which holds
 * the address of label 1 and operand %0, followed by the two .short
 * values %1 and %2. ".org 2b+%3" advances the location counter to %3
 * bytes past the start of the record.
 */
#define _EMIT_BUG_ENTRY \
|
|
".section __bug_table,\"aw\"\n" \
|
|
"2:\t" PPC_LONG "1b, %0\n" \
|
|
"\t.short %1, %2\n" \
|
|
".org 2b+%3\n" \
|
|
".previous\n"
|
|
#else
|
|
/*
 * Inline-asm string fragment (variant without CONFIG_DEBUG_BUGVERBOSE).
 *
 * Only the address of label 1 and the flags (%2) are recorded; operands
 * %0 and %1 are not referenced here. The operand numbering matches the
 * verbose variant, so users of this macro pass the same operand list in
 * both configurations. ".org 2b+%3" advances the location counter to %3
 * bytes past the start of the record.
 */
#define _EMIT_BUG_ENTRY \
|
|
".section __bug_table,\"aw\"\n" \
|
|
"2:\t" PPC_LONG "1b\n" \
|
|
"\t.short %2\n" \
|
|
".org 2b+%3\n" \
|
|
".previous\n"
|
|
#endif
|
|
|
|
/*
|
|
* BUG_ON() and WARN_ON() do their best to cooperate with compile-time
|
|
* optimisations. However depending on the complexity of the condition
|
|
* some compiler versions may not produce optimal results.
|
|
*/
|
|
|
|
/*
 * BUG() - emit a trap instruction and its __bug_table record.
 *
 * Local label 1 is placed on a "twi 31,0,0" instruction, and
 * _EMIT_BUG_ENTRY records that address together with __FILE__,
 * __LINE__, a flags value of 0 and sizeof(struct bug_entry).
 * unreachable() follows the asm statement, telling the compiler that
 * control does not continue past this point.
 */
#define BUG() do { \
|
|
__asm__ __volatile__( \
|
|
"1: twi 31,0,0\n" \
|
|
_EMIT_BUG_ENTRY \
|
|
: : "i" (__FILE__), "i" (__LINE__), \
|
|
"i" (0), "i" (sizeof(struct bug_entry))); \
|
|
unreachable(); \
|
|
} while (0)
|
|
|
|
/*
 * BUG_ON(x) - trap when x is non-zero.
 *
 * If the compiler can prove x is a compile-time constant, the test is
 * done in C: BUG() is used when x is true and nothing is emitted
 * otherwise. For a run-time x, the value is cast to long, placed in a
 * register (operand %4) and tested by a single PPC_TLNEI instruction
 * against 0; the matching __bug_table record carries a flags value of 0.
 * PPC_TLNEI is not defined in this file; presumably it comes from
 * <asm/asm-compat.h> and is a "trap if not equal immediate"
 * instruction - confirm.
 *
 * NOTE(review): because x is cast to long, a condition whose type is
 * wider than long (e.g. a 64-bit value on a 32-bit build) would
 * presumably be truncated before the test - confirm against callers.
 */
#define BUG_ON(x) do { \
|
|
if (__builtin_constant_p(x)) { \
|
|
if (x) \
|
|
BUG(); \
|
|
} else { \
|
|
__asm__ __volatile__( \
|
|
"1: "PPC_TLNEI" %4,0\n" \
|
|
_EMIT_BUG_ENTRY \
|
|
: : "i" (__FILE__), "i" (__LINE__), "i" (0), \
|
|
"i" (sizeof(struct bug_entry)), \
|
|
"r" ((__force long)(x))); \
|
|
} \
|
|
} while (0)
|
|
|
|
/*
 * __WARN_FLAGS(flags) - emit a warning trap.
 *
 * Uses the same "twi 31,0,0" instruction as BUG(), but the
 * __bug_table record is tagged with BUGFLAG_WARNING OR'ed with the
 * caller-supplied flags. Unlike BUG(), there is no unreachable() after
 * the asm statement, so the compiler treats the following code as
 * reachable; presumably the trap handler resumes execution after the
 * trap for warning entries - confirm.
 */
#define __WARN_FLAGS(flags) do { \
|
|
__asm__ __volatile__( \
|
|
"1: twi 31,0,0\n" \
|
|
_EMIT_BUG_ENTRY \
|
|
: : "i" (__FILE__), "i" (__LINE__), \
|
|
"i" (BUGFLAG_WARNING|(flags)), \
|
|
"i" (sizeof(struct bug_entry))); \
|
|
} while (0)
|
|
|
|
/*
 * WARN_ON(x) - warn when x is non-zero; evaluates to the truth of x.
 *
 * This is a statement expression. x is evaluated exactly once and
 * normalised to 0 or 1 in __ret_warn_on. If that value is a
 * compile-time constant, __WARN() is used when it is true (__WARN() is
 * not defined in this file; presumably it comes from
 * <asm-generic/bug.h> - confirm). Otherwise a single PPC_TLNEI
 * instruction tests the value in a register (operand %4) against 0, and
 * the __bug_table record is tagged
 * BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN).
 *
 * The value of the whole expression is unlikely(__ret_warn_on), so the
 * macro can be used directly as an if () condition.
 */
#define WARN_ON(x) ({ \
|
|
int __ret_warn_on = !!(x); \
|
|
if (__builtin_constant_p(__ret_warn_on)) { \
|
|
if (__ret_warn_on) \
|
|
__WARN(); \
|
|
} else { \
|
|
__asm__ __volatile__( \
|
|
"1: "PPC_TLNEI" %4,0\n" \
|
|
_EMIT_BUG_ENTRY \
|
|
: : "i" (__FILE__), "i" (__LINE__), \
|
|
"i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
|
|
"i" (sizeof(struct bug_entry)), \
|
|
"r" (__ret_warn_on)); \
|
|
} \
|
|
unlikely(__ret_warn_on); \
|
|
})
|
|
|
|
/*
 * Marker macros announcing that BUG(), BUG_ON() and WARN_ON() are
 * defined by this header. Presumably <asm-generic/bug.h>, included
 * further down, checks these to skip its own fallback definitions -
 * confirm.
 */
#define HAVE_ARCH_BUG
|
|
#define HAVE_ARCH_BUG_ON
|
|
#define HAVE_ARCH_WARN_ON
|
|
#endif /* __ASSEMBLY__ */
|
|
#else
|
|
#ifdef __ASSEMBLY__
|
|
/*
 * CONFIG_BUG is disabled: EMIT_BUG_ENTRY keeps its four-argument
 * signature but expands to nothing, so assembler call sites still
 * assemble without emitting a __bug_table record.
 */
.macro EMIT_BUG_ENTRY addr,file,line,flags
|
|
.endm
|
|
#else /* !__ASSEMBLY__ */
|
|
/* CONFIG_BUG is disabled: the inline-asm fragment expands to nothing. */
#define _EMIT_BUG_ENTRY
|
|
#endif
|
|
#endif /* CONFIG_BUG */
|
|
|
|
#include <asm-generic/bug.h>
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/*
 * Prototypes for fault and exception helpers, visible to C code only.
 * Their definitions are not in this header. struct pt_regs is
 * forward-declared because only pointers to it are used here.
 */
struct pt_regs;
|
|
extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
|
|
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
|
|
extern void _exception(int, struct pt_regs *, int, unsigned long);
|
|
extern void die(const char *, struct pt_regs *, long);
|
|
/*
 * NOTE(review): presumably reports whether calling die() would take the
 * machine straight to panic or crash dump - confirm against the
 * definition.
 */
extern bool die_will_crash(void);
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_BUG_H */
|