linux_dsm_epyc7002/arch/x86/kernel/dumpstack_64.c
Thomas Gleixner 2a594d4ccf x86/exceptions: Split debug IST stack
The debug IST stack is actually two separate debug stacks to handle #DB
recursion. This is required because the CPU starts always at top of stack
on exception entry, which means on #DB recursion the second #DB would
overwrite the stack of the first.

The low level entry code therefore adjusts the top of stack on entry so a
secondary #DB starts from a different stack page. But the stack pages are
adjacent without a guard page between them.

Split the debug stack into 3 stacks which are separated by guard pages. The
3rd stack is never mapped into the cpu_entry_area and is only there to
catch triple #DB nesting:

      --- top of DB_stack	<- Initial stack
      --- end of DB_stack
      	  guard page

      --- top of DB1_stack	<- Top of stack after entering first #DB
      --- end of DB1_stack
      	  guard page

      --- top of DB2_stack	<- Top of stack after entering second #DB
      --- end of DB2_stack
      	  guard page

If DB2 would not act as the final guard hole, a second #DB would point the
top of #DB stack to the stack below #DB1 which would be valid and not catch
the not so desired triple nesting.

The backing store does not allocate any memory for DB2 and its guard page
as it is not going to be mapped into the cpu_entry_area.

 - Adjust the low level entry code so it adjusts top of #DB with the offset
   between the stacks instead of exception stack size.

 - Make the dumpstack code aware of the new stacks.

 - Adjust the in_debug_stack() implementation and move it into the NMI code
   where it belongs. As this is NMI hotpath code, it just checks the full
   area between top of DB_stack and bottom of DB1_stack without checking
   for the guard page. That's correct because the NMI cannot hit a
   stackpointer pointing to the guard page between DB and DB1 stack.  Even
   if it would, then the NMI operation still is unaffected, but the resume
   of the debug exception on the topmost DB stack will crash by touching
   the guard page.

  [ bp: Make exception_stack_names static const char * const ]

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Chang S. Bae" <chang.seok.bae@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: linux-doc@vger.kernel.org
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qian Cai <cai@lca.pw>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190414160145.439944544@linutronix.de
2019-04-17 15:14:28 +02:00

171 lines
4.0 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
*/
#include <linux/sched/debug.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/sysfs.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
static const char * const exception_stack_names[] = {
[ ESTACK_DF ] = "#DF",
[ ESTACK_NMI ] = "NMI",
[ ESTACK_DB2 ] = "#DB2",
[ ESTACK_DB1 ] = "#DB1",
[ ESTACK_DB ] = "#DB",
[ ESTACK_MCE ] = "#MC",
};
const char *stack_type_name(enum stack_type type)
{
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
if (type == STACK_TYPE_IRQ)
return "IRQ";
if (type == STACK_TYPE_ENTRY) {
/*
* On 64-bit, we have a generic entry stack that we
* use for all the kernel entry points, including
* SYSENTER.
*/
return "ENTRY_TRAMPOLINE";
}
if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
return exception_stack_names[type - STACK_TYPE_EXCEPTION];
return NULL;
}
struct estack_layout {
unsigned int begin;
unsigned int end;
};
#define ESTACK_ENTRY(x) { \
.begin = offsetof(struct cea_exception_stacks, x## _stack), \
.end = offsetof(struct cea_exception_stacks, x## _stack_guard) \
}
static const struct estack_layout layout[] = {
[ ESTACK_DF ] = ESTACK_ENTRY(DF),
[ ESTACK_NMI ] = ESTACK_ENTRY(NMI),
[ ESTACK_DB2 ] = { .begin = 0, .end = 0},
[ ESTACK_DB1 ] = ESTACK_ENTRY(DB1),
[ ESTACK_DB ] = ESTACK_ENTRY(DB),
[ ESTACK_MCE ] = ESTACK_ENTRY(MCE),
};
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long estacks, begin, end, stk = (unsigned long)stack;
struct pt_regs *regs;
unsigned int k;
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
estacks = (unsigned long)__this_cpu_read(cea_exception_stacks);
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
begin = estacks + layout[k].begin;
end = estacks + layout[k].end;
regs = (struct pt_regs *)end - 1;
if (stk < begin || stk >= end)
continue;
info->type = STACK_TYPE_EXCEPTION + k;
info->begin = (unsigned long *)begin;
info->end = (unsigned long *)end;
info->next_sp = (unsigned long *)regs->sp;
return true;
}
return false;
}
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
if (stack < begin || stack >= end)
return false;
info->type = STACK_TYPE_IRQ;
info->begin = begin;
info->end = end;
/*
* The next stack pointer is the first thing pushed by the entry code
* after switching to the irq stack.
*/
info->next_sp = (unsigned long *)*(end - 1);
return true;
}
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask)
{
if (!stack)
goto unknown;
task = task ? : current;
if (in_task_stack(stack, task, info))
goto recursion_check;
if (task != current)
goto unknown;
if (in_exception_stack(stack, info))
goto recursion_check;
if (in_irq_stack(stack, info))
goto recursion_check;
if (in_entry_stack(stack, info))
goto recursion_check;
goto unknown;
recursion_check:
/*
* Make sure we don't iterate through any given stack more than once.
* If it comes up a second time then there's something wrong going on:
* just break out and report an unknown stack type.
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
}
return 0;
unknown:
info->type = STACK_TYPE_UNKNOWN;
return -EINVAL;
}