linux_dsm_epyc7002/arch/powerpc/kernel/stacktrace.c

272 lines
6.5 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Stack trace utility functions etc.
*
* Copyright 2008 Christoph Hellwig, IBM Corp.
2018-05-04 19:38:34 +07:00
* Copyright 2018 SUSE Linux GmbH
* Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
*/
#include <linux/export.h>
2018-05-04 19:38:34 +07:00
#include <linux/kallsyms.h>
#include <linux/module.h>
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 20:07:28 +07:00
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
2018-05-04 19:38:34 +07:00
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
2018-05-04 19:38:34 +07:00
#include <linux/ftrace.h>
#include <asm/kprobes.h>
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 20:07:28 +07:00
#include <asm/paca.h>
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
static void save_context_stack(struct stack_trace *trace, unsigned long sp,
struct task_struct *tsk, int savesched)
{
for (;;) {
unsigned long *stack = (unsigned long *) sp;
unsigned long newsp, ip;
if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
return;
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
if (savesched || !in_sched_functions(ip)) {
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
}
if (trace->nr_entries >= trace->max_entries)
return;
sp = newsp;
}
}
void save_stack_trace(struct stack_trace *trace)
{
unsigned long sp;
sp = current_stack_pointer();
save_context_stack(trace, sp, current, 1);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
unsigned long sp;
if (!try_get_task_stack(tsk))
return;
if (tsk == current)
sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
save_context_stack(trace, sp, tsk, 0);
put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void
save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
save_context_stack(trace, regs->gpr[1], current, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
2018-05-04 19:38:34 +07:00
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
/*
* This function returns an error if it detects any unreliable features of the
* stack. Otherwise it guarantees that the stack trace is reliable.
*
* If the task is not 'current', the caller *must* ensure the task is inactive.
*/
static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
struct stack_trace *trace)
2018-05-04 19:38:34 +07:00
{
unsigned long sp;
unsigned long newsp;
2018-05-04 19:38:34 +07:00
unsigned long stack_page = (unsigned long)task_stack_page(tsk);
unsigned long stack_end;
int graph_idx = 0;
bool firstframe;
2018-05-04 19:38:34 +07:00
stack_end = stack_page + THREAD_SIZE;
if (!is_idle_task(tsk)) {
/*
* For user tasks, this is the SP value loaded on
* kernel entry, see "PACAKSAVE(r13)" in _switch() and
* system_call_common()/EXCEPTION_PROLOG_COMMON().
*
* Likewise for non-swapper kernel threads,
* this also happens to be the top of the stack
* as setup by copy_thread().
*
* Note that stack backlinks are not properly setup by
* copy_thread() and thus, a forked task() will have
* an unreliable stack trace until it's been
* _switch()'ed to for the first time.
*/
stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
} else {
/*
* idle tasks have a custom stack layout,
* c.f. cpu_idle_thread_init().
*/
stack_end -= STACK_FRAME_OVERHEAD;
}
if (tsk == current)
sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
2018-05-04 19:38:34 +07:00
if (sp < stack_page + sizeof(struct thread_struct) ||
sp > stack_end - STACK_FRAME_MIN_SIZE) {
return -EINVAL;
2018-05-04 19:38:34 +07:00
}
for (firstframe = true; sp != stack_end;
firstframe = false, sp = newsp) {
2018-05-04 19:38:34 +07:00
unsigned long *stack = (unsigned long *) sp;
unsigned long ip;
2018-05-04 19:38:34 +07:00
/* sanity check: ABI requires SP to be aligned 16 bytes. */
if (sp & 0xF)
return -EINVAL;
2018-05-04 19:38:34 +07:00
newsp = stack[0];
/* Stack grows downwards; unwinder may only go up. */
if (newsp <= sp)
return -EINVAL;
2018-05-04 19:38:34 +07:00
if (newsp != stack_end &&
newsp > stack_end - STACK_FRAME_MIN_SIZE) {
return -EINVAL; /* invalid backlink, too far up. */
2018-05-04 19:38:34 +07:00
}
/*
* We can only trust the bottom frame's backlink, the
* rest of the frame may be uninitialized, continue to
* the next.
*/
if (firstframe)
continue;
/* Mark stacktraces with exception frames as unreliable. */
if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
return -EINVAL;
}
2018-05-04 19:38:34 +07:00
/* Examine the saved LR: it must point into kernel code. */
ip = stack[STACK_FRAME_LR_SAVE];
if (!__kernel_text_address(ip))
return -EINVAL;
2018-05-04 19:38:34 +07:00
/*
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
2018-05-04 19:38:34 +07:00
/*
* Mark stacktraces with kretprobed functions on them
* as unreliable.
*/
if (ip == (unsigned long)kretprobe_trampoline)
return -EINVAL;
#endif
2018-05-04 19:38:34 +07:00
if (trace->nr_entries >= trace->max_entries)
return -E2BIG;
2018-05-04 19:38:34 +07:00
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
}
return 0;
}
int save_stack_trace_tsk_reliable(struct task_struct *tsk,
struct stack_trace *trace)
{
int ret;
/*
* If the task doesn't have a stack (e.g., a zombie), the stack is
* "reliably" empty.
*/
if (!try_get_task_stack(tsk))
return 0;
ret = __save_stack_trace_tsk_reliable(tsk, trace);
put_task_stack(tsk);
return ret;
}
2018-05-04 19:38:34 +07:00
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 20:07:28 +07:00
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 20:07:28 +07:00
static void handle_backtrace_ipi(struct pt_regs *regs)
{
nmi_cpu_backtrace(regs);
}
static void raise_backtrace_ipi(cpumask_t *mask)
{
unsigned int cpu;
for_each_cpu(cpu, mask) {
if (cpu == smp_processor_id())
handle_backtrace_ipi(NULL);
else
smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
}
for_each_cpu(cpu, mask) {
struct paca_struct *p = paca_ptrs[cpu];
cpumask_clear_cpu(cpu, mask);
pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
if (!virt_addr_valid(p)) {
pr_warn("paca pointer appears corrupt? (%px)\n", p);
continue;
}
pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
p->irq_soft_mask, p->in_mce, p->in_nmi);
if (virt_addr_valid(p->__current))
pr_cont(" current: %d (%s)\n", p->__current->pid,
p->__current->comm);
else
pr_cont(" current pointer corrupt? (%px)\n", p->__current);
pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
show_stack(p->__current, (unsigned long *)p->saved_r1);
}
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
}
#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */