mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-25 02:59:26 +07:00
ed235875e2
A 'softlockup' is defined as a bug that causes the kernel to loop in kernel mode for more than a predefined period to time, without giving other tasks a chance to run. Currently, upon detection of this condition by the per-cpu watchdog task, debug information (including a stack trace) is sent to the system log. On some occasions, we have observed that the "victim" rather than the actual "culprit" (i.e. the owner/holder of the contended resource) is reported to the user. Often this information has proven to be insufficient to assist debugging efforts. To avoid loss of useful debug information, for architectures which support NMI, this patch makes it possible to improve soft lockup reporting. This is accomplished by issuing an NMI to each cpu to obtain a stack trace. If NMI is not supported we just revert back to the old method. A sysctl and boot-time parameter is available to toggle this feature. [dzickus@redhat.com: add CONFIG_SMP in certain areas] [akpm@linux-foundation.org: additional CONFIG_SMP=n optimisations] [mq@suse.cz: fix warning] Signed-off-by: Aaron Tomlin <atomlin@redhat.com> Signed-off-by: Don Zickus <dzickus@redhat.com> Cc: David S. Miller <davem@davemloft.net> Cc: Mateusz Guzik <mguzik@redhat.com> Cc: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Jan Moskyto Matejka <mq@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
67 lines
1.6 KiB
C
67 lines
1.6 KiB
C
/*
|
|
* linux/include/linux/nmi.h
|
|
*/
|
|
#ifndef LINUX_NMI_H
|
|
#define LINUX_NMI_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <asm/irq.h>
|
|
|
|
/**
|
|
* touch_nmi_watchdog - restart NMI watchdog timeout.
|
|
*
|
|
* If the architecture supports the NMI watchdog, touch_nmi_watchdog()
|
|
* may be used to reset the timeout - for code which intentionally
|
|
* disables interrupts for a long time. This call is stateless.
|
|
*/
|
|
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
|
|
#include <asm/nmi.h>
|
|
extern void touch_nmi_watchdog(void);
|
|
#else
|
|
static inline void touch_nmi_watchdog(void)
|
|
{
|
|
touch_softlockup_watchdog();
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Create trigger_all_cpu_backtrace() out of the arch-provided
|
|
* base function. Return whether such support was available,
|
|
* to allow calling code to fall back to some other mechanism:
|
|
*/
|
|
#ifdef arch_trigger_all_cpu_backtrace
|
|
static inline bool trigger_all_cpu_backtrace(void)
|
|
{
|
|
arch_trigger_all_cpu_backtrace(true);
|
|
|
|
return true;
|
|
}
|
|
static inline bool trigger_allbutself_cpu_backtrace(void)
|
|
{
|
|
arch_trigger_all_cpu_backtrace(false);
|
|
return true;
|
|
}
|
|
#else
|
|
static inline bool trigger_all_cpu_backtrace(void)
|
|
{
|
|
return false;
|
|
}
|
|
static inline bool trigger_allbutself_cpu_backtrace(void)
|
|
{
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_LOCKUP_DETECTOR
|
|
int hw_nmi_is_cpu_stuck(struct pt_regs *);
|
|
u64 hw_nmi_get_sample_period(int watchdog_thresh);
|
|
extern int watchdog_user_enabled;
|
|
extern int watchdog_thresh;
|
|
extern int sysctl_softlockup_all_cpu_backtrace;
|
|
struct ctl_table;
|
|
extern int proc_dowatchdog(struct ctl_table *, int ,
|
|
void __user *, size_t *, loff_t *);
|
|
#endif
|
|
|
|
#endif
|