linux_dsm_epyc7002/drivers/char/sysrq.c

803 lines
18 KiB
C
Raw Normal View History

/*
* Linux Magic System Request Key Hacks
*
* (c) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
* based on ideas by Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>
*
* (c) 2000 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
* overhauled to use key registration
* based upon discusions in irc://irc.openprojects.net/#kernelnewbies
*
* Copyright (c) 2010 Dmitry Torokhov
* Input handler conversion
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/tty.h>
#include <linux/mount.h>
#include <linux/kdev_t.h>
#include <linux/major.h>
#include <linux/reboot.h>
#include <linux/sysrq.h>
#include <linux/kbd_kern.h>
#include <linux/proc_fs.h>
debug lockups: Improve lockup detection When debugging a recent lockup bug i found various deficiencies in how our current lockup detection helpers work: - SysRq-L is not very efficient as it uses a workqueue, hence it cannot punch through hard lockups and cannot see through most soft lockups either. - The SysRq-L code depends on the NMI watchdog - which is off by default. - We dont print backtraces from the RCU code's built-in 'RCU state machine is stuck' debug code. This debug code tends to be one of the first (and only) mechanisms that show that a lockup has occured. This patch changes the code so taht we: - Trigger the NMI backtrace code from SysRq-L instead of using a workqueue (which cannot punch through hard lockups) - Trigger print-all-CPU-backtraces from the RCU lockup detection code Also decouple the backtrace printing code from the NMI watchdog: - Dont use variable size cpumasks (it might not be initialized and they are a bit more fragile anyway) - Trigger an NMI immediately via an IPI, instead of waiting for the NMI tick to occur. This is a lot faster and can produce more relevant backtraces. It will also work if the NMI watchdog is disabled. - Dont print the 'dazed and confused' message when we print a backtrace from the NMI - Do a show_regs() plus a dump_stack() to get maximum info out of the dump. Worst-case we get two stacktraces - which is not a big deal. Sometimes, if register content is corrupted, the precise stack walker in show_regs() wont give us a full backtrace - in this case dump_stack() will do it. Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-08-02 16:28:21 +07:00
#include <linux/nmi.h>
#include <linux/quotaops.h>
perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\<event\>/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian <eranian@google.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Paul Mackerras <paulus@samba.org> Reviewed-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: <linux-arch@vger.kernel.org> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 17:02:48 +07:00
#include <linux/perf_event.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h> /* for fsync_bdev() */
#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/vt_kern.h>
#include <linux/workqueue.h>
#include <linux/hrtimer.h>
#include <linux/oom.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <linux/input.h>
#include <asm/ptrace.h>
#include <asm/irq_regs.h>
/* Whether we react on sysrq keys or just ignore them */
static int __read_mostly sysrq_enabled = 1;
static bool __read_mostly sysrq_always_enabled;
static bool sysrq_on(void)
{
return sysrq_enabled || sysrq_always_enabled;
}
/*
* A value of 1 means 'all', other nonzero values are an op mask:
*/
static bool sysrq_on_mask(int mask)
{
return sysrq_always_enabled ||
sysrq_enabled == 1 ||
(sysrq_enabled & mask);
}
static int __init sysrq_always_enabled_setup(char *str)
{
sysrq_always_enabled = true;
pr_info("sysrq always enabled.\n");
return 1;
}
__setup("sysrq_always_enabled", sysrq_always_enabled_setup);
static void sysrq_handle_loglevel(int key)
{
int i;
i = key - '0';
console_loglevel = 7;
printk("Loglevel set to %d\n", i);
console_loglevel = i;
}
static struct sysrq_key_op sysrq_loglevel_op = {
.handler = sysrq_handle_loglevel,
.help_msg = "loglevel(0-9)",
.action_msg = "Changing Loglevel",
.enable_mask = SYSRQ_ENABLE_LOG,
};
#ifdef CONFIG_VT
static void sysrq_handle_SAK(int key)
{
struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work;
schedule_work(SAK_work);
}
static struct sysrq_key_op sysrq_SAK_op = {
.handler = sysrq_handle_SAK,
.help_msg = "saK",
.action_msg = "SAK",
.enable_mask = SYSRQ_ENABLE_KEYBOARD,
};
#else
#define sysrq_SAK_op (*(struct sysrq_key_op *)NULL)
#endif
#ifdef CONFIG_VT
static void sysrq_handle_unraw(int key)
{
struct kbd_struct *kbd = &kbd_table[fg_console];
if (kbd)
kbd->kbdmode = default_utf8 ? VC_UNICODE : VC_XLATE;
}
static struct sysrq_key_op sysrq_unraw_op = {
.handler = sysrq_handle_unraw,
.help_msg = "unRaw",
.action_msg = "Keyboard mode set to system default",
.enable_mask = SYSRQ_ENABLE_KEYBOARD,
};
#else
#define sysrq_unraw_op (*(struct sysrq_key_op *)NULL)
#endif /* CONFIG_VT */
static void sysrq_handle_crash(int key)
{
char *killer = NULL;
sysrq, kdump: make sysrq-c consistent commit d6580a9f15238b87e618310c862231ae3f352d2d ("kexec: sysrq: simplify sysrq-c handler") changed the behavior of sysrq-c to unconditional dereference of NULL pointer. So in cases with CONFIG_KEXEC, where crash_kexec() was directly called from sysrq-c before, now it can be said that a step of "real oops" was inserted before starting kdump. However, in contrast to oops via SysRq-c from keyboard which results in panic due to in_interrupt(), oops via "echo c > /proc/sysrq-trigger" will not become panic unless panic_on_oops=1. It means that even if dump is properly configured to be taken on panic, the sysrq-c from proc interface might not start crashdump while the sysrq-c from keyboard can start crashdump. This confuses traditional users of kdump, i.e. people who expect sysrq-c to do common behavior in both of the keyboard and proc interface. This patch brings the keyboard and proc interface behavior of sysrq-c in line, by forcing panic_on_oops=1 before oops in sysrq-c handler. And some updates in documentation are included, to clarify that there is no longer dependency with CONFIG_KEXEC, and that now the system can just crash by sysrq-c if no dump mechanism is configured. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp> Acked-by: Neil Horman <nhorman@tuxdriver.com> Acked-by: Vivek Goyal <vgoyal@redhat.com> Cc: Brayan Arraes <brayan@yack.com.br> Cc: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-07-30 05:04:14 +07:00
panic_on_oops = 1; /* force panic */
wmb();
*killer = 1;
}
sysrq, kdump: make sysrq-c consistent commit d6580a9f15238b87e618310c862231ae3f352d2d ("kexec: sysrq: simplify sysrq-c handler") changed the behavior of sysrq-c to unconditional dereference of NULL pointer. So in cases with CONFIG_KEXEC, where crash_kexec() was directly called from sysrq-c before, now it can be said that a step of "real oops" was inserted before starting kdump. However, in contrast to oops via SysRq-c from keyboard which results in panic due to in_interrupt(), oops via "echo c > /proc/sysrq-trigger" will not become panic unless panic_on_oops=1. It means that even if dump is properly configured to be taken on panic, the sysrq-c from proc interface might not start crashdump while the sysrq-c from keyboard can start crashdump. This confuses traditional users of kdump, i.e. people who expect sysrq-c to do common behavior in both of the keyboard and proc interface. This patch brings the keyboard and proc interface behavior of sysrq-c in line, by forcing panic_on_oops=1 before oops in sysrq-c handler. And some updates in documentation are included, to clarify that there is no longer dependency with CONFIG_KEXEC, and that now the system can just crash by sysrq-c if no dump mechanism is configured. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp> Acked-by: Neil Horman <nhorman@tuxdriver.com> Acked-by: Vivek Goyal <vgoyal@redhat.com> Cc: Brayan Arraes <brayan@yack.com.br> Cc: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-07-30 05:04:14 +07:00
static struct sysrq_key_op sysrq_crash_op = {
.handler = sysrq_handle_crash,
.help_msg = "Crash",
.action_msg = "Trigger a crash",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
static void sysrq_handle_reboot(int key)
{
[PATCH] sysrq: disable lockdep on reboot SysRq : Emergency Sync Emergency Sync complete SysRq : Emergency Remount R/O Emergency Remount complete SysRq : Resetting BUG: warning at kernel/lockdep.c:1816/trace_hardirqs_on() (Not tainted) Call Trace: [<ffffffff8026d56d>] show_trace+0xae/0x319 [<ffffffff8026d7ed>] dump_stack+0x15/0x17 [<ffffffff802a68d1>] trace_hardirqs_on+0xbc/0x13d [<ffffffff803a8eec>] sysrq_handle_reboot+0x9/0x11 [<ffffffff803a8f8d>] __handle_sysrq+0x99/0x130 [<ffffffff803a903b>] handle_sysrq+0x17/0x19 [<ffffffff803a36ee>] kbd_event+0x32e/0x57d [<ffffffff80401e35>] input_event+0x42d/0x45b [<ffffffff804063eb>] atkbd_interrupt+0x44d/0x53d [<ffffffff803fe5c5>] serio_interrupt+0x49/0x86 [<ffffffff803ff2a4>] i8042_interrupt+0x202/0x21a [<ffffffff80210cf0>] handle_IRQ_event+0x2c/0x64 [<ffffffff802bfd8b>] __do_IRQ+0xaf/0x114 [<ffffffff8026ea24>] do_IRQ+0xf8/0x107 [<ffffffff8025f886>] ret_from_intr+0x0/0xf DWARF2 unwinder stuck at ret_from_intr+0x0/0xf Leftover inexact backtrace: <IRQ> <EOI> [<ffffffff80258e36>] mwait_idle+0x3f/0x54 [<ffffffff8024a33a>] cpu_idle+0xa2/0xc5 [<ffffffff8026c34e>] rest_init+0x2b/0x2d [<ffffffff809708bc>] start_kernel+0x24a/0x24c [<ffffffff8097028b>] _sinittext+0x28b/0x292 Since we're shutting down anyway, don't bother being smart, just turn the thing off. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Ingo Molnar <mingo@elte.hu> Cc: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 13:28:02 +07:00
lockdep_off();
local_irq_enable();
emergency_restart();
}
static struct sysrq_key_op sysrq_reboot_op = {
.handler = sysrq_handle_reboot,
.help_msg = "reBoot",
.action_msg = "Resetting",
.enable_mask = SYSRQ_ENABLE_BOOT,
};
static void sysrq_handle_sync(int key)
{
emergency_sync();
}
static struct sysrq_key_op sysrq_sync_op = {
.handler = sysrq_handle_sync,
.help_msg = "Sync",
.action_msg = "Emergency Sync",
.enable_mask = SYSRQ_ENABLE_SYNC,
};
static void sysrq_handle_show_timers(int key)
{
sysrq_timer_list_show();
}
static struct sysrq_key_op sysrq_show_timers_op = {
.handler = sysrq_handle_show_timers,
.help_msg = "show-all-timers(Q)",
.action_msg = "Show clockevent devices & pending hrtimers (no others)",
};
static void sysrq_handle_mountro(int key)
{
emergency_remount();
}
static struct sysrq_key_op sysrq_mountro_op = {
.handler = sysrq_handle_mountro,
.help_msg = "Unmount",
.action_msg = "Emergency Remount R/O",
.enable_mask = SYSRQ_ENABLE_REMOUNT,
};
#ifdef CONFIG_LOCKDEP
static void sysrq_handle_showlocks(int key)
{
debug_show_all_locks();
}
static struct sysrq_key_op sysrq_showlocks_op = {
.handler = sysrq_handle_showlocks,
.help_msg = "show-all-locks(D)",
.action_msg = "Show Locks Held",
};
#else
#define sysrq_showlocks_op (*(struct sysrq_key_op *)NULL)
#endif
#ifdef CONFIG_SMP
static DEFINE_SPINLOCK(show_lock);
static void showacpu(void *dummy)
{
unsigned long flags;
/* Idle CPUs have no interesting backtrace. */
if (idle_cpu(smp_processor_id()))
return;
spin_lock_irqsave(&show_lock, flags);
printk(KERN_INFO "CPU%d:\n", smp_processor_id());
show_stack(NULL, NULL);
spin_unlock_irqrestore(&show_lock, flags);
}
static void sysrq_showregs_othercpus(struct work_struct *dummy)
{
smp_call_function(showacpu, NULL, 0);
}
static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus);
static void sysrq_handle_showallcpus(int key)
{
/*
* Fall back to the workqueue based printing if the
* backtrace printing did not succeed or the
* architecture has no support for it:
*/
if (!trigger_all_cpu_backtrace()) {
struct pt_regs *regs = get_irq_regs();
if (regs) {
printk(KERN_INFO "CPU%d:\n", smp_processor_id());
show_regs(regs);
}
schedule_work(&sysrq_showallcpus);
}
}
static struct sysrq_key_op sysrq_showallcpus_op = {
.handler = sysrq_handle_showallcpus,
.help_msg = "show-backtrace-all-active-cpus(L)",
.action_msg = "Show backtrace of all active CPUs",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
#endif
static void sysrq_handle_showregs(int key)
{
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers Maintain a per-CPU global "struct pt_regs *" variable which can be used instead of passing regs around manually through all ~1800 interrupt handlers in the Linux kernel. The regs pointer is used in few places, but it potentially costs both stack space and code to pass it around. On the FRV arch, removing the regs parameter from all the genirq function results in a 20% speed up of the IRQ exit path (ie: from leaving timer_interrupt() to leaving do_IRQ()). Where appropriate, an arch may override the generic storage facility and do something different with the variable. On FRV, for instance, the address is maintained in GR28 at all times inside the kernel as part of general exception handling. Having looked over the code, it appears that the parameter may be handed down through up to twenty or so layers of functions. Consider a USB character device attached to a USB hub, attached to a USB controller that posts its interrupts through a cascaded auxiliary interrupt controller. A character device driver may want to pass regs to the sysrq handler through the input layer which adds another few layers of parameter passing. I've build this code with allyesconfig for x86_64 and i386. I've runtested the main part of the code on FRV and i386, though I can't test most of the drivers. I've also done partial conversion for powerpc and MIPS - these at least compile with minimal configurations. This will affect all archs. Mostly the changes should be relatively easy. Take do_IRQ(), store the regs pointer at the beginning, saving the old one: struct pt_regs *old_regs = set_irq_regs(regs); And put the old one back at the end: set_irq_regs(old_regs); Don't pass regs through to generic_handle_irq() or __do_IRQ(). In timer_interrupt(), this sort of change will be necessary: - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(get_irq_regs())); + profile_tick(CPU_PROFILING); I'd like to move update_process_times()'s use of get_irq_regs() into itself, except that i386, alone of the archs, uses something other than user_mode(). Some notes on the interrupt handling in the drivers: (*) input_dev() is now gone entirely. The regs pointer is no longer stored in the input_dev struct. (*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does something different depending on whether it's been supplied with a regs pointer or not. (*) Various IRQ handler function pointers have been moved to type irq_handler_t. Signed-Off-By: David Howells <dhowells@redhat.com> (cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 20:55:46 +07:00
struct pt_regs *regs = get_irq_regs();
if (regs)
show_regs(regs);
perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\<event\>/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian <eranian@google.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Paul Mackerras <paulus@samba.org> Reviewed-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: <linux-arch@vger.kernel.org> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 17:02:48 +07:00
perf_event_print_debug();
}
static struct sysrq_key_op sysrq_showregs_op = {
.handler = sysrq_handle_showregs,
.help_msg = "show-registers(P)",
.action_msg = "Show Regs",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
static void sysrq_handle_showstate(int key)
{
show_state();
}
static struct sysrq_key_op sysrq_showstate_op = {
.handler = sysrq_handle_showstate,
.help_msg = "show-task-states(T)",
.action_msg = "Show State",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
static void sysrq_handle_showstate_blocked(int key)
{
show_state_filter(TASK_UNINTERRUPTIBLE);
}
static struct sysrq_key_op sysrq_showstate_blocked_op = {
.handler = sysrq_handle_showstate_blocked,
.help_msg = "show-blocked-tasks(W)",
.action_msg = "Show Blocked State",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
#ifdef CONFIG_TRACING
#include <linux/ftrace.h>
static void sysrq_ftrace_dump(int key)
{
ftrace_dump(DUMP_ALL);
}
static struct sysrq_key_op sysrq_ftrace_dump_op = {
.handler = sysrq_ftrace_dump,
.help_msg = "dump-ftrace-buffer(Z)",
.action_msg = "Dump ftrace buffer",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
#else
#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)NULL)
#endif
static void sysrq_handle_showmem(int key)
{
show_mem();
}
static struct sysrq_key_op sysrq_showmem_op = {
.handler = sysrq_handle_showmem,
.help_msg = "show-memory-usage(M)",
.action_msg = "Show Memory",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
/*
* Signal sysrq helper function. Sends a signal to all user processes.
*/
static void send_sig_all(int sig)
{
struct task_struct *p;
for_each_process(p) {
pid namespaces: define is_global_init() and is_container_init() is_init() is an ambiguous name for the pid==1 check. Split it into is_global_init() and is_container_init(). A cgroup init has it's tsk->pid == 1. A global init also has it's tsk->pid == 1 and it's active pid namespace is the init_pid_ns. But rather than check the active pid namespace, compare the task structure with 'init_pid_ns.child_reaper', which is initialized during boot to the /sbin/init process and never changes. Changelog: 2.6.22-rc4-mm2-pidns1: - Use 'init_pid_ns.child_reaper' to determine if a given task is the global init (/sbin/init) process. This would improve performance and remove dependence on the task_pid(). 2.6.21-mm2-pidns2: - [Sukadev Bhattiprolu] Changed is_container_init() calls in {powerpc, ppc,avr32}/traps.c for the _exception() call to is_global_init(). This way, we kill only the cgroup if the cgroup's init has a bug rather than force a kernel panic. [akpm@linux-foundation.org: fix comment] [sukadev@us.ibm.com: Use is_global_init() in arch/m32r/mm/fault.c] [bunk@stusta.de: kernel/pid.c: remove unused exports] [sukadev@us.ibm.com: Fix capability.c to work with threaded init] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Acked-by: Pavel Emelianov <xemul@openvz.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Herbert Poetzel <herbert@13thfloor.at> Cc: Kirill Korotaev <dev@sw.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-19 13:39:52 +07:00
if (p->mm && !is_global_init(p))
/* Not swapper, init nor kernel thread */
force_sig(sig, p);
}
}
static void sysrq_handle_term(int key)
{
send_sig_all(SIGTERM);
console_loglevel = 8;
}
static struct sysrq_key_op sysrq_term_op = {
.handler = sysrq_handle_term,
.help_msg = "terminate-all-tasks(E)",
.action_msg = "Terminate All Tasks",
.enable_mask = SYSRQ_ENABLE_SIGNAL,
};
2006-11-22 21:55:48 +07:00
static void moom_callback(struct work_struct *ignored)
{
out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0, NULL);
}
2006-11-22 21:55:48 +07:00
static DECLARE_WORK(moom_work, moom_callback);
static void sysrq_handle_moom(int key)
{
schedule_work(&moom_work);
}
static struct sysrq_key_op sysrq_moom_op = {
.handler = sysrq_handle_moom,
.help_msg = "memory-full-oom-kill(F)",
.action_msg = "Manual OOM execution",
.enable_mask = SYSRQ_ENABLE_SIGNAL,
};
#ifdef CONFIG_BLOCK
static void sysrq_handle_thaw(int key)
{
emergency_thaw_all();
}
static struct sysrq_key_op sysrq_thaw_op = {
.handler = sysrq_handle_thaw,
.help_msg = "thaw-filesystems(J)",
.action_msg = "Emergency Thaw of all frozen filesystems",
.enable_mask = SYSRQ_ENABLE_SIGNAL,
};
#endif
static void sysrq_handle_kill(int key)
{
send_sig_all(SIGKILL);
console_loglevel = 8;
}
static struct sysrq_key_op sysrq_kill_op = {
.handler = sysrq_handle_kill,
.help_msg = "kill-all-tasks(I)",
.action_msg = "Kill All Tasks",
.enable_mask = SYSRQ_ENABLE_SIGNAL,
};
static void sysrq_handle_unrt(int key)
{
normalize_rt_tasks();
}
static struct sysrq_key_op sysrq_unrt_op = {
.handler = sysrq_handle_unrt,
.help_msg = "nice-all-RT-tasks(N)",
.action_msg = "Nice All RT Tasks",
.enable_mask = SYSRQ_ENABLE_RTNICE,
};
/* Key Operations table and lock */
static DEFINE_SPINLOCK(sysrq_key_table_lock);
static struct sysrq_key_op *sysrq_key_table[36] = {
&sysrq_loglevel_op, /* 0 */
&sysrq_loglevel_op, /* 1 */
&sysrq_loglevel_op, /* 2 */
&sysrq_loglevel_op, /* 3 */
&sysrq_loglevel_op, /* 4 */
&sysrq_loglevel_op, /* 5 */
&sysrq_loglevel_op, /* 6 */
&sysrq_loglevel_op, /* 7 */
&sysrq_loglevel_op, /* 8 */
&sysrq_loglevel_op, /* 9 */
/*
* a: Don't use for system provided sysrqs, it is handled specially on
* sparc and will never arrive.
*/
NULL, /* a */
&sysrq_reboot_op, /* b */
sysrq, kdump: make sysrq-c consistent commit d6580a9f15238b87e618310c862231ae3f352d2d ("kexec: sysrq: simplify sysrq-c handler") changed the behavior of sysrq-c to unconditional dereference of NULL pointer. So in cases with CONFIG_KEXEC, where crash_kexec() was directly called from sysrq-c before, now it can be said that a step of "real oops" was inserted before starting kdump. However, in contrast to oops via SysRq-c from keyboard which results in panic due to in_interrupt(), oops via "echo c > /proc/sysrq-trigger" will not become panic unless panic_on_oops=1. It means that even if dump is properly configured to be taken on panic, the sysrq-c from proc interface might not start crashdump while the sysrq-c from keyboard can start crashdump. This confuses traditional users of kdump, i.e. people who expect sysrq-c to do common behavior in both of the keyboard and proc interface. This patch brings the keyboard and proc interface behavior of sysrq-c in line, by forcing panic_on_oops=1 before oops in sysrq-c handler. And some updates in documentation are included, to clarify that there is no longer dependency with CONFIG_KEXEC, and that now the system can just crash by sysrq-c if no dump mechanism is configured. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp> Acked-by: Neil Horman <nhorman@tuxdriver.com> Acked-by: Vivek Goyal <vgoyal@redhat.com> Cc: Brayan Arraes <brayan@yack.com.br> Cc: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-07-30 05:04:14 +07:00
&sysrq_crash_op, /* c & ibm_emac driver debug */
&sysrq_showlocks_op, /* d */
&sysrq_term_op, /* e */
&sysrq_moom_op, /* f */
/* g: May be registered for the kernel debugger */
NULL, /* g */
NULL, /* h - reserved for help */
&sysrq_kill_op, /* i */
#ifdef CONFIG_BLOCK
&sysrq_thaw_op, /* j */
#else
NULL, /* j */
#endif
&sysrq_SAK_op, /* k */
#ifdef CONFIG_SMP
&sysrq_showallcpus_op, /* l */
#else
NULL, /* l */
#endif
&sysrq_showmem_op, /* m */
&sysrq_unrt_op, /* n */
/* o: This will often be registered as 'Off' at init time */
NULL, /* o */
&sysrq_showregs_op, /* p */
&sysrq_show_timers_op, /* q */
&sysrq_unraw_op, /* r */
&sysrq_sync_op, /* s */
&sysrq_showstate_op, /* t */
&sysrq_mountro_op, /* u */
/* v: May be registered for frame buffer console restore */
NULL, /* v */
&sysrq_showstate_blocked_op, /* w */
/* x: May be registered on ppc/powerpc for xmon */
NULL, /* x */
/* y: May be registered on sparc64 for global register dump */
NULL, /* y */
&sysrq_ftrace_dump_op, /* z */
};
/* key2index calculation, -1 on invalid index */
static int sysrq_key_table_key2index(int key)
{
int retval;
if ((key >= '0') && (key <= '9'))
retval = key - '0';
else if ((key >= 'a') && (key <= 'z'))
retval = key + 10 - 'a';
else
retval = -1;
return retval;
}
/*
* get and put functions for the table, exposed to modules.
*/
struct sysrq_key_op *__sysrq_get_key_op(int key)
{
struct sysrq_key_op *op_p = NULL;
int i;
i = sysrq_key_table_key2index(key);
if (i != -1)
op_p = sysrq_key_table[i];
return op_p;
}
static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p)
{
int i = sysrq_key_table_key2index(key);
if (i != -1)
sysrq_key_table[i] = op_p;
}
void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
{
struct sysrq_key_op *op_p;
int orig_log_level;
int i;
unsigned long flags;
spin_lock_irqsave(&sysrq_key_table_lock, flags);
/*
* Raise the apparent loglevel to maximum so that the sysrq header
* is shown to provide the user with positive feedback. We do not
* simply emit this at KERN_EMERG as that would change message
* routing in the consumers of /proc/kmsg.
*/
orig_log_level = console_loglevel;
console_loglevel = 7;
printk(KERN_INFO "SysRq : ");
op_p = __sysrq_get_key_op(key);
if (op_p) {
/*
* Should we check for enabled operations (/proc/sysrq-trigger
* should not) and is the invoked operation enabled?
*/
if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
printk("%s\n", op_p->action_msg);
console_loglevel = orig_log_level;
op_p->handler(key);
} else {
printk("This sysrq operation is disabled.\n");
}
} else {
printk("HELP : ");
/* Only print the help msg once per handler */
for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) {
if (sysrq_key_table[i]) {
int j;
for (j = 0; sysrq_key_table[i] !=
sysrq_key_table[j]; j++)
;
if (j != i)
continue;
printk("%s ", sysrq_key_table[i]->help_msg);
}
}
printk("\n");
console_loglevel = orig_log_level;
}
spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
}
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers Maintain a per-CPU global "struct pt_regs *" variable which can be used instead of passing regs around manually through all ~1800 interrupt handlers in the Linux kernel. The regs pointer is used in few places, but it potentially costs both stack space and code to pass it around. On the FRV arch, removing the regs parameter from all the genirq function results in a 20% speed up of the IRQ exit path (ie: from leaving timer_interrupt() to leaving do_IRQ()). Where appropriate, an arch may override the generic storage facility and do something different with the variable. On FRV, for instance, the address is maintained in GR28 at all times inside the kernel as part of general exception handling. Having looked over the code, it appears that the parameter may be handed down through up to twenty or so layers of functions. Consider a USB character device attached to a USB hub, attached to a USB controller that posts its interrupts through a cascaded auxiliary interrupt controller. A character device driver may want to pass regs to the sysrq handler through the input layer which adds another few layers of parameter passing. I've build this code with allyesconfig for x86_64 and i386. I've runtested the main part of the code on FRV and i386, though I can't test most of the drivers. I've also done partial conversion for powerpc and MIPS - these at least compile with minimal configurations. This will affect all archs. Mostly the changes should be relatively easy. Take do_IRQ(), store the regs pointer at the beginning, saving the old one: struct pt_regs *old_regs = set_irq_regs(regs); And put the old one back at the end: set_irq_regs(old_regs); Don't pass regs through to generic_handle_irq() or __do_IRQ(). In timer_interrupt(), this sort of change will be necessary: - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(get_irq_regs())); + profile_tick(CPU_PROFILING); I'd like to move update_process_times()'s use of get_irq_regs() into itself, except that i386, alone of the archs, uses something other than user_mode(). Some notes on the interrupt handling in the drivers: (*) input_dev() is now gone entirely. The regs pointer is no longer stored in the input_dev struct. (*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does something different depending on whether it's been supplied with a regs pointer or not. (*) Various IRQ handler function pointers have been moved to type irq_handler_t. Signed-Off-By: David Howells <dhowells@redhat.com> (cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 20:55:46 +07:00
void handle_sysrq(int key, struct tty_struct *tty)
{
if (sysrq_on())
__handle_sysrq(key, tty, 1);
}
EXPORT_SYMBOL(handle_sysrq);
#ifdef CONFIG_INPUT
/* Simple translation table for the SysRq keys */
static const unsigned char sysrq_xlate[KEY_MAX + 1] =
"\000\0331234567890-=\177\t" /* 0x00 - 0x0f */
"qwertyuiop[]\r\000as" /* 0x10 - 0x1f */
"dfghjkl;'`\000\\zxcv" /* 0x20 - 0x2f */
"bnm,./\000*\000 \000\201\202\203\204\205" /* 0x30 - 0x3f */
"\206\207\210\211\212\000\000789-456+1" /* 0x40 - 0x4f */
"230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */
"\r\000/"; /* 0x60 - 0x6f */
static bool sysrq_down;
static int sysrq_alt_use;
static int sysrq_alt;
static bool sysrq_filter(struct input_handle *handle, unsigned int type,
unsigned int code, int value)
{
if (type != EV_KEY)
goto out;
switch (code) {
case KEY_LEFTALT:
case KEY_RIGHTALT:
if (value)
sysrq_alt = code;
else {
if (sysrq_down && code == sysrq_alt_use)
sysrq_down = false;
sysrq_alt = 0;
}
break;
case KEY_SYSRQ:
if (value == 1 && sysrq_alt) {
sysrq_down = true;
sysrq_alt_use = sysrq_alt;
}
break;
default:
if (sysrq_down && value && value != 2)
__handle_sysrq(sysrq_xlate[code], NULL, 1);
break;
}
out:
return sysrq_down;
}
static int sysrq_connect(struct input_handler *handler,
struct input_dev *dev,
const struct input_device_id *id)
{
struct input_handle *handle;
int error;
sysrq_down = false;
sysrq_alt = 0;
handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
handle->dev = dev;
handle->handler = handler;
handle->name = "sysrq";
error = input_register_handle(handle);
if (error) {
pr_err("Failed to register input sysrq handler, error %d\n",
error);
goto err_free;
}
error = input_open_device(handle);
if (error) {
pr_err("Failed to open input device, error %d\n", error);
goto err_unregister;
}
return 0;
err_unregister:
input_unregister_handle(handle);
err_free:
kfree(handle);
return error;
}
static void sysrq_disconnect(struct input_handle *handle)
{
input_close_device(handle);
input_unregister_handle(handle);
kfree(handle);
}
/*
* We are matching on KEY_LEFTALT insteard of KEY_SYSRQ because not all
* keyboards have SysRq ikey predefined and so user may add it to keymap
* later, but we expect all such keyboards to have left alt.
*/
static const struct input_device_id sysrq_ids[] = {
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
INPUT_DEVICE_ID_MATCH_KEYBIT,
.evbit = { BIT_MASK(EV_KEY) },
.keybit = { BIT_MASK(KEY_LEFTALT) },
},
{ },
};
static struct input_handler sysrq_handler = {
.filter = sysrq_filter,
.connect = sysrq_connect,
.disconnect = sysrq_disconnect,
.name = "sysrq",
.id_table = sysrq_ids,
};
static bool sysrq_handler_registered;
static inline void sysrq_register_handler(void)
{
int error;
error = input_register_handler(&sysrq_handler);
if (error)
pr_err("Failed to register input handler, error %d", error);
else
sysrq_handler_registered = true;
}
static inline void sysrq_unregister_handler(void)
{
if (sysrq_handler_registered) {
input_unregister_handler(&sysrq_handler);
sysrq_handler_registered = false;
}
}
#else
static inline void sysrq_register_handler(void)
{
}
static inline void sysrq_unregister_handler(void)
{
}
#endif /* CONFIG_INPUT */
int sysrq_toggle_support(int enable_mask)
{
bool was_enabled = sysrq_on();
sysrq_enabled = enable_mask;
if (was_enabled != sysrq_on()) {
if (sysrq_on())
sysrq_register_handler();
else
sysrq_unregister_handler();
}
return 0;
}
static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p,
struct sysrq_key_op *remove_op_p)
{
int retval;
unsigned long flags;
spin_lock_irqsave(&sysrq_key_table_lock, flags);
if (__sysrq_get_key_op(key) == remove_op_p) {
__sysrq_put_key_op(key, insert_op_p);
retval = 0;
} else {
retval = -1;
}
spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
return retval;
}
int register_sysrq_key(int key, struct sysrq_key_op *op_p)
{
return __sysrq_swap_key_ops(key, op_p, NULL);
}
EXPORT_SYMBOL(register_sysrq_key);
int unregister_sysrq_key(int key, struct sysrq_key_op *op_p)
{
return __sysrq_swap_key_ops(key, NULL, op_p);
}
EXPORT_SYMBOL(unregister_sysrq_key);
#ifdef CONFIG_PROC_FS
/*
* writing 'C' to /proc/sysrq-trigger is like sysrq-C
*/
static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
if (count) {
char c;
if (get_user(c, buf))
return -EFAULT;
__handle_sysrq(c, NULL, 0);
}
return count;
}
static const struct file_operations proc_sysrq_trigger_operations = {
.write = write_sysrq_trigger,
};
static void sysrq_init_procfs(void)
{
if (!proc_create("sysrq-trigger", S_IWUSR, NULL,
&proc_sysrq_trigger_operations))
pr_err("Failed to register proc interface\n");
}
#else
static inline void sysrq_init_procfs(void)
{
}
#endif /* CONFIG_PROC_FS */
static int __init sysrq_init(void)
{
sysrq_init_procfs();
if (sysrq_on())
sysrq_register_handler();
return 0;
}
module_init(sysrq_init);