/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
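
/*
 * Per-cpu scratch pt_regs for trace events that want a snapshot of the
 * caller's registers; presumably filled via perf_arch_fetch_caller_regs()
 * by the generated tracepoint probes (the callers live outside this file).
 */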
DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);

EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
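
/*
 * Shared per-cpu scratch buffers for perf trace records: one for events
 * hit in normal contexts and a separate one for events hit in NMI context,
 * so an NMI cannot clobber the record of an event it interrupted.
 */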
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises.
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
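
/*
 * Enable perf profiling for one ftrace event. The scratch buffers are
 * shared by every event, so they are allocated lazily when the first event
 * is enabled (total_ref_count is still zero) and published with
 * rcu_assign_pointer() so lockless readers see initialized memory.
 */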
static int perf_trace_event_enable(struct ftrace_event_call *event)
{
	char *buf;
	int ret = -ENOMEM;

	if (event->perf_refcount++ > 0)
		return 0;

	if (!total_ref_count) {
		buf = (char *)alloc_percpu(perf_trace_t);
		if (!buf)
			goto fail_buf;

		rcu_assign_pointer(perf_trace_buf, buf);

		buf = (char *)alloc_percpu(perf_trace_t);
		if (!buf)
			goto fail_buf_nmi;

		rcu_assign_pointer(perf_trace_buf_nmi, buf);
	}

	ret = event->perf_event_enable(event);
	if (!ret) {
		total_ref_count++;
		return 0;
	}

fail_buf_nmi:
	if (!total_ref_count) {
		free_percpu(perf_trace_buf_nmi);
		free_percpu(perf_trace_buf);
		perf_trace_buf_nmi = NULL;
		perf_trace_buf = NULL;
	}
fail_buf:
	event->perf_refcount--;

	return ret;
}
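
/*
 * Enable perf profiling for the event matching @event_id. The event is
 * looked up in ftrace_events under event_mutex, and its module is pinned
 * so it cannot be unloaded while profiling is active.
 */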
int perf_trace_enable(int event_id)
{
	struct ftrace_event_call *event;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(event, &ftrace_events, list) {
		if (event->id == event_id && event->perf_event_enable &&
		    try_module_get(event->mod)) {
			ret = perf_trace_event_enable(event);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}
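
/*
 * Disable perf profiling for one ftrace event. When the last enabled event
 * goes away, the shared buffers are unpublished, and freed only after
 * synchronize_sched() ensures no profiling path is still using them.
 */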
static void perf_trace_event_disable(struct ftrace_event_call *event)
{
	char *buf, *nmi_buf;

	if (--event->perf_refcount > 0)
		return;

	event->perf_event_disable(event);

	if (!--total_ref_count) {
		buf = perf_trace_buf;
		rcu_assign_pointer(perf_trace_buf, NULL);

		nmi_buf = perf_trace_buf_nmi;
		rcu_assign_pointer(perf_trace_buf_nmi, NULL);

		/*
		 * Ensure every event in profiling has finished before
		 * releasing the buffers.
		 */
		synchronize_sched();

		free_percpu(buf);
		free_percpu(nmi_buf);
	}
}
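
/*
 * Counterpart of perf_trace_enable(): disable profiling for the event
 * matching @event_id and drop the module reference taken when it was
 * enabled.
 */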
void perf_trace_disable(int event_id)
{
	struct ftrace_event_call *event;

	mutex_lock(&event_mutex);
	list_for_each_entry(event, &ftrace_events, list) {
		if (event->id == event_id) {
			perf_trace_event_disable(event);
			module_put(event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);
}
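
/*
 * Reserve a @size byte record in the per-cpu scratch buffer and initialize
 * its generic trace_entry header with @type. On success the caller runs
 * with local irqs disabled (saved in *@irq_flags) and holds a software
 * event recursion context (stored in *@rctxp); both must be released once
 * the record has been filled and submitted. Returns NULL on recursion or
 * when no buffer is published.
 */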
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       int *rctxp, unsigned long *irq_flags)
{
	struct trace_entry *entry;
	char *trace_buf, *raw_data;
	int pc, cpu;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(*irq_flags);

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		goto err_recursion;

	cpu = smp_processor_id();

	if (in_nmi())
		trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
	else
		trace_buf = rcu_dereference_sched(perf_trace_buf);

	if (!trace_buf)
		goto err;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from alignment to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	tracing_generic_entry_update(entry, *irq_flags, pc);
	entry->type = type;

	return raw_data;
err:
	perf_swevent_put_recursion_context(*rctxp);
err_recursion:
	local_irq_restore(*irq_flags);
	return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
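
/*
 * Illustrative sketch of a caller, for orientation only; the exact submit
 * helper and the field layout come from the generated tracepoint code
 * (include/trace/ftrace.h) and are assumed here rather than defined in
 * this file:
 *
 *	entry = perf_trace_buf_prepare(__entry_size, event_call->id,
 *				       &rctx, &irq_flags);
 *	if (!entry)
 *		return;
 *
 *	(fill in the event-specific fields after the trace_entry header)
 *
 *	perf_trace_buf_submit(entry, __entry_size, rctx, __addr, __count,
 *			      irq_flags, __regs);
 */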