mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-27 20:55:09 +07:00
1f35cd6538
Improve thread_stack__no_call_return() to better handle 'returns' that do not match the stack i.e. 'no call'. See code comments for details. The example below shows how retpolines are affected: Example: $ cat simple-retpoline.c __attribute__((noinline)) int bar(void) { return -1; } int foo(void) { return bar() + 1; } __attribute__((indirect_branch("thunk"))) int main() { int (*volatile fn)(void) = foo; fn(); return fn(); } $ gcc -ggdb3 -Wall -Wextra -O2 -o simple-retpoline simple-retpoline.c $ objdump -d simple-retpoline <SNIP> 0000000000001040 <main>: 1040: 48 83 ec 18 sub $0x18,%rsp 1044: 48 8d 05 25 01 00 00 lea 0x125(%rip),%rax # 1170 <foo> 104b: 48 89 44 24 08 mov %rax,0x8(%rsp) 1050: 48 8b 44 24 08 mov 0x8(%rsp),%rax 1055: e8 1f 01 00 00 callq 1179 <__x86_indirect_thunk_rax> 105a: 48 8b 44 24 08 mov 0x8(%rsp),%rax 105f: 48 83 c4 18 add $0x18,%rsp 1063: e9 11 01 00 00 jmpq 1179 <__x86_indirect_thunk_rax> <SNIP> 0000000000001160 <bar>: 1160: b8 ff ff ff ff mov $0xffffffff,%eax 1165: c3 retq <SNIP> 0000000000001170 <foo>: 1170: e8 eb ff ff ff callq 1160 <bar> 1175: 83 c0 01 add $0x1,%eax 1178: c3 retq 0000000000001179 <__x86_indirect_thunk_rax>: 1179: e8 07 00 00 00 callq 1185 <__x86_indirect_thunk_rax+0xc> 117e: f3 90 pause 1180: 0f ae e8 lfence 1183: eb f9 jmp 117e <__x86_indirect_thunk_rax+0x5> 1185: 48 89 04 24 mov %rax,(%rsp) 1189: c3 retq <SNIP> $ perf record -o simple-retpoline.perf.data -e intel_pt/cyc/u ./simple-retpoline [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0,017 MB simple-retpoline.perf.data ] $ perf script -i simple-retpoline.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py simple-retpoline.db branches calls 2019-01-08 14:03:37.851655 Creating database... 2019-01-08 14:03:37.863256 Writing records... 2019-01-08 14:03:38.069750 Adding indexes 2019-01-08 14:03:38.078799 Done $ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py simple-retpoline.db Before: main -> __x86_indirect_thunk_rax -> __x86_indirect_thunk_rax -> __x86_indirect_thunk_rax -> bar After: main -> __x86_indirect_thunk_rax -> __x86_indirect_thunk_rax -> foo -> bar Committer testing: Chose "Reports", Then "Context-Sensitive Call Graph" and then go on expanding: Before: simple-retpolin PID:PID _start _start __libc_start_main main __x86_indirect_thunk_rax __x86_indirect_thunk_rax bar After: Remove the "simple.retpoline.db" file, run again the 'perf script' line to regenerate the .db file and run the exported-sql-viewer.py again to get the same all the way to 'main', then, from there, including 'main': main __x86_indirect_thunk_rax __x86_indirect_thunk_rax foo bar Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/20190109091835.5570-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
842 lines
19 KiB
C
842 lines
19 KiB
C
/*
|
|
* thread-stack.c: Synthesize a thread's stack using call / return events
|
|
* Copyright (c) 2014, Intel Corporation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
*/
|
|
|
|
#include <linux/rbtree.h>
|
|
#include <linux/list.h>
|
|
#include <linux/log2.h>
|
|
#include <errno.h>
|
|
#include "thread.h"
|
|
#include "event.h"
|
|
#include "machine.h"
|
|
#include "util.h"
|
|
#include "debug.h"
|
|
#include "symbol.h"
|
|
#include "comm.h"
|
|
#include "call-path.h"
|
|
#include "thread-stack.h"
|
|
|
|
#define STACK_GROWTH 2048
|
|
|
|
/**
|
|
* struct thread_stack_entry - thread stack entry.
|
|
* @ret_addr: return address
|
|
* @timestamp: timestamp (if known)
|
|
* @ref: external reference (e.g. db_id of sample)
|
|
* @branch_count: the branch count when the entry was created
|
|
* @cp: call path
|
|
* @no_call: a 'call' was not seen
|
|
* @trace_end: a 'call' but trace ended
|
|
* @non_call: a branch but not a 'call' to the start of a different symbol
|
|
*/
|
|
struct thread_stack_entry {
|
|
u64 ret_addr;
|
|
u64 timestamp;
|
|
u64 ref;
|
|
u64 branch_count;
|
|
struct call_path *cp;
|
|
bool no_call;
|
|
bool trace_end;
|
|
bool non_call;
|
|
};
|
|
|
|
/**
|
|
* struct thread_stack - thread stack constructed from 'call' and 'return'
|
|
* branch samples.
|
|
* @stack: array that holds the stack
|
|
* @cnt: number of entries in the stack
|
|
* @sz: current maximum stack size
|
|
* @trace_nr: current trace number
|
|
* @branch_count: running branch count
|
|
* @kernel_start: kernel start address
|
|
* @last_time: last timestamp
|
|
* @crp: call/return processor
|
|
* @comm: current comm
|
|
* @arr_sz: size of array if this is the first element of an array
|
|
*/
|
|
struct thread_stack {
|
|
struct thread_stack_entry *stack;
|
|
size_t cnt;
|
|
size_t sz;
|
|
u64 trace_nr;
|
|
u64 branch_count;
|
|
u64 kernel_start;
|
|
u64 last_time;
|
|
struct call_return_processor *crp;
|
|
struct comm *comm;
|
|
unsigned int arr_sz;
|
|
};
|
|
|
|
/*
|
|
* Assume pid == tid == 0 identifies the idle task as defined by
|
|
* perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
|
|
* and therefore requires a stack for each cpu.
|
|
*/
|
|
static inline bool thread_stack__per_cpu(struct thread *thread)
|
|
{
|
|
return !(thread->tid || thread->pid_);
|
|
}
|
|
|
|
static int thread_stack__grow(struct thread_stack *ts)
|
|
{
|
|
struct thread_stack_entry *new_stack;
|
|
size_t sz, new_sz;
|
|
|
|
new_sz = ts->sz + STACK_GROWTH;
|
|
sz = new_sz * sizeof(struct thread_stack_entry);
|
|
|
|
new_stack = realloc(ts->stack, sz);
|
|
if (!new_stack)
|
|
return -ENOMEM;
|
|
|
|
ts->stack = new_stack;
|
|
ts->sz = new_sz;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
|
|
struct call_return_processor *crp)
|
|
{
|
|
int err;
|
|
|
|
err = thread_stack__grow(ts);
|
|
if (err)
|
|
return err;
|
|
|
|
if (thread->mg && thread->mg->machine)
|
|
ts->kernel_start = machine__kernel_start(thread->mg->machine);
|
|
else
|
|
ts->kernel_start = 1ULL << 63;
|
|
ts->crp = crp;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
|
|
struct call_return_processor *crp)
|
|
{
|
|
struct thread_stack *ts = thread->ts, *new_ts;
|
|
unsigned int old_sz = ts ? ts->arr_sz : 0;
|
|
unsigned int new_sz = 1;
|
|
|
|
if (thread_stack__per_cpu(thread) && cpu > 0)
|
|
new_sz = roundup_pow_of_two(cpu + 1);
|
|
|
|
if (!ts || new_sz > old_sz) {
|
|
new_ts = calloc(new_sz, sizeof(*ts));
|
|
if (!new_ts)
|
|
return NULL;
|
|
if (ts)
|
|
memcpy(new_ts, ts, old_sz * sizeof(*ts));
|
|
new_ts->arr_sz = new_sz;
|
|
zfree(&thread->ts);
|
|
thread->ts = new_ts;
|
|
ts = new_ts;
|
|
}
|
|
|
|
if (thread_stack__per_cpu(thread) && cpu > 0 &&
|
|
(unsigned int)cpu < ts->arr_sz)
|
|
ts += cpu;
|
|
|
|
if (!ts->stack &&
|
|
thread_stack__init(ts, thread, crp))
|
|
return NULL;
|
|
|
|
return ts;
|
|
}
|
|
|
|
static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
|
|
{
|
|
struct thread_stack *ts = thread->ts;
|
|
|
|
if (cpu < 0)
|
|
cpu = 0;
|
|
|
|
if (!ts || (unsigned int)cpu >= ts->arr_sz)
|
|
return NULL;
|
|
|
|
ts += cpu;
|
|
|
|
if (!ts->stack)
|
|
return NULL;
|
|
|
|
return ts;
|
|
}
|
|
|
|
static inline struct thread_stack *thread__stack(struct thread *thread,
|
|
int cpu)
|
|
{
|
|
if (!thread)
|
|
return NULL;
|
|
|
|
if (thread_stack__per_cpu(thread))
|
|
return thread__cpu_stack(thread, cpu);
|
|
|
|
return thread->ts;
|
|
}
|
|
|
|
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
|
|
bool trace_end)
|
|
{
|
|
int err = 0;
|
|
|
|
if (ts->cnt == ts->sz) {
|
|
err = thread_stack__grow(ts);
|
|
if (err) {
|
|
pr_warning("Out of memory: discarding thread stack\n");
|
|
ts->cnt = 0;
|
|
}
|
|
}
|
|
|
|
ts->stack[ts->cnt].trace_end = trace_end;
|
|
ts->stack[ts->cnt++].ret_addr = ret_addr;
|
|
|
|
return err;
|
|
}
|
|
|
|
static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
|
|
{
|
|
size_t i;
|
|
|
|
/*
|
|
* In some cases there may be functions which are not seen to return.
|
|
* For example when setjmp / longjmp has been used. Or the perf context
|
|
* switch in the kernel which doesn't stop and start tracing in exactly
|
|
* the same code path. When that happens the return address will be
|
|
* further down the stack. If the return address is not found at all,
|
|
* we assume the opposite (i.e. this is a return for a call that wasn't
|
|
* seen for some reason) and leave the stack alone.
|
|
*/
|
|
for (i = ts->cnt; i; ) {
|
|
if (ts->stack[--i].ret_addr == ret_addr) {
|
|
ts->cnt = i;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void thread_stack__pop_trace_end(struct thread_stack *ts)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = ts->cnt; i; ) {
|
|
if (ts->stack[--i].trace_end)
|
|
ts->cnt = i;
|
|
else
|
|
return;
|
|
}
|
|
}
|
|
|
|
static bool thread_stack__in_kernel(struct thread_stack *ts)
|
|
{
|
|
if (!ts->cnt)
|
|
return false;
|
|
|
|
return ts->stack[ts->cnt - 1].cp->in_kernel;
|
|
}
|
|
|
|
static int thread_stack__call_return(struct thread *thread,
|
|
struct thread_stack *ts, size_t idx,
|
|
u64 timestamp, u64 ref, bool no_return)
|
|
{
|
|
struct call_return_processor *crp = ts->crp;
|
|
struct thread_stack_entry *tse;
|
|
struct call_return cr = {
|
|
.thread = thread,
|
|
.comm = ts->comm,
|
|
.db_id = 0,
|
|
};
|
|
|
|
tse = &ts->stack[idx];
|
|
cr.cp = tse->cp;
|
|
cr.call_time = tse->timestamp;
|
|
cr.return_time = timestamp;
|
|
cr.branch_count = ts->branch_count - tse->branch_count;
|
|
cr.call_ref = tse->ref;
|
|
cr.return_ref = ref;
|
|
if (tse->no_call)
|
|
cr.flags |= CALL_RETURN_NO_CALL;
|
|
if (no_return)
|
|
cr.flags |= CALL_RETURN_NO_RETURN;
|
|
if (tse->non_call)
|
|
cr.flags |= CALL_RETURN_NON_CALL;
|
|
|
|
return crp->process(&cr, crp->data);
|
|
}
|
|
|
|
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
|
|
{
|
|
struct call_return_processor *crp = ts->crp;
|
|
int err;
|
|
|
|
if (!crp) {
|
|
ts->cnt = 0;
|
|
return 0;
|
|
}
|
|
|
|
while (ts->cnt) {
|
|
err = thread_stack__call_return(thread, ts, --ts->cnt,
|
|
ts->last_time, 0, true);
|
|
if (err) {
|
|
pr_err("Error flushing thread stack!\n");
|
|
ts->cnt = 0;
|
|
return err;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int thread_stack__flush(struct thread *thread)
|
|
{
|
|
struct thread_stack *ts = thread->ts;
|
|
unsigned int pos;
|
|
int err = 0;
|
|
|
|
if (ts) {
|
|
for (pos = 0; pos < ts->arr_sz; pos++) {
|
|
int ret = __thread_stack__flush(thread, ts + pos);
|
|
|
|
if (ret)
|
|
err = ret;
|
|
}
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
|
|
u64 to_ip, u16 insn_len, u64 trace_nr)
|
|
{
|
|
struct thread_stack *ts = thread__stack(thread, cpu);
|
|
|
|
if (!thread)
|
|
return -EINVAL;
|
|
|
|
if (!ts) {
|
|
ts = thread_stack__new(thread, cpu, NULL);
|
|
if (!ts) {
|
|
pr_warning("Out of memory: no thread stack\n");
|
|
return -ENOMEM;
|
|
}
|
|
ts->trace_nr = trace_nr;
|
|
}
|
|
|
|
/*
|
|
* When the trace is discontinuous, the trace_nr changes. In that case
|
|
* the stack might be completely invalid. Better to report nothing than
|
|
* to report something misleading, so flush the stack.
|
|
*/
|
|
if (trace_nr != ts->trace_nr) {
|
|
if (ts->trace_nr)
|
|
__thread_stack__flush(thread, ts);
|
|
ts->trace_nr = trace_nr;
|
|
}
|
|
|
|
/* Stop here if thread_stack__process() is in use */
|
|
if (ts->crp)
|
|
return 0;
|
|
|
|
if (flags & PERF_IP_FLAG_CALL) {
|
|
u64 ret_addr;
|
|
|
|
if (!to_ip)
|
|
return 0;
|
|
ret_addr = from_ip + insn_len;
|
|
if (ret_addr == to_ip)
|
|
return 0; /* Zero-length calls are excluded */
|
|
return thread_stack__push(ts, ret_addr,
|
|
flags & PERF_IP_FLAG_TRACE_END);
|
|
} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
|
|
/*
|
|
* If the caller did not change the trace number (which would
|
|
* have flushed the stack) then try to make sense of the stack.
|
|
* Possibly, tracing began after returning to the current
|
|
* address, so try to pop that. Also, do not expect a call made
|
|
* when the trace ended, to return, so pop that.
|
|
*/
|
|
thread_stack__pop(ts, to_ip);
|
|
thread_stack__pop_trace_end(ts);
|
|
} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
|
|
thread_stack__pop(ts, to_ip);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
|
|
{
|
|
struct thread_stack *ts = thread__stack(thread, cpu);
|
|
|
|
if (!ts)
|
|
return;
|
|
|
|
if (trace_nr != ts->trace_nr) {
|
|
if (ts->trace_nr)
|
|
__thread_stack__flush(thread, ts);
|
|
ts->trace_nr = trace_nr;
|
|
}
|
|
}
|
|
|
|
static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
|
|
{
|
|
__thread_stack__flush(thread, ts);
|
|
zfree(&ts->stack);
|
|
}
|
|
|
|
static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
|
|
{
|
|
unsigned int arr_sz = ts->arr_sz;
|
|
|
|
__thread_stack__free(thread, ts);
|
|
memset(ts, 0, sizeof(*ts));
|
|
ts->arr_sz = arr_sz;
|
|
}
|
|
|
|
void thread_stack__free(struct thread *thread)
|
|
{
|
|
struct thread_stack *ts = thread->ts;
|
|
unsigned int pos;
|
|
|
|
if (ts) {
|
|
for (pos = 0; pos < ts->arr_sz; pos++)
|
|
__thread_stack__free(thread, ts + pos);
|
|
zfree(&thread->ts);
|
|
}
|
|
}
|
|
|
|
static inline u64 callchain_context(u64 ip, u64 kernel_start)
|
|
{
|
|
return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
|
|
}
|
|
|
|
void thread_stack__sample(struct thread *thread, int cpu,
|
|
struct ip_callchain *chain,
|
|
size_t sz, u64 ip, u64 kernel_start)
|
|
{
|
|
struct thread_stack *ts = thread__stack(thread, cpu);
|
|
u64 context = callchain_context(ip, kernel_start);
|
|
u64 last_context;
|
|
size_t i, j;
|
|
|
|
if (sz < 2) {
|
|
chain->nr = 0;
|
|
return;
|
|
}
|
|
|
|
chain->ips[0] = context;
|
|
chain->ips[1] = ip;
|
|
|
|
if (!ts) {
|
|
chain->nr = 2;
|
|
return;
|
|
}
|
|
|
|
last_context = context;
|
|
|
|
for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
|
|
ip = ts->stack[ts->cnt - j].ret_addr;
|
|
context = callchain_context(ip, kernel_start);
|
|
if (context != last_context) {
|
|
if (i >= sz - 1)
|
|
break;
|
|
chain->ips[i++] = context;
|
|
last_context = context;
|
|
}
|
|
chain->ips[i] = ip;
|
|
}
|
|
|
|
chain->nr = i;
|
|
}
|
|
|
|
struct call_return_processor *
|
|
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
|
|
void *data)
|
|
{
|
|
struct call_return_processor *crp;
|
|
|
|
crp = zalloc(sizeof(struct call_return_processor));
|
|
if (!crp)
|
|
return NULL;
|
|
crp->cpr = call_path_root__new();
|
|
if (!crp->cpr)
|
|
goto out_free;
|
|
crp->process = process;
|
|
crp->data = data;
|
|
return crp;
|
|
|
|
out_free:
|
|
free(crp);
|
|
return NULL;
|
|
}
|
|
|
|
void call_return_processor__free(struct call_return_processor *crp)
|
|
{
|
|
if (crp) {
|
|
call_path_root__free(crp->cpr);
|
|
free(crp);
|
|
}
|
|
}
|
|
|
|
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
|
|
u64 timestamp, u64 ref, struct call_path *cp,
|
|
bool no_call, bool trace_end)
|
|
{
|
|
struct thread_stack_entry *tse;
|
|
int err;
|
|
|
|
if (!cp)
|
|
return -ENOMEM;
|
|
|
|
if (ts->cnt == ts->sz) {
|
|
err = thread_stack__grow(ts);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
tse = &ts->stack[ts->cnt++];
|
|
tse->ret_addr = ret_addr;
|
|
tse->timestamp = timestamp;
|
|
tse->ref = ref;
|
|
tse->branch_count = ts->branch_count;
|
|
tse->cp = cp;
|
|
tse->no_call = no_call;
|
|
tse->trace_end = trace_end;
|
|
tse->non_call = false;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
|
|
u64 ret_addr, u64 timestamp, u64 ref,
|
|
struct symbol *sym)
|
|
{
|
|
int err;
|
|
|
|
if (!ts->cnt)
|
|
return 1;
|
|
|
|
if (ts->cnt == 1) {
|
|
struct thread_stack_entry *tse = &ts->stack[0];
|
|
|
|
if (tse->cp->sym == sym)
|
|
return thread_stack__call_return(thread, ts, --ts->cnt,
|
|
timestamp, ref, false);
|
|
}
|
|
|
|
if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
|
|
!ts->stack[ts->cnt - 1].non_call) {
|
|
return thread_stack__call_return(thread, ts, --ts->cnt,
|
|
timestamp, ref, false);
|
|
} else {
|
|
size_t i = ts->cnt - 1;
|
|
|
|
while (i--) {
|
|
if (ts->stack[i].ret_addr != ret_addr ||
|
|
ts->stack[i].non_call)
|
|
continue;
|
|
i += 1;
|
|
while (ts->cnt > i) {
|
|
err = thread_stack__call_return(thread, ts,
|
|
--ts->cnt,
|
|
timestamp, ref,
|
|
true);
|
|
if (err)
|
|
return err;
|
|
}
|
|
return thread_stack__call_return(thread, ts, --ts->cnt,
|
|
timestamp, ref, false);
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int thread_stack__bottom(struct thread_stack *ts,
|
|
struct perf_sample *sample,
|
|
struct addr_location *from_al,
|
|
struct addr_location *to_al, u64 ref)
|
|
{
|
|
struct call_path_root *cpr = ts->crp->cpr;
|
|
struct call_path *cp;
|
|
struct symbol *sym;
|
|
u64 ip;
|
|
|
|
if (sample->ip) {
|
|
ip = sample->ip;
|
|
sym = from_al->sym;
|
|
} else if (sample->addr) {
|
|
ip = sample->addr;
|
|
sym = to_al->sym;
|
|
} else {
|
|
return 0;
|
|
}
|
|
|
|
cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
|
|
ts->kernel_start);
|
|
|
|
return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
|
|
true, false);
|
|
}
|
|
|
|
static int thread_stack__no_call_return(struct thread *thread,
|
|
struct thread_stack *ts,
|
|
struct perf_sample *sample,
|
|
struct addr_location *from_al,
|
|
struct addr_location *to_al, u64 ref)
|
|
{
|
|
struct call_path_root *cpr = ts->crp->cpr;
|
|
struct call_path *root = &cpr->call_path;
|
|
struct symbol *fsym = from_al->sym;
|
|
struct symbol *tsym = to_al->sym;
|
|
struct call_path *cp, *parent;
|
|
u64 ks = ts->kernel_start;
|
|
u64 addr = sample->addr;
|
|
u64 tm = sample->time;
|
|
u64 ip = sample->ip;
|
|
int err;
|
|
|
|
if (ip >= ks && addr < ks) {
|
|
/* Return to userspace, so pop all kernel addresses */
|
|
while (thread_stack__in_kernel(ts)) {
|
|
err = thread_stack__call_return(thread, ts, --ts->cnt,
|
|
tm, ref, true);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
/* If the stack is empty, push the userspace address */
|
|
if (!ts->cnt) {
|
|
cp = call_path__findnew(cpr, root, tsym, addr, ks);
|
|
return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
|
|
false);
|
|
}
|
|
} else if (thread_stack__in_kernel(ts) && ip < ks) {
|
|
/* Return to userspace, so pop all kernel addresses */
|
|
while (thread_stack__in_kernel(ts)) {
|
|
err = thread_stack__call_return(thread, ts, --ts->cnt,
|
|
tm, ref, true);
|
|
if (err)
|
|
return err;
|
|
}
|
|
}
|
|
|
|
if (ts->cnt)
|
|
parent = ts->stack[ts->cnt - 1].cp;
|
|
else
|
|
parent = root;
|
|
|
|
if (parent->sym == from_al->sym) {
|
|
/*
|
|
* At the bottom of the stack, assume the missing 'call' was
|
|
* before the trace started. So, pop the current symbol and push
|
|
* the 'to' symbol.
|
|
*/
|
|
if (ts->cnt == 1) {
|
|
err = thread_stack__call_return(thread, ts, --ts->cnt,
|
|
tm, ref, false);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
if (!ts->cnt) {
|
|
cp = call_path__findnew(cpr, root, tsym, addr, ks);
|
|
|
|
return thread_stack__push_cp(ts, addr, tm, ref, cp,
|
|
true, false);
|
|
}
|
|
|
|
/*
|
|
* Otherwise assume the 'return' is being used as a jump (e.g.
|
|
* retpoline) and just push the 'to' symbol.
|
|
*/
|
|
cp = call_path__findnew(cpr, parent, tsym, addr, ks);
|
|
|
|
err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
|
|
if (!err)
|
|
ts->stack[ts->cnt - 1].non_call = true;
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Assume 'parent' has not yet returned, so push 'to', and then push and
|
|
* pop 'from'.
|
|
*/
|
|
|
|
cp = call_path__findnew(cpr, parent, tsym, addr, ks);
|
|
|
|
err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
|
|
if (err)
|
|
return err;
|
|
|
|
cp = call_path__findnew(cpr, cp, fsym, ip, ks);
|
|
|
|
err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
|
|
if (err)
|
|
return err;
|
|
|
|
return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
|
|
}
|
|
|
|
static int thread_stack__trace_begin(struct thread *thread,
|
|
struct thread_stack *ts, u64 timestamp,
|
|
u64 ref)
|
|
{
|
|
struct thread_stack_entry *tse;
|
|
int err;
|
|
|
|
if (!ts->cnt)
|
|
return 0;
|
|
|
|
/* Pop trace end */
|
|
tse = &ts->stack[ts->cnt - 1];
|
|
if (tse->trace_end) {
|
|
err = thread_stack__call_return(thread, ts, --ts->cnt,
|
|
timestamp, ref, false);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int thread_stack__trace_end(struct thread_stack *ts,
|
|
struct perf_sample *sample, u64 ref)
|
|
{
|
|
struct call_path_root *cpr = ts->crp->cpr;
|
|
struct call_path *cp;
|
|
u64 ret_addr;
|
|
|
|
/* No point having 'trace end' on the bottom of the stack */
|
|
if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
|
|
return 0;
|
|
|
|
cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
|
|
ts->kernel_start);
|
|
|
|
ret_addr = sample->ip + sample->insn_len;
|
|
|
|
return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
|
|
false, true);
|
|
}
|
|
|
|
int thread_stack__process(struct thread *thread, struct comm *comm,
|
|
struct perf_sample *sample,
|
|
struct addr_location *from_al,
|
|
struct addr_location *to_al, u64 ref,
|
|
struct call_return_processor *crp)
|
|
{
|
|
struct thread_stack *ts = thread__stack(thread, sample->cpu);
|
|
int err = 0;
|
|
|
|
if (ts && !ts->crp) {
|
|
/* Supersede thread_stack__event() */
|
|
thread_stack__reset(thread, ts);
|
|
ts = NULL;
|
|
}
|
|
|
|
if (!ts) {
|
|
ts = thread_stack__new(thread, sample->cpu, crp);
|
|
if (!ts)
|
|
return -ENOMEM;
|
|
ts->comm = comm;
|
|
}
|
|
|
|
/* Flush stack on exec */
|
|
if (ts->comm != comm && thread->pid_ == thread->tid) {
|
|
err = __thread_stack__flush(thread, ts);
|
|
if (err)
|
|
return err;
|
|
ts->comm = comm;
|
|
}
|
|
|
|
/* If the stack is empty, put the current symbol on the stack */
|
|
if (!ts->cnt) {
|
|
err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
ts->branch_count += 1;
|
|
ts->last_time = sample->time;
|
|
|
|
if (sample->flags & PERF_IP_FLAG_CALL) {
|
|
bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
|
|
struct call_path_root *cpr = ts->crp->cpr;
|
|
struct call_path *cp;
|
|
u64 ret_addr;
|
|
|
|
if (!sample->ip || !sample->addr)
|
|
return 0;
|
|
|
|
ret_addr = sample->ip + sample->insn_len;
|
|
if (ret_addr == sample->addr)
|
|
return 0; /* Zero-length calls are excluded */
|
|
|
|
cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
|
|
to_al->sym, sample->addr,
|
|
ts->kernel_start);
|
|
err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
|
|
cp, false, trace_end);
|
|
} else if (sample->flags & PERF_IP_FLAG_RETURN) {
|
|
if (!sample->ip || !sample->addr)
|
|
return 0;
|
|
|
|
err = thread_stack__pop_cp(thread, ts, sample->addr,
|
|
sample->time, ref, from_al->sym);
|
|
if (err) {
|
|
if (err < 0)
|
|
return err;
|
|
err = thread_stack__no_call_return(thread, ts, sample,
|
|
from_al, to_al, ref);
|
|
}
|
|
} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
|
|
err = thread_stack__trace_begin(thread, ts, sample->time, ref);
|
|
} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
|
|
err = thread_stack__trace_end(ts, sample, ref);
|
|
} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
|
|
from_al->sym != to_al->sym && to_al->sym &&
|
|
to_al->addr == to_al->sym->start) {
|
|
struct call_path_root *cpr = ts->crp->cpr;
|
|
struct call_path *cp;
|
|
|
|
/*
|
|
* The compiler might optimize a call/ret combination by making
|
|
* it a jmp. Make that visible by recording on the stack a
|
|
* branch to the start of a different symbol. Note, that means
|
|
* when a ret pops the stack, all jmps must be popped off first.
|
|
*/
|
|
cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
|
|
to_al->sym, sample->addr,
|
|
ts->kernel_start);
|
|
err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
|
|
false);
|
|
if (!err)
|
|
ts->stack[ts->cnt - 1].non_call = true;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
size_t thread_stack__depth(struct thread *thread, int cpu)
|
|
{
|
|
struct thread_stack *ts = thread__stack(thread, cpu);
|
|
|
|
if (!ts)
|
|
return 0;
|
|
return ts->cnt;
|
|
}
|