linux_dsm_epyc7002/tools/perf/util/event.c
Namhyung Kim ba78c1c546 perf tools: Basic support for CGROUP event
Implement basic functionality to support cgroup tracking.  Each cgroup
can be identified by inode number which can be read from userspace too.
The actual cgroup processing will come in the later patch.

Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
[ fix perf test failure on sampling parsing ]
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20200325124536.2800725-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-04-03 09:37:55 -03:00

672 lines
18 KiB
C

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <perf/cpumap.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <linux/perf_event.h>
#include <linux/zalloc.h>
#include "cpumap.h"
#include "dso.h"
#include "event.h"
#include "debug.h"
#include "hist.h"
#include "machine.h"
#include "sort.h"
#include "string2.h"
#include "strlist.h"
#include "thread.h"
#include "thread_map.h"
#include "time-utils.h"
#include <linux/ctype.h>
#include "map.h"
#include "util/namespaces.h"
#include "symbol.h"
#include "symbol/kallsyms.h"
#include "asm/bug.h"
#include "stat.h"
#include "session.h"
#include "bpf-event.h"
#include "tool.h"
#include "../perf.h"
static const char *perf_event__names[] = {
[0] = "TOTAL",
[PERF_RECORD_MMAP] = "MMAP",
[PERF_RECORD_MMAP2] = "MMAP2",
[PERF_RECORD_LOST] = "LOST",
[PERF_RECORD_COMM] = "COMM",
[PERF_RECORD_EXIT] = "EXIT",
[PERF_RECORD_THROTTLE] = "THROTTLE",
[PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
[PERF_RECORD_FORK] = "FORK",
[PERF_RECORD_READ] = "READ",
[PERF_RECORD_SAMPLE] = "SAMPLE",
[PERF_RECORD_AUX] = "AUX",
[PERF_RECORD_ITRACE_START] = "ITRACE_START",
[PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
[PERF_RECORD_SWITCH] = "SWITCH",
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
[PERF_RECORD_NAMESPACES] = "NAMESPACES",
[PERF_RECORD_KSYMBOL] = "KSYMBOL",
[PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
[PERF_RECORD_CGROUP] = "CGROUP",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
[PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
[PERF_RECORD_ID_INDEX] = "ID_INDEX",
[PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO",
[PERF_RECORD_AUXTRACE] = "AUXTRACE",
[PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR",
[PERF_RECORD_THREAD_MAP] = "THREAD_MAP",
[PERF_RECORD_CPU_MAP] = "CPU_MAP",
[PERF_RECORD_STAT_CONFIG] = "STAT_CONFIG",
[PERF_RECORD_STAT] = "STAT",
[PERF_RECORD_STAT_ROUND] = "STAT_ROUND",
[PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
[PERF_RECORD_TIME_CONV] = "TIME_CONV",
[PERF_RECORD_HEADER_FEATURE] = "FEATURE",
[PERF_RECORD_COMPRESSED] = "COMPRESSED",
};
const char *perf_event__name(unsigned int id)
{
if (id >= ARRAY_SIZE(perf_event__names))
return "INVALID";
if (!perf_event__names[id])
return "UNKNOWN";
return perf_event__names[id];
}
struct process_symbol_args {
const char *name;
u64 start;
};
static int find_symbol_cb(void *arg, const char *name, char type,
u64 start)
{
struct process_symbol_args *args = arg;
/*
* Must be a function or at least an alias, as in PARISC64, where "_text" is
* an 'A' to the same address as "_stext".
*/
if (!(kallsyms__is_function(type) ||
type == 'A') || strcmp(name, args->name))
return 0;
args->start = start;
return 1;
}
int kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name, u64 *addr)
{
struct process_symbol_args args = { .name = symbol_name, };
if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
return -1;
*addr = args.start;
return 0;
}
void perf_event__read_stat_config(struct perf_stat_config *config,
struct perf_record_stat_config *event)
{
unsigned i;
for (i = 0; i < event->nr; i++) {
switch (event->data[i].tag) {
#define CASE(__term, __val) \
case PERF_STAT_CONFIG_TERM__##__term: \
config->__val = event->data[i].val; \
break;
CASE(AGGR_MODE, aggr_mode)
CASE(SCALE, scale)
CASE(INTERVAL, interval)
#undef CASE
default:
pr_warning("unknown stat config term %" PRI_lu64 "\n",
event->data[i].tag);
}
}
}
size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
{
const char *s;
if (event->header.misc & PERF_RECORD_MISC_COMM_EXEC)
s = " exec";
else
s = "";
return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
}
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
{
size_t ret = 0;
struct perf_ns_link_info *ns_link_info;
u32 nr_namespaces, idx;
ns_link_info = event->namespaces.link_info;
nr_namespaces = event->namespaces.nr_namespaces;
ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
event->namespaces.pid,
event->namespaces.tid,
nr_namespaces);
for (idx = 0; idx < nr_namespaces; idx++) {
if (idx && (idx % 4 == 0))
ret += fprintf(fp, "\n\t\t ");
ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
perf_ns__name(idx), (u64)ns_link_info[idx].dev,
(u64)ns_link_info[idx].ino,
((idx + 1) != nr_namespaces) ? ", " : "]\n");
}
return ret;
}
size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
{
return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n",
event->cgroup.id, event->cgroup.path);
}
int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_comm_event(machine, event, sample);
}
int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_namespaces_event(machine, event, sample);
}
int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_cgroup_event(machine, event, sample);
}
int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_lost_event(machine, event, sample);
}
int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
{
return machine__process_aux_event(machine, event);
}
int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
{
return machine__process_itrace_start_event(machine, event);
}
int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_lost_samples_event(machine, event, sample);
}
int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
{
return machine__process_switch_event(machine, event);
}
int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
struct machine *machine)
{
return machine__process_ksymbol(machine, event, sample);
}
int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_bpf(machine, event, sample);
}
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
event->mmap.pid, event->mmap.tid, event->mmap.start,
event->mmap.len, event->mmap.pgoff,
(event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x',
event->mmap.filename);
}
size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
" %02x:%02x %"PRI_lu64" %"PRI_lu64"]: %c%c%c%c %s\n",
event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
event->mmap2.min, event->mmap2.ino,
event->mmap2.ino_generation,
(event->mmap2.prot & PROT_READ) ? 'r' : '-',
(event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
(event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
(event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
event->mmap2.filename);
}
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
{
struct perf_thread_map *threads = thread_map__new_event(&event->thread_map);
size_t ret;
ret = fprintf(fp, " nr: ");
if (threads)
ret += thread_map__fprintf(threads, fp);
else
ret += fprintf(fp, "failed to get threads from event\n");
perf_thread_map__put(threads);
return ret;
}
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
{
struct perf_cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data);
size_t ret;
ret = fprintf(fp, ": ");
if (cpus)
ret += cpu_map__fprintf(cpus, fp);
else
ret += fprintf(fp, "failed to get cpumap from event\n");
perf_cpu_map__put(cpus);
return ret;
}
int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_mmap_event(machine, event, sample);
}
int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_mmap2_event(machine, event, sample);
}
size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
{
return fprintf(fp, "(%d:%d):(%d:%d)\n",
event->fork.pid, event->fork.tid,
event->fork.ppid, event->fork.ptid);
}
int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_fork_event(machine, event, sample);
}
int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_exit_event(machine, event, sample);
}
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
{
return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n",
event->aux.aux_offset, event->aux.aux_size,
event->aux.flags,
event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "",
event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "");
}
size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
{
return fprintf(fp, " pid: %u tid: %u\n",
event->itrace_start.pid, event->itrace_start.tid);
}
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
{
bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
const char *in_out = !out ? "IN " :
!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ?
"OUT " : "OUT preempt";
if (event->header.type == PERF_RECORD_SWITCH)
return fprintf(fp, " %s\n", in_out);
return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n",
in_out, out ? "next" : "prev",
event->context_switch.next_prev_pid,
event->context_switch.next_prev_tid);
}
static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
{
return fprintf(fp, " lost %" PRI_lu64 "\n", event->lost.lost);
}
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
{
return fprintf(fp, " addr %" PRI_lx64 " len %u type %u flags 0x%x name %s\n",
event->ksymbol.addr, event->ksymbol.len,
event->ksymbol.ksym_type,
event->ksymbol.flags, event->ksymbol.name);
}
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
{
return fprintf(fp, " type %u, flags %u, id %u\n",
event->bpf.type, event->bpf.flags, event->bpf.id);
}
size_t perf_event__fprintf(union perf_event *event, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
perf_event__name(event->header.type));
switch (event->header.type) {
case PERF_RECORD_COMM:
ret += perf_event__fprintf_comm(event, fp);
break;
case PERF_RECORD_FORK:
case PERF_RECORD_EXIT:
ret += perf_event__fprintf_task(event, fp);
break;
case PERF_RECORD_MMAP:
ret += perf_event__fprintf_mmap(event, fp);
break;
case PERF_RECORD_NAMESPACES:
ret += perf_event__fprintf_namespaces(event, fp);
break;
case PERF_RECORD_CGROUP:
ret += perf_event__fprintf_cgroup(event, fp);
break;
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
case PERF_RECORD_AUX:
ret += perf_event__fprintf_aux(event, fp);
break;
case PERF_RECORD_ITRACE_START:
ret += perf_event__fprintf_itrace_start(event, fp);
break;
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
ret += perf_event__fprintf_switch(event, fp);
break;
case PERF_RECORD_LOST:
ret += perf_event__fprintf_lost(event, fp);
break;
case PERF_RECORD_KSYMBOL:
ret += perf_event__fprintf_ksymbol(event, fp);
break;
case PERF_RECORD_BPF_EVENT:
ret += perf_event__fprintf_bpf(event, fp);
break;
default:
ret += fprintf(fp, "\n");
}
return ret;
}
int perf_event__process(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
return machine__process_event(machine, event, sample);
}
struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
struct addr_location *al)
{
struct maps *maps = thread->maps;
struct machine *machine = maps->machine;
bool load_map = false;
al->maps = maps;
al->thread = thread;
al->addr = addr;
al->cpumode = cpumode;
al->filtered = 0;
if (machine == NULL) {
al->map = NULL;
return NULL;
}
if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
al->level = 'k';
al->maps = maps = &machine->kmaps;
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
al->level = '.';
} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
al->level = 'g';
al->maps = maps = &machine->kmaps;
load_map = true;
} else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
al->level = 'u';
} else {
al->level = 'H';
al->map = NULL;
if ((cpumode == PERF_RECORD_MISC_GUEST_USER ||
cpumode == PERF_RECORD_MISC_GUEST_KERNEL) &&
!perf_guest)
al->filtered |= (1 << HIST_FILTER__GUEST);
if ((cpumode == PERF_RECORD_MISC_USER ||
cpumode == PERF_RECORD_MISC_KERNEL) &&
!perf_host)
al->filtered |= (1 << HIST_FILTER__HOST);
return NULL;
}
al->map = maps__find(maps, al->addr);
if (al->map != NULL) {
/*
* Kernel maps might be changed when loading symbols so loading
* must be done prior to using kernel maps.
*/
if (load_map)
map__load(al->map);
al->addr = al->map->map_ip(al->map, al->addr);
}
return al->map;
}
/*
* For branch stacks or branch samples, the sample cpumode might not be correct
* because it applies only to the sample 'ip' and not necessary to 'addr' or
* branch stack addresses. If possible, use a fallback to deal with those cases.
*/
struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
struct addr_location *al)
{
struct map *map = thread__find_map(thread, cpumode, addr, al);
struct machine *machine = thread->maps->machine;
u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
if (map || addr_cpumode == cpumode)
return map;
return thread__find_map(thread, addr_cpumode, addr, al);
}
struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
u64 addr, struct addr_location *al)
{
al->sym = NULL;
if (thread__find_map(thread, cpumode, addr, al))
al->sym = map__find_symbol(al->map, al->addr);
return al->sym;
}
struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
u64 addr, struct addr_location *al)
{
al->sym = NULL;
if (thread__find_map_fb(thread, cpumode, addr, al))
al->sym = map__find_symbol(al->map, al->addr);
return al->sym;
}
/*
* Callers need to drop the reference to al->thread, obtained in
* machine__findnew_thread()
*/
int machine__resolve(struct machine *machine, struct addr_location *al,
struct perf_sample *sample)
{
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
if (thread == NULL)
return -1;
dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
thread__find_map(thread, sample->cpumode, sample->ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
al->level == 'H' ? "[hypervisor]" : "<not found>");
if (thread__is_filtered(thread))
al->filtered |= (1 << HIST_FILTER__THREAD);
al->sym = NULL;
al->cpu = sample->cpu;
al->socket = -1;
al->srcline = NULL;
if (al->cpu >= 0) {
struct perf_env *env = machine->env;
if (env && env->cpu)
al->socket = env->cpu[al->cpu].socket_id;
}
if (al->map) {
struct dso *dso = al->map->dso;
if (symbol_conf.dso_list &&
(!dso || !(strlist__has_entry(symbol_conf.dso_list,
dso->short_name) ||
(dso->short_name != dso->long_name &&
strlist__has_entry(symbol_conf.dso_list,
dso->long_name))))) {
al->filtered |= (1 << HIST_FILTER__DSO);
}
al->sym = map__find_symbol(al->map, al->addr);
}
if (symbol_conf.sym_list &&
(!al->sym || !strlist__has_entry(symbol_conf.sym_list,
al->sym->name))) {
al->filtered |= (1 << HIST_FILTER__SYMBOL);
}
return 0;
}
/*
* The preprocess_sample method will return with reference counts for the
* in it, when done using (and perhaps getting ref counts if needing to
* keep a pointer to one of those entries) it must be paired with
* addr_location__put(), so that the refcounts can be decremented.
*/
void addr_location__put(struct addr_location *al)
{
thread__zput(al->thread);
}
bool is_bts_event(struct perf_event_attr *attr)
{
return attr->type == PERF_TYPE_HARDWARE &&
(attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
attr->sample_period == 1;
}
bool sample_addr_correlates_sym(struct perf_event_attr *attr)
{
if (attr->type == PERF_TYPE_SOFTWARE &&
(attr->config == PERF_COUNT_SW_PAGE_FAULTS ||
attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))
return true;
if (is_bts_event(attr))
return true;
return false;
}
void thread__resolve(struct thread *thread, struct addr_location *al,
struct perf_sample *sample)
{
thread__find_map_fb(thread, sample->cpumode, sample->addr, al);
al->cpu = sample->cpu;
al->sym = NULL;
if (al->map)
al->sym = map__find_symbol(al->map, al->addr);
}