mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 14:36:06 +07:00
0983cc0dbc
Trace events have a period (weight) of 1 by default. This can be
overridden in the event definition by using the __perf_count() macro.
For example, sched_stat_runtime() is weighted with the runtime of
the task that fired the event.
By default, perf handles such a weighted event by dividing it into
individual events, each carrying a weight of 1. For example, if
sched_stat_runtime is fired and the task has run 5000000 nsecs, perf
divides it into 5000000 events in the buffer.
This behaviour makes weighted events unusable because they quickly
fill up the buffers and we lose most events.
The commit 5d81e5cfb3
("events: Don't
divide events if it has field period") solves this problem by sending
only one event when PERF_SAMPLE_PERIOD flag is set. The weight is
carried in the sample itself such that we don't need to demultiplex it
anymore.
This patch provides the last missing piece to use this feature by
setting PERF_SAMPLE_PERIOD from perf tools when we deal with trace
events.
Before:
$ ./perf record -e sched:* -a sleep 1
[ perf record: Woken up 3 times to write data ]
[ perf record: Captured and wrote 1.619 MB perf.data (~70749 samples) ]
Warning:
Processed 16909 events and lost 1 chunks!
Check IO/CPU overload!
$ ./perf script
perf 1894 [003] 824.898327: sched_migrate_task: comm=perf pid=1898 prio=120 orig_cpu=2 dest_cpu=0
perf 1894 [003] 824.898335: sched_stat_sleep: comm=perf pid=1898 delay=113179500 [ns]
perf 1894 [003] 824.898336: sched_stat_sleep: comm=perf pid=1898 delay=113179500 [ns]
perf 1894 [003] 824.898337: sched_stat_sleep: comm=perf pid=1898 delay=113179500 [ns]
perf 1894 [003] 824.898338: sched_stat_sleep: comm=perf pid=1898 delay=113179500 [ns]
perf 1894 [003] 824.898339: sched_stat_sleep: comm=perf pid=1898 delay=113179500 [ns]
perf 1894 [003] 824.898340: sched_stat_sleep: comm=perf pid=1898 delay=113179500 [ns]
perf 1894 [003] 824.898341: sched_stat_sleep: comm=perf pid=1898 delay=113179500 [ns]
[...]
After:
$ ./perf record -e sched:* -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.074 MB perf.data (~3228 samples) ]
$ ./perf script
perf 1461 [000] 554.286957: sched_migrate_task: comm=perf pid=1465 prio=120 orig_cpu=3 dest_cpu=1
perf 1461 [000] 554.286964: sched_stat_sleep: comm=perf pid=1465 delay=133047190 [ns]
perf 1461 [000] 554.286967: sched_wakeup: comm=perf pid=1465 prio=120 success=1 target_cpu=001
swapper 0 [001] 554.286976: sched_stat_wait: comm=perf pid=1465 delay=0 [ns]
swapper 0 [001] 554.286983: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=perf
[...]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1342631456-7233-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
884 lines
20 KiB
C
884 lines
20 KiB
C
/*
|
|
* Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
*
|
|
* Parts came from builtin-{top,stat,record}.c, see those files for further
|
|
* copyright notes.
|
|
*
|
|
* Released under the GPL v2. (and only v2, not any later version)
|
|
*/
|
|
#include "util.h"
|
|
#include "debugfs.h"
|
|
#include <poll.h>
|
|
#include "cpumap.h"
|
|
#include "thread_map.h"
|
|
#include "target.h"
|
|
#include "evlist.h"
|
|
#include "evsel.h"
|
|
#include <unistd.h>
|
|
|
|
#include "parse-events.h"
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/hash.h>
|
|
|
|
/*
 * FD(e, x, y):  the int stored at cell (x, y) of evsel @e's ->fd xyarray;
 *               used below as the perf event file descriptor for (cpu, thread).
 * SID(e, x, y): the cell (x, y) of evsel @e's ->sample_id xyarray.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. a conditional or pointer arithmetic) expands with the intended
 * precedence.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, (x), (y)))
#define SID(e, x, y) xyarray__entry((e)->sample_id, (x), (y))
|
|
|
|
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
|
|
struct thread_map *threads)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
|
|
INIT_HLIST_HEAD(&evlist->heads[i]);
|
|
INIT_LIST_HEAD(&evlist->entries);
|
|
perf_evlist__set_maps(evlist, cpus, threads);
|
|
evlist->workload.pid = -1;
|
|
}
|
|
|
|
struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
|
|
struct thread_map *threads)
|
|
{
|
|
struct perf_evlist *evlist = zalloc(sizeof(*evlist));
|
|
|
|
if (evlist != NULL)
|
|
perf_evlist__init(evlist, cpus, threads);
|
|
|
|
return evlist;
|
|
}
|
|
|
|
void perf_evlist__config_attrs(struct perf_evlist *evlist,
|
|
struct perf_record_opts *opts)
|
|
{
|
|
struct perf_evsel *evsel, *first;
|
|
|
|
if (evlist->cpus->map[0] < 0)
|
|
opts->no_inherit = true;
|
|
|
|
first = list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
|
|
list_for_each_entry(evsel, &evlist->entries, node) {
|
|
perf_evsel__config(evsel, opts, first);
|
|
|
|
if (evlist->nr_entries > 1)
|
|
evsel->attr.sample_type |= PERF_SAMPLE_ID;
|
|
}
|
|
}
|
|
|
|
static void perf_evlist__purge(struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *pos, *n;
|
|
|
|
list_for_each_entry_safe(pos, n, &evlist->entries, node) {
|
|
list_del_init(&pos->node);
|
|
perf_evsel__delete(pos);
|
|
}
|
|
|
|
evlist->nr_entries = 0;
|
|
}
|
|
|
|
void perf_evlist__exit(struct perf_evlist *evlist)
|
|
{
|
|
free(evlist->mmap);
|
|
free(evlist->pollfd);
|
|
evlist->mmap = NULL;
|
|
evlist->pollfd = NULL;
|
|
}
|
|
|
|
/*
 * Destroy @evlist: delete all of its evsels, free its mmap/pollfd arrays
 * and finally free the evlist itself.
 */
void perf_evlist__delete(struct perf_evlist *evlist)
{
	/* Entries first, then the auxiliary arrays, then the container. */
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);

	free(evlist);
}
|
|
|
|
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
|
|
{
|
|
list_add_tail(&entry->node, &evlist->entries);
|
|
++evlist->nr_entries;
|
|
}
|
|
|
|
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
|
|
struct list_head *list,
|
|
int nr_entries)
|
|
{
|
|
list_splice_tail(list, &evlist->entries);
|
|
evlist->nr_entries += nr_entries;
|
|
}
|
|
|
|
int perf_evlist__add_default(struct perf_evlist *evlist)
|
|
{
|
|
struct perf_event_attr attr = {
|
|
.type = PERF_TYPE_HARDWARE,
|
|
.config = PERF_COUNT_HW_CPU_CYCLES,
|
|
};
|
|
struct perf_evsel *evsel;
|
|
|
|
event_attr_init(&attr);
|
|
|
|
evsel = perf_evsel__new(&attr, 0);
|
|
if (evsel == NULL)
|
|
goto error;
|
|
|
|
/* use strdup() because free(evsel) assumes name is allocated */
|
|
evsel->name = strdup("cycles");
|
|
if (!evsel->name)
|
|
goto error_free;
|
|
|
|
perf_evlist__add(evlist, evsel);
|
|
return 0;
|
|
error_free:
|
|
perf_evsel__delete(evsel);
|
|
error:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
int perf_evlist__add_attrs(struct perf_evlist *evlist,
|
|
struct perf_event_attr *attrs, size_t nr_attrs)
|
|
{
|
|
struct perf_evsel *evsel, *n;
|
|
LIST_HEAD(head);
|
|
size_t i;
|
|
|
|
for (i = 0; i < nr_attrs; i++) {
|
|
evsel = perf_evsel__new(attrs + i, evlist->nr_entries + i);
|
|
if (evsel == NULL)
|
|
goto out_delete_partial_list;
|
|
list_add_tail(&evsel->node, &head);
|
|
}
|
|
|
|
perf_evlist__splice_list_tail(evlist, &head, nr_attrs);
|
|
|
|
return 0;
|
|
|
|
out_delete_partial_list:
|
|
list_for_each_entry_safe(evsel, n, &head, node)
|
|
perf_evsel__delete(evsel);
|
|
return -1;
|
|
}
|
|
|
|
int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
|
|
struct perf_event_attr *attrs, size_t nr_attrs)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < nr_attrs; i++)
|
|
event_attr_init(attrs + i);
|
|
|
|
return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
|
|
}
|
|
|
|
static int trace_event__id(const char *evname)
|
|
{
|
|
char *filename, *colon;
|
|
int err = -1, fd;
|
|
|
|
if (asprintf(&filename, "%s/%s/id", tracing_events_path, evname) < 0)
|
|
return -1;
|
|
|
|
colon = strrchr(filename, ':');
|
|
if (colon != NULL)
|
|
*colon = '/';
|
|
|
|
fd = open(filename, O_RDONLY);
|
|
if (fd >= 0) {
|
|
char id[16];
|
|
if (read(fd, id, sizeof(id)) > 0)
|
|
err = atoi(id);
|
|
close(fd);
|
|
}
|
|
|
|
free(filename);
|
|
return err;
|
|
}
|
|
|
|
int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
|
|
const char *tracepoints[],
|
|
size_t nr_tracepoints)
|
|
{
|
|
int err;
|
|
size_t i;
|
|
struct perf_event_attr *attrs = zalloc(nr_tracepoints * sizeof(*attrs));
|
|
|
|
if (attrs == NULL)
|
|
return -1;
|
|
|
|
for (i = 0; i < nr_tracepoints; i++) {
|
|
err = trace_event__id(tracepoints[i]);
|
|
|
|
if (err < 0)
|
|
goto out_free_attrs;
|
|
|
|
attrs[i].type = PERF_TYPE_TRACEPOINT;
|
|
attrs[i].config = err;
|
|
attrs[i].sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
|
|
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD);
|
|
attrs[i].sample_period = 1;
|
|
}
|
|
|
|
err = perf_evlist__add_attrs(evlist, attrs, nr_tracepoints);
|
|
out_free_attrs:
|
|
free(attrs);
|
|
return err;
|
|
}
|
|
|
|
struct perf_evsel *
|
|
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
|
|
list_for_each_entry(evsel, &evlist->entries, node) {
|
|
if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
|
|
(int)evsel->attr.config == id)
|
|
return evsel;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
|
|
const struct perf_evsel_str_handler *assocs,
|
|
size_t nr_assocs)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
int err;
|
|
size_t i;
|
|
|
|
for (i = 0; i < nr_assocs; i++) {
|
|
err = trace_event__id(assocs[i].name);
|
|
if (err < 0)
|
|
goto out;
|
|
|
|
evsel = perf_evlist__find_tracepoint_by_id(evlist, err);
|
|
if (evsel == NULL)
|
|
continue;
|
|
|
|
err = -EEXIST;
|
|
if (evsel->handler.func != NULL)
|
|
goto out;
|
|
evsel->handler.func = assocs[i].handler;
|
|
}
|
|
|
|
err = 0;
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
void perf_evlist__disable(struct perf_evlist *evlist)
|
|
{
|
|
int cpu, thread;
|
|
struct perf_evsel *pos;
|
|
|
|
for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
|
|
list_for_each_entry(pos, &evlist->entries, node) {
|
|
for (thread = 0; thread < evlist->threads->nr; thread++)
|
|
ioctl(FD(pos, cpu, thread),
|
|
PERF_EVENT_IOC_DISABLE, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
void perf_evlist__enable(struct perf_evlist *evlist)
|
|
{
|
|
int cpu, thread;
|
|
struct perf_evsel *pos;
|
|
|
|
for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
|
|
list_for_each_entry(pos, &evlist->entries, node) {
|
|
for (thread = 0; thread < evlist->threads->nr; thread++)
|
|
ioctl(FD(pos, cpu, thread),
|
|
PERF_EVENT_IOC_ENABLE, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
|
|
{
|
|
int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
|
|
evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
|
|
return evlist->pollfd != NULL ? 0 : -ENOMEM;
|
|
}
|
|
|
|
void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
|
|
{
|
|
fcntl(fd, F_SETFL, O_NONBLOCK);
|
|
evlist->pollfd[evlist->nr_fds].fd = fd;
|
|
evlist->pollfd[evlist->nr_fds].events = POLLIN;
|
|
evlist->nr_fds++;
|
|
}
|
|
|
|
/*
 * Fill the (cpu, thread) sample-id slot of @evsel with @id and a back
 * pointer to the evsel, then link it into the evlist's id hash table.
 */
static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	int bucket;

	sid->evsel = evsel;
	sid->id = id;

	bucket = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[bucket]);
}
|
|
|
|
/*
 * Hash @id for the (cpu, thread) slot of @evsel and append it to the
 * evsel's id array, advancing evsel->ids.
 */
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);

	evsel->id[evsel->ids] = id;
	evsel->ids++;
}
|
|
|
|
static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
|
|
struct perf_evsel *evsel,
|
|
int cpu, int thread, int fd)
|
|
{
|
|
u64 read_data[4] = { 0, };
|
|
int id_idx = 1; /* The first entry is the counter value */
|
|
|
|
if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
|
|
read(fd, &read_data, sizeof(read_data)) == -1)
|
|
return -1;
|
|
|
|
if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
|
|
++id_idx;
|
|
if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
|
|
++id_idx;
|
|
|
|
perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Map a sample @id back to its evsel.
 *
 * With a single entry that entry is returned without any lookup.
 * Otherwise the id hash is searched; on a miss the first entry is
 * returned when sample_id_all is off for the list, NULL when it is on.
 */
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *bucket;
	struct hlist_node *node_pos;
	struct perf_sample_id *sid;
	int hash;

	if (evlist->nr_entries == 1)
		return list_entry(evlist->entries.next, struct perf_evsel, node);

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	bucket = &evlist->heads[hash];

	hlist_for_each_entry(sid, node_pos, bucket, node) {
		if (sid->id == id)
			return sid->evsel;
	}

	if (perf_evlist__sample_id_all(evlist))
		return NULL;

	return list_entry(evlist->entries.next, struct perf_evsel, node);
}
|
|
|
|
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
|
|
{
|
|
/* XXX Move this to perf.c, making it generally available */
|
|
unsigned int page_size = sysconf(_SC_PAGE_SIZE);
|
|
struct perf_mmap *md = &evlist->mmap[idx];
|
|
unsigned int head = perf_mmap__read_head(md);
|
|
unsigned int old = md->prev;
|
|
unsigned char *data = md->base + page_size;
|
|
union perf_event *event = NULL;
|
|
|
|
if (evlist->overwrite) {
|
|
/*
|
|
* If we're further behind than half the buffer, there's a chance
|
|
* the writer will bite our tail and mess up the samples under us.
|
|
*
|
|
* If we somehow ended up ahead of the head, we got messed up.
|
|
*
|
|
* In either case, truncate and restart at head.
|
|
*/
|
|
int diff = head - old;
|
|
if (diff > md->mask / 2 || diff < 0) {
|
|
fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
|
|
|
|
/*
|
|
* head points to a known good entry, start there.
|
|
*/
|
|
old = head;
|
|
}
|
|
}
|
|
|
|
if (old != head) {
|
|
size_t size;
|
|
|
|
event = (union perf_event *)&data[old & md->mask];
|
|
size = event->header.size;
|
|
|
|
/*
|
|
* Event straddles the mmap boundary -- header should always
|
|
* be inside due to u64 alignment of output.
|
|
*/
|
|
if ((old & md->mask) + size != ((old + size) & md->mask)) {
|
|
unsigned int offset = old;
|
|
unsigned int len = min(sizeof(*event), size), cpy;
|
|
void *dst = &evlist->event_copy;
|
|
|
|
do {
|
|
cpy = min(md->mask + 1 - (offset & md->mask), len);
|
|
memcpy(dst, &data[offset & md->mask], cpy);
|
|
offset += cpy;
|
|
dst += cpy;
|
|
len -= cpy;
|
|
} while (len);
|
|
|
|
event = &evlist->event_copy;
|
|
}
|
|
|
|
old += size;
|
|
}
|
|
|
|
md->prev = old;
|
|
|
|
if (!evlist->overwrite)
|
|
perf_mmap__write_tail(md, old);
|
|
|
|
return event;
|
|
}
|
|
|
|
void perf_evlist__munmap(struct perf_evlist *evlist)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < evlist->nr_mmaps; i++) {
|
|
if (evlist->mmap[i].base != NULL) {
|
|
munmap(evlist->mmap[i].base, evlist->mmap_len);
|
|
evlist->mmap[i].base = NULL;
|
|
}
|
|
}
|
|
|
|
free(evlist->mmap);
|
|
evlist->mmap = NULL;
|
|
}
|
|
|
|
static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
|
|
{
|
|
evlist->nr_mmaps = evlist->cpus->nr;
|
|
if (evlist->cpus->map[0] == -1)
|
|
evlist->nr_mmaps = evlist->threads->nr;
|
|
evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
|
|
return evlist->mmap != NULL ? 0 : -ENOMEM;
|
|
}
|
|
|
|
static int __perf_evlist__mmap(struct perf_evlist *evlist,
|
|
int idx, int prot, int mask, int fd)
|
|
{
|
|
evlist->mmap[idx].prev = 0;
|
|
evlist->mmap[idx].mask = mask;
|
|
evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
|
|
MAP_SHARED, fd, 0);
|
|
if (evlist->mmap[idx].base == MAP_FAILED) {
|
|
evlist->mmap[idx].base = NULL;
|
|
return -1;
|
|
}
|
|
|
|
perf_evlist__add_pollfd(evlist, fd);
|
|
return 0;
|
|
}
|
|
|
|
static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
int cpu, thread;
|
|
|
|
for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
|
|
int output = -1;
|
|
|
|
for (thread = 0; thread < evlist->threads->nr; thread++) {
|
|
list_for_each_entry(evsel, &evlist->entries, node) {
|
|
int fd = FD(evsel, cpu, thread);
|
|
|
|
if (output == -1) {
|
|
output = fd;
|
|
if (__perf_evlist__mmap(evlist, cpu,
|
|
prot, mask, output) < 0)
|
|
goto out_unmap;
|
|
} else {
|
|
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
|
|
goto out_unmap;
|
|
}
|
|
|
|
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
|
|
perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
|
|
goto out_unmap;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_unmap:
|
|
for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
|
|
if (evlist->mmap[cpu].base != NULL) {
|
|
munmap(evlist->mmap[cpu].base, evlist->mmap_len);
|
|
evlist->mmap[cpu].base = NULL;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask)
|
|
{
|
|
struct perf_evsel *evsel;
|
|
int thread;
|
|
|
|
for (thread = 0; thread < evlist->threads->nr; thread++) {
|
|
int output = -1;
|
|
|
|
list_for_each_entry(evsel, &evlist->entries, node) {
|
|
int fd = FD(evsel, 0, thread);
|
|
|
|
if (output == -1) {
|
|
output = fd;
|
|
if (__perf_evlist__mmap(evlist, thread,
|
|
prot, mask, output) < 0)
|
|
goto out_unmap;
|
|
} else {
|
|
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
|
|
goto out_unmap;
|
|
}
|
|
|
|
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
|
|
perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0)
|
|
goto out_unmap;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_unmap:
|
|
for (thread = 0; thread < evlist->threads->nr; thread++) {
|
|
if (evlist->mmap[thread].base != NULL) {
|
|
munmap(evlist->mmap[thread].base, evlist->mmap_len);
|
|
evlist->mmap[thread].base = NULL;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/** perf_evlist__mmap - Create per cpu maps to receive events
|
|
*
|
|
* @evlist - list of events
|
|
* @pages - map length in pages
|
|
* @overwrite - overwrite older events?
|
|
*
|
|
* If overwrite is false the user needs to signal event consuption using:
|
|
*
|
|
* struct perf_mmap *m = &evlist->mmap[cpu];
|
|
* unsigned int head = perf_mmap__read_head(m);
|
|
*
|
|
* perf_mmap__write_tail(m, head)
|
|
*
|
|
* Using perf_evlist__read_on_cpu does this automatically.
|
|
*/
|
|
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
|
|
bool overwrite)
|
|
{
|
|
unsigned int page_size = sysconf(_SC_PAGE_SIZE);
|
|
struct perf_evsel *evsel;
|
|
const struct cpu_map *cpus = evlist->cpus;
|
|
const struct thread_map *threads = evlist->threads;
|
|
int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask;
|
|
|
|
/* 512 kiB: default amount of unprivileged mlocked memory */
|
|
if (pages == UINT_MAX)
|
|
pages = (512 * 1024) / page_size;
|
|
else if (!is_power_of_2(pages))
|
|
return -EINVAL;
|
|
|
|
mask = pages * page_size - 1;
|
|
|
|
if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
|
|
return -ENOMEM;
|
|
|
|
if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
|
|
return -ENOMEM;
|
|
|
|
evlist->overwrite = overwrite;
|
|
evlist->mmap_len = (pages + 1) * page_size;
|
|
|
|
list_for_each_entry(evsel, &evlist->entries, node) {
|
|
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
|
|
evsel->sample_id == NULL &&
|
|
perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
|
|
return -ENOMEM;
|
|
}
|
|
|
|
if (evlist->cpus->map[0] == -1)
|
|
return perf_evlist__mmap_per_thread(evlist, prot, mask);
|
|
|
|
return perf_evlist__mmap_per_cpu(evlist, prot, mask);
|
|
}
|
|
|
|
int perf_evlist__create_maps(struct perf_evlist *evlist,
|
|
struct perf_target *target)
|
|
{
|
|
evlist->threads = thread_map__new_str(target->pid, target->tid,
|
|
target->uid);
|
|
|
|
if (evlist->threads == NULL)
|
|
return -1;
|
|
|
|
if (perf_target__has_task(target))
|
|
evlist->cpus = cpu_map__dummy_new();
|
|
else if (!perf_target__has_cpu(target) && !target->uses_mmap)
|
|
evlist->cpus = cpu_map__dummy_new();
|
|
else
|
|
evlist->cpus = cpu_map__new(target->cpu_list);
|
|
|
|
if (evlist->cpus == NULL)
|
|
goto out_delete_threads;
|
|
|
|
return 0;
|
|
|
|
out_delete_threads:
|
|
thread_map__delete(evlist->threads);
|
|
return -1;
|
|
}
|
|
|
|
void perf_evlist__delete_maps(struct perf_evlist *evlist)
|
|
{
|
|
cpu_map__delete(evlist->cpus);
|
|
thread_map__delete(evlist->threads);
|
|
evlist->cpus = NULL;
|
|
evlist->threads = NULL;
|
|
}
|
|
|
|
int perf_evlist__set_filters(struct perf_evlist *evlist)
|
|
{
|
|
const struct thread_map *threads = evlist->threads;
|
|
const struct cpu_map *cpus = evlist->cpus;
|
|
struct perf_evsel *evsel;
|
|
char *filter;
|
|
int thread;
|
|
int cpu;
|
|
int err;
|
|
int fd;
|
|
|
|
list_for_each_entry(evsel, &evlist->entries, node) {
|
|
filter = evsel->filter;
|
|
if (!filter)
|
|
continue;
|
|
for (cpu = 0; cpu < cpus->nr; cpu++) {
|
|
for (thread = 0; thread < threads->nr; thread++) {
|
|
fd = FD(evsel, cpu, thread);
|
|
err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
|
|
if (err)
|
|
return err;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *pos, *first;
|
|
|
|
pos = first = list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
|
|
list_for_each_entry_continue(pos, &evlist->entries, node) {
|
|
if (first->attr.sample_type != pos->attr.sample_type)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
u64 perf_evlist__sample_type(const struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *first;
|
|
|
|
first = list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
return first->attr.sample_type;
|
|
}
|
|
|
|
u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *first;
|
|
struct perf_sample *data;
|
|
u64 sample_type;
|
|
u16 size = 0;
|
|
|
|
first = list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
|
|
if (!first->attr.sample_id_all)
|
|
goto out;
|
|
|
|
sample_type = first->attr.sample_type;
|
|
|
|
if (sample_type & PERF_SAMPLE_TID)
|
|
size += sizeof(data->tid) * 2;
|
|
|
|
if (sample_type & PERF_SAMPLE_TIME)
|
|
size += sizeof(data->time);
|
|
|
|
if (sample_type & PERF_SAMPLE_ID)
|
|
size += sizeof(data->id);
|
|
|
|
if (sample_type & PERF_SAMPLE_STREAM_ID)
|
|
size += sizeof(data->stream_id);
|
|
|
|
if (sample_type & PERF_SAMPLE_CPU)
|
|
size += sizeof(data->cpu) * 2;
|
|
out:
|
|
return size;
|
|
}
|
|
|
|
bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *pos, *first;
|
|
|
|
pos = first = list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
|
|
list_for_each_entry_continue(pos, &evlist->entries, node) {
|
|
if (first->attr.sample_id_all != pos->attr.sample_id_all)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool perf_evlist__sample_id_all(const struct perf_evlist *evlist)
|
|
{
|
|
struct perf_evsel *first;
|
|
|
|
first = list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
return first->attr.sample_id_all;
|
|
}
|
|
|
|
void perf_evlist__set_selected(struct perf_evlist *evlist,
|
|
struct perf_evsel *evsel)
|
|
{
|
|
evlist->selected = evsel;
|
|
}
|
|
|
|
int perf_evlist__open(struct perf_evlist *evlist, bool group)
|
|
{
|
|
struct perf_evsel *evsel, *first;
|
|
int err, ncpus, nthreads;
|
|
|
|
first = list_entry(evlist->entries.next, struct perf_evsel, node);
|
|
|
|
list_for_each_entry(evsel, &evlist->entries, node) {
|
|
struct xyarray *group_fd = NULL;
|
|
|
|
if (group && evsel != first)
|
|
group_fd = first->fd;
|
|
|
|
err = perf_evsel__open(evsel, evlist->cpus, evlist->threads,
|
|
group, group_fd);
|
|
if (err < 0)
|
|
goto out_err;
|
|
}
|
|
|
|
return 0;
|
|
out_err:
|
|
ncpus = evlist->cpus ? evlist->cpus->nr : 1;
|
|
nthreads = evlist->threads ? evlist->threads->nr : 1;
|
|
|
|
list_for_each_entry_reverse(evsel, &evlist->entries, node)
|
|
perf_evsel__close(evsel, ncpus, nthreads);
|
|
|
|
errno = -err;
|
|
return err;
|
|
}
|
|
|
|
int perf_evlist__prepare_workload(struct perf_evlist *evlist,
|
|
struct perf_record_opts *opts,
|
|
const char *argv[])
|
|
{
|
|
int child_ready_pipe[2], go_pipe[2];
|
|
char bf;
|
|
|
|
if (pipe(child_ready_pipe) < 0) {
|
|
perror("failed to create 'ready' pipe");
|
|
return -1;
|
|
}
|
|
|
|
if (pipe(go_pipe) < 0) {
|
|
perror("failed to create 'go' pipe");
|
|
goto out_close_ready_pipe;
|
|
}
|
|
|
|
evlist->workload.pid = fork();
|
|
if (evlist->workload.pid < 0) {
|
|
perror("failed to fork");
|
|
goto out_close_pipes;
|
|
}
|
|
|
|
if (!evlist->workload.pid) {
|
|
if (opts->pipe_output)
|
|
dup2(2, 1);
|
|
|
|
close(child_ready_pipe[0]);
|
|
close(go_pipe[1]);
|
|
fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
|
|
|
|
/*
|
|
* Do a dummy execvp to get the PLT entry resolved,
|
|
* so we avoid the resolver overhead on the real
|
|
* execvp call.
|
|
*/
|
|
execvp("", (char **)argv);
|
|
|
|
/*
|
|
* Tell the parent we're ready to go
|
|
*/
|
|
close(child_ready_pipe[1]);
|
|
|
|
/*
|
|
* Wait until the parent tells us to go.
|
|
*/
|
|
if (read(go_pipe[0], &bf, 1) == -1)
|
|
perror("unable to read pipe");
|
|
|
|
execvp(argv[0], (char **)argv);
|
|
|
|
perror(argv[0]);
|
|
kill(getppid(), SIGUSR1);
|
|
exit(-1);
|
|
}
|
|
|
|
if (perf_target__none(&opts->target))
|
|
evlist->threads->map[0] = evlist->workload.pid;
|
|
|
|
close(child_ready_pipe[1]);
|
|
close(go_pipe[0]);
|
|
/*
|
|
* wait for child to settle
|
|
*/
|
|
if (read(child_ready_pipe[0], &bf, 1) == -1) {
|
|
perror("unable to read pipe");
|
|
goto out_close_pipes;
|
|
}
|
|
|
|
evlist->workload.cork_fd = go_pipe[1];
|
|
close(child_ready_pipe[0]);
|
|
return 0;
|
|
|
|
out_close_pipes:
|
|
close(go_pipe[0]);
|
|
close(go_pipe[1]);
|
|
out_close_ready_pipe:
|
|
close(child_ready_pipe[0]);
|
|
close(child_ready_pipe[1]);
|
|
return -1;
|
|
}
|
|
|
|
int perf_evlist__start_workload(struct perf_evlist *evlist)
|
|
{
|
|
if (evlist->workload.cork_fd > 0) {
|
|
/*
|
|
* Remove the cork, let it rip!
|
|
*/
|
|
return close(evlist->workload.cork_fd);
|
|
}
|
|
|
|
return 0;
|
|
}
|