perf tools: Add dynamic sort key for tracepoint events

The existing sort keys are less useful for tracepoint events in that
they are always sampled at the same place, the function where the
tracepoint is located.

For example, a 'perf report' on sched:sched_switch event looks like the
following:

  # Overhead  Command          Shared Object     Symbol
  # ........  ...............  ................  ..............
  #
      47.22%  swapper          [kernel.vmlinux]  [k] __schedule
      21.67%  transmission-gt  [kernel.vmlinux]  [k] __schedule
       8.23%  netctl-auto      [kernel.vmlinux]  [k] __schedule
       5.53%  kworker/0:1H     [kernel.vmlinux]  [k] __schedule
       1.98%  Xephyr           [kernel.vmlinux]  [k] __schedule
       1.33%  irq/33-iwlwifi   [kernel.vmlinux]  [k] __schedule
       1.17%  wpa_cli          [kernel.vmlinux]  [k] __schedule
       1.13%  rcu_preempt      [kernel.vmlinux]  [k] __schedule
       0.85%  ksoftirqd/0      [kernel.vmlinux]  [k] __schedule
       0.77%  Timer            [kernel.vmlinux]  [k] __schedule

In fact, tracepoints have meaningful information in their fields but
there's no way to use in 'perf report' currently.  The dynamic sort keys
are introduced in this patc to overcome this limitation.

The sched:sched_switch events have following fields:

  # sudo cat /sys/kernel/debug/tracing/events/sched/sched_switch/format
  name: sched_switch
  ID: 268
  format:
	field:unsigned short common_type;         offset:0; size:2; signed:0;
	field:unsigned char common_flags;         offset:2; size:1; signed:0;
	field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
	field:int common_pid;                     offset:4; size:4; signed:1;

	field:char prev_comm[16]; offset:8;  size:16; signed:1;
	field:pid_t prev_pid;     offset:24; size:4;  signed:1;
	field:int prev_prio;      offset:28; size:4;  signed:1;
	field:long prev_state;    offset:32; size:8;  signed:1;
	field:char next_comm[16]; offset:40; size:16; signed:1;
	field:pid_t next_pid;     offset:56; size:4;  signed:1;
	field:int next_prio;      offset:60; size:4;  signed:1;

  print fmt: "prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==>
              next_comm=%s next_pid=%d next_prio=%d",
    REC->prev_comm, REC->prev_pid, REC->prev_prio,
    REC->prev_state & (2048-1) ? __print_flags(REC->prev_state & (2048-1),
    "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" },
    { 64, "x" }, { 128, "K"}, { 256, "W" }, { 512, "P" }, { 1024, "N" }) : "R",
    REC->prev_state & 2048 ? "+" : "", REC->next_comm, REC->next_pid, REC->next_prio

With dynamic sort keys, you can use <event.field> as a sort key.  Those
dynamic keys are checked and created on demand.  For instance, below is
to sort by next_pid field output on the same data file:

  $ perf report -s comm,sched:sched_switch.next_pid --stdio
  ...
  # Overhead  Command            next_pid
  # ........  ...............  ..........
  #
      21.23%  transmission-gt           0
      20.86%  swapper               17773
       6.62%  netctl-auto               0
       5.25%  swapper                 109
       5.21%  kworker/0:1H              0
       1.98%  Xephyr                    0
       1.98%  swapper                6524
       1.98%  swapper               27478
       1.37%  swapper               27476
       1.17%  swapper                 233

Multiple dynamic sort keys are also supported:

  $ perf report -s comm,sched:sched_switch.next_pid,sched:sched_switch.next_comm --stdio
  ...
  # Overhead  Command            next_pid         next_comm
  # ........  ...............  ..........  ................
  #
      20.86%  swapper               17773   transmission-gt
       9.64%  transmission-gt           0         swapper/0
       9.16%  transmission-gt           0         swapper/2
       5.25%  swapper                 109      kworker/0:1H
       5.21%  kworker/0:1H              0         swapper/0
       2.14%  netctl-auto               0         swapper/2
       1.98%  netctl-auto               0         swapper/0
       1.98%  swapper                6524            Xephyr
       1.98%  swapper               27478       netctl-auto
       1.78%  transmission-gt           0         swapper/3
       1.53%  Xephyr                    0         swapper/0
       1.29%  netctl-auto               0         swapper/1
       1.29%  swapper               27476       netctl-auto
       1.21%  netctl-auto               0         swapper/3
       1.17%  swapper                 233    irq/33-iwlwifi

Note that pid 0 exists for each cpu so have comm of 'swapper/N'.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1450804030-29193-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Namhyung Kim 2015-12-23 02:07:02 +09:00 committed by Arnaldo Carvalho de Melo
parent 40184c46a3
commit c7c2a5e40f

View File

@ -1531,6 +1531,216 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
return 0;
}
struct hpp_dynamic_entry {
struct perf_hpp_fmt hpp;
struct perf_evsel *evsel;
struct format_field *field;
unsigned dynamic_len;
};
static int hde_width(struct hpp_dynamic_entry *hde)
{
if (!hde->hpp.len) {
int len = hde->dynamic_len;
int namelen = strlen(hde->field->name);
int fieldlen = hde->field->size;
if (namelen > len)
len = namelen;
if (!(hde->field->flags & FIELD_IS_STRING)) {
/* length for print hex numbers */
fieldlen = hde->field->size * 2 + 2;
}
if (fieldlen > len)
len = fieldlen;
hde->hpp.len = len;
}
return hde->hpp.len;
}
static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct perf_evsel *evsel __maybe_unused)
{
struct hpp_dynamic_entry *hde;
size_t len = fmt->user_len;
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
if (!len)
len = hde_width(hde);
return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
}
static int __sort__hde_width(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp __maybe_unused,
struct perf_evsel *evsel __maybe_unused)
{
struct hpp_dynamic_entry *hde;
size_t len = fmt->user_len;
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
if (!len)
len = hde_width(hde);
return len;
}
static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
struct hpp_dynamic_entry *hde;
size_t len = fmt->user_len;
struct trace_seq seq;
int ret;
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
if (!len)
len = hde_width(hde);
if (hists_to_evsel(he->hists) != hde->evsel)
return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, "N/A");
trace_seq_init(&seq);
pevent_print_field(&seq, he->raw_data, hde->field);
ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, seq.buffer);
trace_seq_destroy(&seq);
return ret;
}
static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
struct hist_entry *a, struct hist_entry *b)
{
struct hpp_dynamic_entry *hde;
struct format_field *field;
unsigned offset, size;
hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
if (hists_to_evsel(a->hists) != hde->evsel)
return 0;
field = hde->field;
if (field->flags & FIELD_IS_DYNAMIC) {
unsigned long long dyn;
pevent_read_number_field(field, a->raw_data, &dyn);
offset = dyn & 0xffff;
size = (dyn >> 16) & 0xffff;
/* record max width for output */
if (size > hde->dynamic_len)
hde->dynamic_len = size;
} else {
offset = field->offset;
size = field->size;
}
return memcmp(a->raw_data + offset, b->raw_data + offset, size);
}
static struct hpp_dynamic_entry *
__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
{
struct hpp_dynamic_entry *hde;
hde = malloc(sizeof(*hde));
if (hde == NULL) {
pr_debug("Memory allocation failed\n");
return NULL;
}
hde->evsel = evsel;
hde->field = field;
hde->dynamic_len = 0;
hde->hpp.name = field->name;
hde->hpp.header = __sort__hde_header;
hde->hpp.width = __sort__hde_width;
hde->hpp.entry = __sort__hde_entry;
hde->hpp.color = NULL;
hde->hpp.cmp = __sort__hde_cmp;
hde->hpp.collapse = __sort__hde_cmp;
hde->hpp.sort = __sort__hde_cmp;
INIT_LIST_HEAD(&hde->hpp.list);
INIT_LIST_HEAD(&hde->hpp.sort_list);
hde->hpp.elide = false;
hde->hpp.len = 0;
hde->hpp.user_len = 0;
return hde;
}
static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
{
char *str, *event_name, *field_name;
struct perf_evsel *evsel, *pos;
struct format_field *field;
struct hpp_dynamic_entry *hde;
int ret = 0;
if (evlist == NULL)
return -ENOENT;
str = strdup(tok);
if (str == NULL)
return -ENOMEM;
event_name = str;
field_name = strchr(str, '.');
if (field_name == NULL) {
ret = -EINVAL;
goto out;
}
*field_name++ = '\0';
evsel = NULL;
evlist__for_each(evlist, pos) {
if (!strcmp(pos->name, event_name)) {
evsel = pos;
break;
}
}
if (evsel == NULL) {
pr_debug("Cannot find event: %s\n", event_name);
ret = -ENOENT;
goto out;
}
if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
pr_debug("%s is not a tracepoint event\n", event_name);
ret = -EINVAL;
goto out;
}
field = pevent_find_any_field(evsel->tp_format, field_name);
if (field == NULL) {
pr_debug("Cannot find event field for %s.%s\n",
event_name, field_name);
ret = -ENOENT;
goto out;
}
hde = __alloc_dynamic_entry(evsel, field);
if (hde == NULL) {
ret = -ENOMEM;
goto out;
}
perf_hpp__register_sort_field(&hde->hpp);
out:
free(str);
return ret;
}
static int __sort_dimension__add(struct sort_dimension *sd)
{
if (sd->taken)
@ -1667,6 +1877,9 @@ static int sort_dimension__add(const char *tok,
return 0;
}
if (!add_dynamic_entry(evlist, tok))
return 0;
return -ESRCH;
}