linux_dsm_epyc7002/tools/lib/traceevent/plugin_sched_switch.c
Daniel Bristot de Oliveira 074859184d tools lib traceevent: Fix prev/next_prio for deadline tasks
Currently, the sched:sched_switch tracepoint reports deadline tasks with
priority -1. But when reading the trace via perf script I've got the
following output:

  # ./d & # (d is a deadline task, see [1])
  # perf record -e sched:sched_switch -a sleep 1
  # perf script
      ...
         swapper     0 [000]  2146.962441: sched:sched_switch: swapper/0:0 [120] R ==> d:2593 [4294967295]
               d  2593 [000]  2146.972472: sched:sched_switch: d:2593 [4294967295] R ==> g:2590 [4294967295]

The task d reports the wrong priority [4294967295]. This happens because
the "int prio" is stored in an unsigned long long val. Although it is
set as a %lld, as int is shorter than unsigned long long,
trace_seq_printf prints it as a positive number.

The fix is just to cast the val as an int, and print it as a %d,
as in the sched:sched_switch tracepoint's "format".

The output with the fix is:

  # ./d &
  # perf record -e sched:sched_switch -a sleep 1
  # perf script
      ...
         swapper     0 [000]  4306.374037: sched:sched_switch: swapper/0:0 [120] R ==> d:10941 [-1]
               d 10941 [000]  4306.383823: sched:sched_switch: d:10941 [-1] R ==> swapper/0:0 [120]

[1] d.c

 ---
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/types.h>
  #include <linux/sched.h>

  struct sched_attr {
	__u32 size, sched_policy;
	__u64 sched_flags;
	__s32 sched_nice;
	__u32 sched_priority;
	__u64 sched_runtime, sched_deadline, sched_period;
  };

  int sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags)
  {
	return syscall(__NR_sched_setattr, pid, attr, flags);
  }

  int main(void)
  {
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE, /* This creates a 10ms/30ms reservation */
		.sched_runtime	= 10 * 1000 * 1000,
		.sched_period	= attr.sched_deadline = 30 * 1000 * 1000,
	};

	if (sched_setattr(0, &attr, 0) < 0) {
		perror("sched_setattr");
		return -1;
	}

	for(;;);
  }
 ---

Committer notes:

Got the program from the provided URL, http://bristot.me/lkml/d.c,
trimmed it and included in the cset log above, so that we have
everything needed to test it in one place.

Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/866ef75bcebf670ae91c6a96daa63597ba981f0d.1483443552.git.bristot@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-01-03 16:11:12 -03:00

161 lines
4.4 KiB
C

/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License (not later!)
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, see <http://www.gnu.org/licenses>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "event-parse.h"
static void write_state(struct trace_seq *s, int val)
{
const char states[] = "SDTtZXxW";
int found = 0;
int i;
for (i = 0; i < (sizeof(states) - 1); i++) {
if (!(val & (1 << i)))
continue;
if (found)
trace_seq_putc(s, '|');
found = 1;
trace_seq_putc(s, states[i]);
}
if (!found)
trace_seq_putc(s, 'R');
}
static void write_and_save_comm(struct format_field *field,
struct pevent_record *record,
struct trace_seq *s, int pid)
{
const char *comm;
int len;
comm = (char *)(record->data + field->offset);
len = s->len;
trace_seq_printf(s, "%.*s",
field->size, comm);
/* make sure the comm has a \0 at the end. */
trace_seq_terminate(s);
comm = &s->buffer[len];
/* Help out the comm to ids. This will handle dups */
pevent_register_comm(field->event->pevent, comm, pid);
}
static int sched_wakeup_handler(struct trace_seq *s,
struct pevent_record *record,
struct event_format *event, void *context)
{
struct format_field *field;
unsigned long long val;
if (pevent_get_field_val(s, event, "pid", record, &val, 1))
return trace_seq_putc(s, '!');
field = pevent_find_any_field(event, "comm");
if (field) {
write_and_save_comm(field, record, s, val);
trace_seq_putc(s, ':');
}
trace_seq_printf(s, "%lld", val);
if (pevent_get_field_val(s, event, "prio", record, &val, 0) == 0)
trace_seq_printf(s, " [%lld]", val);
if (pevent_get_field_val(s, event, "success", record, &val, 1) == 0)
trace_seq_printf(s, " success=%lld", val);
if (pevent_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
trace_seq_printf(s, " CPU:%03llu", val);
return 0;
}
static int sched_switch_handler(struct trace_seq *s,
struct pevent_record *record,
struct event_format *event, void *context)
{
struct format_field *field;
unsigned long long val;
if (pevent_get_field_val(s, event, "prev_pid", record, &val, 1))
return trace_seq_putc(s, '!');
field = pevent_find_any_field(event, "prev_comm");
if (field) {
write_and_save_comm(field, record, s, val);
trace_seq_putc(s, ':');
}
trace_seq_printf(s, "%lld ", val);
if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
trace_seq_printf(s, "[%d] ", (int) val);
if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0)
write_state(s, val);
trace_seq_puts(s, " ==> ");
if (pevent_get_field_val(s, event, "next_pid", record, &val, 1))
return trace_seq_putc(s, '!');
field = pevent_find_any_field(event, "next_comm");
if (field) {
write_and_save_comm(field, record, s, val);
trace_seq_putc(s, ':');
}
trace_seq_printf(s, "%lld", val);
if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
trace_seq_printf(s, " [%d]", (int) val);
return 0;
}
int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
{
pevent_register_event_handler(pevent, -1, "sched", "sched_switch",
sched_switch_handler, NULL);
pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup",
sched_wakeup_handler, NULL);
pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup_new",
sched_wakeup_handler, NULL);
return 0;
}
void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
{
pevent_unregister_event_handler(pevent, -1, "sched", "sched_switch",
sched_switch_handler, NULL);
pevent_unregister_event_handler(pevent, -1, "sched", "sched_wakeup",
sched_wakeup_handler, NULL);
pevent_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new",
sched_wakeup_handler, NULL);
}