linux_dsm_epyc7002/tools/perf/util/bpf-loader.c

1593 lines
36 KiB
C
Raw Normal View History

/*
* bpf-loader.c
*
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
* Copyright (C) 2015 Huawei Inc.
*/
#include <linux/bpf.h>
#include <bpf/libbpf.h>
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
#include <bpf/bpf.h>
#include <linux/err.h>
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
#include <linux/string.h>
#include "perf.h"
#include "debug.h"
#include "bpf-loader.h"
#include "bpf-prologue.h"
#include "llvm-utils.h"
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
#include "probe-event.h"
#include "probe-finder.h" // for MAX_PROBES
2016-02-22 16:10:34 +07:00
#include "parse-events.h"
perf tools: Compile scriptlets to BPF objects when passing '.c' to --event This patch provides infrastructure for passing source files to --event directly using: # perf record --event bpf-file.c command This patch does following works: 1) Allow passing '.c' file to '--event'. parse_events_load_bpf() is expanded to allow caller tell it whether the passed file is source file or object. 2) llvm__compile_bpf() is called to compile the '.c' file, the result is saved into memory. Use bpf_object__open_buffer() to load the in-memory object. Introduces a bpf-script-example.c so we can manually test it: # perf record --clang-opt "-DLINUX_VERSION_CODE=0x40200" --event ./bpf-script-example.c sleep 1 Note that '--clang-opt' must put before '--event'. Futher patches will merge it into a testcase so can be tested automatically. Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-10-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:20 +07:00
#include "llvm-utils.h"
#define DEFINE_PRINT_FN(name, level) \
static int libbpf_##name(const char *fmt, ...) \
{ \
va_list args; \
int ret; \
\
va_start(args, fmt); \
ret = veprintf(level, verbose, pr_fmt(fmt), args);\
va_end(args); \
return ret; \
}
perf bpf: Mute libbpf when '-v' not set According to [1], libbpf should be muted. This patch reset info and warning message level to ensure libbpf doesn't output anything even if error happened. [1] http://lkml.kernel.org/r/20151020151255.GF5119@kernel.org Committer note: Before: Testing it with an incompatible kernel version in the .c file that generated foo.o: [root@zoo ~]# perf record -e /tmp/foo.o sleep 1 libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: libbpf: -- END LOG -- libbpf: failed to load program 'fork=_do_fork' libbpf: failed to load object '/tmp/foo.o' event syntax error: '/tmp/foo.o' \___ Invalid argument: Are you root and runing a CONFIG_BPF_SYSCALL kernel? (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events [root@zoo ~]# After: [root@zoo ~]# perf record -e /tmp/foo.o sleep 1 event syntax error: '/tmp/foo.o' \___ Invalid argument: Are you root and runing a CONFIG_BPF_SYSCALL kernel? (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events [root@zoo ~]# This, BTW, need fixing to emit a proper message by validating the version in the foo.o "version" ELF section against the running kernel, warning the user instead of asking the kernel to load a binary that it will refuse due to unmatching kernel version. Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446547486-229499-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-03 17:44:43 +07:00
DEFINE_PRINT_FN(warning, 1)
DEFINE_PRINT_FN(info, 1)
DEFINE_PRINT_FN(debug, 1)
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
struct bpf_prog_priv {
struct perf_probe_event pev;
bool need_prologue;
struct bpf_insn *insns_buf;
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
int nr_types;
int *type_mapping;
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
};
static bool libbpf_initialized;
struct bpf_object *
bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
{
struct bpf_object *obj;
if (!libbpf_initialized) {
libbpf_set_print(libbpf_warning,
libbpf_info,
libbpf_debug);
libbpf_initialized = true;
}
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
if (IS_ERR(obj)) {
pr_debug("bpf: failed to load buffer\n");
return ERR_PTR(-EINVAL);
}
return obj;
}
perf tools: Compile scriptlets to BPF objects when passing '.c' to --event This patch provides infrastructure for passing source files to --event directly using: # perf record --event bpf-file.c command This patch does following works: 1) Allow passing '.c' file to '--event'. parse_events_load_bpf() is expanded to allow caller tell it whether the passed file is source file or object. 2) llvm__compile_bpf() is called to compile the '.c' file, the result is saved into memory. Use bpf_object__open_buffer() to load the in-memory object. Introduces a bpf-script-example.c so we can manually test it: # perf record --clang-opt "-DLINUX_VERSION_CODE=0x40200" --event ./bpf-script-example.c sleep 1 Note that '--clang-opt' must put before '--event'. Futher patches will merge it into a testcase so can be tested automatically. Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-10-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:20 +07:00
struct bpf_object *bpf__prepare_load(const char *filename, bool source)
{
struct bpf_object *obj;
if (!libbpf_initialized) {
libbpf_set_print(libbpf_warning,
libbpf_info,
libbpf_debug);
libbpf_initialized = true;
}
perf tools: Compile scriptlets to BPF objects when passing '.c' to --event This patch provides infrastructure for passing source files to --event directly using: # perf record --event bpf-file.c command This patch does following works: 1) Allow passing '.c' file to '--event'. parse_events_load_bpf() is expanded to allow caller tell it whether the passed file is source file or object. 2) llvm__compile_bpf() is called to compile the '.c' file, the result is saved into memory. Use bpf_object__open_buffer() to load the in-memory object. Introduces a bpf-script-example.c so we can manually test it: # perf record --clang-opt "-DLINUX_VERSION_CODE=0x40200" --event ./bpf-script-example.c sleep 1 Note that '--clang-opt' must put before '--event'. Futher patches will merge it into a testcase so can be tested automatically. Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-10-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:20 +07:00
if (source) {
int err;
void *obj_buf;
size_t obj_buf_sz;
err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz);
if (err)
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
perf tools: Compile scriptlets to BPF objects when passing '.c' to --event This patch provides infrastructure for passing source files to --event directly using: # perf record --event bpf-file.c command This patch does following works: 1) Allow passing '.c' file to '--event'. parse_events_load_bpf() is expanded to allow caller tell it whether the passed file is source file or object. 2) llvm__compile_bpf() is called to compile the '.c' file, the result is saved into memory. Use bpf_object__open_buffer() to load the in-memory object. Introduces a bpf-script-example.c so we can manually test it: # perf record --clang-opt "-DLINUX_VERSION_CODE=0x40200" --event ./bpf-script-example.c sleep 1 Note that '--clang-opt' must put before '--event'. Futher patches will merge it into a testcase so can be tested automatically. Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-10-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:20 +07:00
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
free(obj_buf);
} else
obj = bpf_object__open(filename);
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
if (IS_ERR(obj)) {
pr_debug("bpf: failed to load %s\n", filename);
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
return obj;
}
return obj;
}
void bpf__clear(void)
{
struct bpf_object *obj, *tmp;
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
bpf_object__for_each_safe(obj, tmp) {
bpf__unprobe(obj);
bpf_object__close(obj);
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
}
}
static void
clear_prog_priv(struct bpf_program *prog __maybe_unused,
void *_priv)
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
{
struct bpf_prog_priv *priv = _priv;
cleanup_perf_probe_events(&priv->pev, 1);
zfree(&priv->insns_buf);
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
zfree(&priv->type_mapping);
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
free(priv);
}
static int
prog_config__exec(const char *value, struct perf_probe_event *pev)
{
pev->uprobes = true;
pev->target = strdup(value);
if (!pev->target)
return -ENOMEM;
return 0;
}
static int
prog_config__module(const char *value, struct perf_probe_event *pev)
{
pev->uprobes = false;
pev->target = strdup(value);
if (!pev->target)
return -ENOMEM;
return 0;
}
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
static int
prog_config__bool(const char *value, bool *pbool, bool invert)
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
{
int err;
bool bool_value;
if (!pbool)
return -EINVAL;
err = strtobool(value, &bool_value);
if (err)
return err;
*pbool = invert ? !bool_value : bool_value;
return 0;
}
static int
prog_config__inlines(const char *value,
struct perf_probe_event *pev __maybe_unused)
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
{
return prog_config__bool(value, &probe_conf.no_inlines, true);
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
}
static int
prog_config__force(const char *value,
struct perf_probe_event *pev __maybe_unused)
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
{
return prog_config__bool(value, &probe_conf.force_add, false);
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
}
static struct {
const char *key;
const char *usage;
const char *desc;
int (*func)(const char *, struct perf_probe_event *);
} bpf_prog_config_terms[] = {
{
.key = "exec",
.usage = "exec=<full path of file>",
.desc = "Set uprobe target",
.func = prog_config__exec,
},
{
.key = "module",
.usage = "module=<module name> ",
.desc = "Set kprobe module",
.func = prog_config__module,
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
},
{
.key = "inlines",
.usage = "inlines=[yes|no] ",
.desc = "Probe at inline symbol",
.func = prog_config__inlines,
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
},
{
.key = "force",
.usage = "force=[yes|no] ",
.desc = "Forcibly add events with existing name",
.func = prog_config__force,
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
},
};
static int
do_prog_config(const char *key, const char *value,
struct perf_probe_event *pev)
{
unsigned int i;
pr_debug("config bpf program: %s=%s\n", key, value);
for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
if (strcmp(key, bpf_prog_config_terms[i].key) == 0)
return bpf_prog_config_terms[i].func(value, pev);
pr_debug("BPF: ERROR: invalid program config option: %s=%s\n",
key, value);
pr_debug("\nHint: Valid options are:\n");
for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage,
bpf_prog_config_terms[i].desc);
pr_debug("\n");
return -BPF_LOADER_ERRNO__PROGCONF_TERM;
}
static const char *
parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev)
{
char *text = strdup(config_str);
char *sep, *line;
const char *main_str = NULL;
int err = 0;
if (!text) {
pr_debug("No enough memory: dup config_str failed\n");
return ERR_PTR(-ENOMEM);
}
line = text;
while ((sep = strchr(line, ';'))) {
char *equ;
*sep = '\0';
equ = strchr(line, '=');
if (!equ) {
pr_warning("WARNING: invalid config in BPF object: %s\n",
line);
pr_warning("\tShould be 'key=value'.\n");
goto nextline;
}
*equ = '\0';
err = do_prog_config(line, equ + 1, pev);
if (err)
break;
nextline:
line = sep + 1;
}
if (!err)
main_str = config_str + (line - text);
free(text);
return err ? ERR_PTR(err) : main_str;
}
static int
parse_prog_config(const char *config_str, struct perf_probe_event *pev)
{
int err;
const char *main_str = parse_prog_config_kvpair(config_str, pev);
if (IS_ERR(main_str))
return PTR_ERR(main_str);
err = parse_perf_probe_command(main_str, pev);
if (err < 0) {
pr_debug("bpf: '%s' is not a valid config string\n",
config_str);
/* parse failed, don't need clear pev. */
return -BPF_LOADER_ERRNO__CONFIG;
}
return 0;
}
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
static int
config_bpf_program(struct bpf_program *prog)
{
struct perf_probe_event *pev = NULL;
struct bpf_prog_priv *priv = NULL;
const char *config_str;
int err;
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
/* Initialize per-program probing setting */
probe_conf.no_inlines = false;
probe_conf.force_add = false;
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
config_str = bpf_program__title(prog, false);
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
if (IS_ERR(config_str)) {
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
pr_debug("bpf: unable to get title for program\n");
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
return PTR_ERR(config_str);
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
}
priv = calloc(sizeof(*priv), 1);
if (!priv) {
pr_debug("bpf: failed to alloc priv\n");
return -ENOMEM;
}
pev = &priv->pev;
pr_debug("bpf: config program '%s'\n", config_str);
err = parse_prog_config(config_str, pev);
if (err)
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
goto errout;
if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
config_str, PERF_BPF_PROBE_GROUP);
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
err = -BPF_LOADER_ERRNO__GROUP;
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
goto errout;
} else if (!pev->group)
pev->group = strdup(PERF_BPF_PROBE_GROUP);
if (!pev->group) {
pr_debug("bpf: strdup failed\n");
err = -ENOMEM;
goto errout;
}
if (!pev->event) {
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
pr_debug("bpf: '%s': event name is missing. Section name should be 'key=value'\n",
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
config_str);
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
err = -BPF_LOADER_ERRNO__EVENTNAME;
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
goto errout;
}
pr_debug("bpf: config '%s' is ok\n", config_str);
err = bpf_program__set_private(prog, priv, clear_prog_priv);
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
if (err) {
pr_debug("Failed to set priv for program '%s'\n", config_str);
goto errout;
}
return 0;
errout:
if (pev)
clear_perf_probe_event(pev);
free(priv);
return err;
}
static int bpf__prepare_probe(void)
{
static int err = 0;
static bool initialized = false;
/*
* Make err static, so if init failed the first, bpf__prepare_probe()
* fails each time without calling init_probe_symbol_maps multiple
* times.
*/
if (initialized)
return err;
initialized = true;
err = init_probe_symbol_maps(false);
if (err < 0)
pr_debug("Failed to init_probe_symbol_maps\n");
probe_conf.max_probes = MAX_PROBES;
return err;
}
static int
preproc_gen_prologue(struct bpf_program *prog, int n,
struct bpf_insn *orig_insns, int orig_insns_cnt,
struct bpf_prog_prep_result *res)
{
struct probe_trace_event *tev;
struct perf_probe_event *pev;
struct bpf_prog_priv *priv;
struct bpf_insn *buf;
size_t prologue_cnt = 0;
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
int i, err;
err = bpf_program__get_private(prog, (void **)&priv);
if (err || !priv)
goto errout;
pev = &priv->pev;
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
if (n < 0 || n >= priv->nr_types)
goto errout;
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
/* Find a tev belongs to that type */
for (i = 0; i < pev->ntevs; i++) {
if (priv->type_mapping[i] == n)
break;
}
if (i >= pev->ntevs) {
pr_debug("Internal error: prologue type %d not found\n", n);
return -BPF_LOADER_ERRNO__PROLOGUE;
}
tev = &pev->tevs[i];
buf = priv->insns_buf;
err = bpf__gen_prologue(tev->args, tev->nargs,
buf, &prologue_cnt,
BPF_MAXINSNS - orig_insns_cnt);
if (err) {
const char *title;
title = bpf_program__title(prog, false);
if (!title)
title = "[unknown]";
pr_debug("Failed to generate prologue for program %s\n",
title);
return err;
}
memcpy(&buf[prologue_cnt], orig_insns,
sizeof(struct bpf_insn) * orig_insns_cnt);
res->new_insn_ptr = buf;
res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
res->pfd = NULL;
return 0;
errout:
pr_debug("Internal error in preproc_gen_prologue\n");
return -BPF_LOADER_ERRNO__PROLOGUE;
}
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
/*
* compare_tev_args is reflexive, transitive and antisymmetric.
* I can proof it but this margin is too narrow to contain.
*/
static int compare_tev_args(const void *ptev1, const void *ptev2)
{
int i, ret;
const struct probe_trace_event *tev1 =
*(const struct probe_trace_event **)ptev1;
const struct probe_trace_event *tev2 =
*(const struct probe_trace_event **)ptev2;
ret = tev2->nargs - tev1->nargs;
if (ret)
return ret;
for (i = 0; i < tev1->nargs; i++) {
struct probe_trace_arg *arg1, *arg2;
struct probe_trace_arg_ref *ref1, *ref2;
arg1 = &tev1->args[i];
arg2 = &tev2->args[i];
ret = strcmp(arg1->value, arg2->value);
if (ret)
return ret;
ref1 = arg1->ref;
ref2 = arg2->ref;
while (ref1 && ref2) {
ret = ref2->offset - ref1->offset;
if (ret)
return ret;
ref1 = ref1->next;
ref2 = ref2->next;
}
if (ref1 || ref2)
return ref2 ? 1 : -1;
}
return 0;
}
/*
* Assign a type number to each tevs in a pev.
* mapping is an array with same slots as tevs in that pev.
* nr_types will be set to number of types.
*/
static int map_prologue(struct perf_probe_event *pev, int *mapping,
int *nr_types)
{
int i, type = 0;
struct probe_trace_event **ptevs;
size_t array_sz = sizeof(*ptevs) * pev->ntevs;
ptevs = malloc(array_sz);
if (!ptevs) {
pr_debug("No ehough memory: alloc ptevs failed\n");
return -ENOMEM;
}
pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
for (i = 0; i < pev->ntevs; i++)
ptevs[i] = &pev->tevs[i];
qsort(ptevs, pev->ntevs, sizeof(*ptevs),
compare_tev_args);
for (i = 0; i < pev->ntevs; i++) {
int n;
n = ptevs[i] - pev->tevs;
if (i == 0) {
mapping[n] = type;
pr_debug("mapping[%d]=%d\n", n, type);
continue;
}
if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
mapping[n] = type;
else
mapping[n] = ++type;
pr_debug("mapping[%d]=%d\n", n, mapping[n]);
}
free(ptevs);
*nr_types = type + 1;
return 0;
}
static int hook_load_preprocessor(struct bpf_program *prog)
{
struct perf_probe_event *pev;
struct bpf_prog_priv *priv;
bool need_prologue = false;
int err, i;
err = bpf_program__get_private(prog, (void **)&priv);
if (err || !priv) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
pev = &priv->pev;
for (i = 0; i < pev->ntevs; i++) {
struct probe_trace_event *tev = &pev->tevs[i];
if (tev->nargs > 0) {
need_prologue = true;
break;
}
}
/*
* Since all tevs don't have argument, we don't need generate
* prologue.
*/
if (!need_prologue) {
priv->need_prologue = false;
return 0;
}
priv->need_prologue = true;
priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
if (!priv->insns_buf) {
pr_debug("No enough memory: alloc insns_buf failed\n");
return -ENOMEM;
}
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
if (!priv->type_mapping) {
pr_debug("No enough memory: alloc type_mapping failed\n");
return -ENOMEM;
}
memset(priv->type_mapping, -1,
sizeof(int) * pev->ntevs);
err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
if (err)
return err;
err = bpf_program__set_prep(prog, priv->nr_types,
preproc_gen_prologue);
return err;
}
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
int bpf__probe(struct bpf_object *obj)
{
int err = 0;
struct bpf_program *prog;
struct bpf_prog_priv *priv;
struct perf_probe_event *pev;
err = bpf__prepare_probe();
if (err) {
pr_debug("bpf__prepare_probe failed\n");
return err;
}
bpf_object__for_each_program(prog, obj) {
err = config_bpf_program(prog);
if (err)
goto out;
err = bpf_program__get_private(prog, (void **)&priv);
if (err || !priv)
goto out;
pev = &priv->pev;
err = convert_perf_probe_events(pev, 1);
if (err < 0) {
pr_debug("bpf_probe: failed to convert perf probe events");
goto out;
}
err = apply_perf_probe_events(pev, 1);
if (err < 0) {
pr_debug("bpf_probe: failed to apply perf probe events");
goto out;
}
/*
* After probing, let's consider prologue, which
* adds program fetcher to BPF programs.
*
* hook_load_preprocessorr() hooks pre-processor
* to bpf_program, let it generate prologue
* dynamically during loading.
*/
err = hook_load_preprocessor(prog);
if (err)
goto out;
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
}
out:
return err < 0 ? err : 0;
}
#define EVENTS_WRITE_BUFSIZE 4096
int bpf__unprobe(struct bpf_object *obj)
{
int err, ret = 0;
struct bpf_program *prog;
struct bpf_prog_priv *priv;
bpf_object__for_each_program(prog, obj) {
int i;
err = bpf_program__get_private(prog, (void **)&priv);
if (err || !priv)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
struct probe_trace_event *tev = &priv->pev.tevs[i];
char name_buf[EVENTS_WRITE_BUFSIZE];
struct strfilter *delfilter;
snprintf(name_buf, EVENTS_WRITE_BUFSIZE,
"%s:%s", tev->group, tev->event);
name_buf[EVENTS_WRITE_BUFSIZE - 1] = '\0';
delfilter = strfilter__new(name_buf, NULL);
if (!delfilter) {
pr_debug("Failed to create filter for unprobing\n");
ret = -ENOMEM;
continue;
}
err = del_perf_probe_events(delfilter);
strfilter__delete(delfilter);
if (err) {
pr_debug("Failed to delete %s\n", name_buf);
ret = err;
continue;
}
}
}
return ret;
}
perf tools: Load eBPF object into kernel This patch utilizes bpf_object__load() provided by libbpf to load all objects into kernel. Committer notes: Testing it: When using an incorrect kernel version number, i.e., having this in your eBPF proggie: int _version __attribute__((section("version"), used)) = 0x40100; For a 4.3.0-rc6+ kernel, say, this happens and needs checking at event parsing time, to provide a better error report to the user: # perf record --event /tmp/foo.o sleep 1 libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: libbpf: -- END LOG -- libbpf: failed to load program 'fork=_do_fork' libbpf: failed to load object '/tmp/foo.o' event syntax error: '/tmp/foo.o' \___ Invalid argument: Are you root and runing a CONFIG_BPF_SYSCALL kernel? (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events If we instead make it match, i.e. use 0x40300 on this v4.3.0-rc6+ kernel, the whole process goes thru: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.202 MB perf.data ] # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:16 +07:00
int bpf__load(struct bpf_object *obj)
{
int err;
err = bpf_object__load(obj);
if (err) {
pr_debug("bpf: load objects failed\n");
return err;
}
return 0;
}
perf bpf: Collect perf_evsel in BPF object files This patch creates a 'struct perf_evsel' for every probe in a BPF object file(s) and fills 'struct evlist' with them. The previously introduced dummy event is now removed. After this patch, the following command: # perf record --event filter.o ls Can trace on each of the probes defined in filter.o. The core of this patch is bpf__foreach_tev(), which calls a callback function for each 'struct probe_trace_event' event for a bpf program with each associated file descriptors. The add_bpf_event() callback creates evsels by calling parse_events_add_tracepoint(). Since bpf-loader.c will not be built if libbpf is turned off, an empty bpf__foreach_tev() is defined in bpf-loader.h to avoid build errors. Committer notes: Before: # /tmp/oldperf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.198 MB perf.data ] # perf evlist /tmp/foo.o # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 I.e. we create just the PERF_TYPE_SOFTWARE (type: 1), PERF_COUNT_SW_DUMMY(config 0x9) event, now, with this patch: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.210 MB perf.data ] # perf evlist -v perf_bpf_probe:fork: type: 2, size: 112, config: 0x6bd, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW, disabled: 1, inherit: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # We now have a PERF_TYPE_SOFTWARE (type: 1), but the config states 0x6bd, which is how, after setting up the event via the kprobes interface, the 'perf_bpf_probe:fork' event is accessible via the perf_event_open syscall. This is all transient, as soon as the 'perf record' session ends, these probes will go away. To see how it looks like, lets try doing a neverending session, one that expects a control+C to end: # perf record --event /tmp/foo.o -a So, with that in place, we can use 'perf probe' to see what is in place: # perf probe -l perf_bpf_probe:fork (on _do_fork@acme/git/linux/kernel/fork.c) We also can use debugfs: [root@felicio ~]# cat /sys/kernel/debug/tracing/kprobe_events p:perf_bpf_probe/fork _text+638512 Ok, now lets stop and see if we got some forks: [root@felicio linux]# perf record --event /tmp/foo.o -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.325 MB perf.data (111 samples) ] [root@felicio linux]# perf script sshd 1271 [003] 81797.507678: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [000] 81797.524917: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.381603: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.408635: perf_bpf_probe:fork: (ffffffff8109be30) <SNIP> Sure enough, we have 111 forks :-) Callchains seems to work as well: # perf report --stdio --no-child # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 562 of event 'perf_bpf_probe:fork' # Event count (approx.): 562 # # Overhead Command Shared Object Symbol # ........ ........ ................ ............ # 44.66% sh [kernel.vmlinux] [k] _do_fork | ---_do_fork entry_SYSCALL_64_fastpath __libc_fork make_child 26.16% make [kernel.vmlinux] [k] _do_fork <SNIP> # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:17 +07:00
int bpf__foreach_tev(struct bpf_object *obj,
bpf_prog_iter_callback_t func,
void *arg)
{
struct bpf_program *prog;
int err;
bpf_object__for_each_program(prog, obj) {
struct probe_trace_event *tev;
struct perf_probe_event *pev;
struct bpf_prog_priv *priv;
int i, fd;
err = bpf_program__get_private(prog,
(void **)&priv);
if (err || !priv) {
pr_debug("bpf: failed to get private field\n");
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
return -BPF_LOADER_ERRNO__INTERNAL;
perf bpf: Collect perf_evsel in BPF object files This patch creates a 'struct perf_evsel' for every probe in a BPF object file(s) and fills 'struct evlist' with them. The previously introduced dummy event is now removed. After this patch, the following command: # perf record --event filter.o ls Can trace on each of the probes defined in filter.o. The core of this patch is bpf__foreach_tev(), which calls a callback function for each 'struct probe_trace_event' event for a bpf program with each associated file descriptors. The add_bpf_event() callback creates evsels by calling parse_events_add_tracepoint(). Since bpf-loader.c will not be built if libbpf is turned off, an empty bpf__foreach_tev() is defined in bpf-loader.h to avoid build errors. Committer notes: Before: # /tmp/oldperf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.198 MB perf.data ] # perf evlist /tmp/foo.o # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 I.e. we create just the PERF_TYPE_SOFTWARE (type: 1), PERF_COUNT_SW_DUMMY(config 0x9) event, now, with this patch: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.210 MB perf.data ] # perf evlist -v perf_bpf_probe:fork: type: 2, size: 112, config: 0x6bd, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW, disabled: 1, inherit: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # We now have a PERF_TYPE_SOFTWARE (type: 1), but the config states 0x6bd, which is how, after setting up the event via the kprobes interface, the 'perf_bpf_probe:fork' event is accessible via the perf_event_open syscall. This is all transient, as soon as the 'perf record' session ends, these probes will go away. To see how it looks like, lets try doing a neverending session, one that expects a control+C to end: # perf record --event /tmp/foo.o -a So, with that in place, we can use 'perf probe' to see what is in place: # perf probe -l perf_bpf_probe:fork (on _do_fork@acme/git/linux/kernel/fork.c) We also can use debugfs: [root@felicio ~]# cat /sys/kernel/debug/tracing/kprobe_events p:perf_bpf_probe/fork _text+638512 Ok, now lets stop and see if we got some forks: [root@felicio linux]# perf record --event /tmp/foo.o -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.325 MB perf.data (111 samples) ] [root@felicio linux]# perf script sshd 1271 [003] 81797.507678: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [000] 81797.524917: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.381603: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.408635: perf_bpf_probe:fork: (ffffffff8109be30) <SNIP> Sure enough, we have 111 forks :-) Callchains seems to work as well: # perf report --stdio --no-child # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 562 of event 'perf_bpf_probe:fork' # Event count (approx.): 562 # # Overhead Command Shared Object Symbol # ........ ........ ................ ............ # 44.66% sh [kernel.vmlinux] [k] _do_fork | ---_do_fork entry_SYSCALL_64_fastpath __libc_fork make_child 26.16% make [kernel.vmlinux] [k] _do_fork <SNIP> # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:17 +07:00
}
pev = &priv->pev;
for (i = 0; i < pev->ntevs; i++) {
tev = &pev->tevs[i];
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
if (priv->need_prologue) {
int type = priv->type_mapping[i];
fd = bpf_program__nth_fd(prog, type);
} else {
fd = bpf_program__fd(prog);
perf bpf: Use same BPF program if arguments are identical This patch allows creating only one BPF program for different 'probe_trace_event'(tev) entries generated by one 'perf_probe_event'(pev) if their prologues are identical. This is done by comparing the argument list of different tev instances, and the maps type of prologue and tev using a mapping array. This patch utilizes qsort to sort the tevs. After sorting, tevs with identical argument lists will be grouped together. Test result: Sample BPF program: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("inlines=no;" "func=SyS_dup? oldfd") int func(void *ctx) { return 1; } It would probe at SyS_dup2 and SyS_dup3, obtaining oldfd as its argument. The following cmdline shows a BPF program being loaded into the kernel by perf: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog Before this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 24858 lrwx------ 1 root root 64 Nov 14 04:09 3 -> anon_inode:bpf-prog lrwx------ 1 root root 64 Nov 14 04:09 4 -> anon_inode:bpf-prog ... After this patch: # perf record -e ./test_bpf_arg.c sleep 4 & sleep 1 && ls /proc/$!/fd/ -l | grep bpf-prog [1] 25699 lrwx------ 1 root root 64 Nov 14 04:10 3 -> anon_inode:bpf-prog ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447749170-175898-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-17 15:32:47 +07:00
}
perf bpf: Collect perf_evsel in BPF object files This patch creates a 'struct perf_evsel' for every probe in a BPF object file(s) and fills 'struct evlist' with them. The previously introduced dummy event is now removed. After this patch, the following command: # perf record --event filter.o ls Can trace on each of the probes defined in filter.o. The core of this patch is bpf__foreach_tev(), which calls a callback function for each 'struct probe_trace_event' event for a bpf program with each associated file descriptors. The add_bpf_event() callback creates evsels by calling parse_events_add_tracepoint(). Since bpf-loader.c will not be built if libbpf is turned off, an empty bpf__foreach_tev() is defined in bpf-loader.h to avoid build errors. Committer notes: Before: # /tmp/oldperf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.198 MB perf.data ] # perf evlist /tmp/foo.o # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 I.e. we create just the PERF_TYPE_SOFTWARE (type: 1), PERF_COUNT_SW_DUMMY(config 0x9) event, now, with this patch: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.210 MB perf.data ] # perf evlist -v perf_bpf_probe:fork: type: 2, size: 112, config: 0x6bd, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW, disabled: 1, inherit: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # We now have a PERF_TYPE_SOFTWARE (type: 1), but the config states 0x6bd, which is how, after setting up the event via the kprobes interface, the 'perf_bpf_probe:fork' event is accessible via the perf_event_open syscall. This is all transient, as soon as the 'perf record' session ends, these probes will go away. To see how it looks like, lets try doing a neverending session, one that expects a control+C to end: # perf record --event /tmp/foo.o -a So, with that in place, we can use 'perf probe' to see what is in place: # perf probe -l perf_bpf_probe:fork (on _do_fork@acme/git/linux/kernel/fork.c) We also can use debugfs: [root@felicio ~]# cat /sys/kernel/debug/tracing/kprobe_events p:perf_bpf_probe/fork _text+638512 Ok, now lets stop and see if we got some forks: [root@felicio linux]# perf record --event /tmp/foo.o -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.325 MB perf.data (111 samples) ] [root@felicio linux]# perf script sshd 1271 [003] 81797.507678: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [000] 81797.524917: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.381603: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.408635: perf_bpf_probe:fork: (ffffffff8109be30) <SNIP> Sure enough, we have 111 forks :-) Callchains seems to work as well: # perf report --stdio --no-child # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 562 of event 'perf_bpf_probe:fork' # Event count (approx.): 562 # # Overhead Command Shared Object Symbol # ........ ........ ................ ............ # 44.66% sh [kernel.vmlinux] [k] _do_fork | ---_do_fork entry_SYSCALL_64_fastpath __libc_fork make_child 26.16% make [kernel.vmlinux] [k] _do_fork <SNIP> # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:17 +07:00
if (fd < 0) {
pr_debug("bpf: failed to get file descriptor\n");
return fd;
}
err = (*func)(tev, fd, arg);
if (err) {
pr_debug("bpf: call back failed, stop iterate\n");
return err;
}
}
}
return 0;
}
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
enum bpf_map_op_type {
BPF_MAP_OP_SET_VALUE,
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
BPF_MAP_OP_SET_EVSEL,
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
};
enum bpf_map_key_type {
BPF_MAP_KEY_ALL,
2016-02-22 16:10:34 +07:00
BPF_MAP_KEY_RANGES,
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
};
struct bpf_map_op {
struct list_head list;
enum bpf_map_op_type op_type;
enum bpf_map_key_type key_type;
2016-02-22 16:10:34 +07:00
union {
struct parse_events_array array;
} k;
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
union {
u64 value;
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
struct perf_evsel *evsel;
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
} v;
};
struct bpf_map_priv {
struct list_head ops_list;
};
static void
bpf_map_op__delete(struct bpf_map_op *op)
{
if (!list_empty(&op->list))
list_del(&op->list);
2016-02-22 16:10:34 +07:00
if (op->key_type == BPF_MAP_KEY_RANGES)
parse_events__clear_array(&op->k.array);
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
free(op);
}
static void
bpf_map_priv__purge(struct bpf_map_priv *priv)
{
struct bpf_map_op *pos, *n;
list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
list_del_init(&pos->list);
bpf_map_op__delete(pos);
}
}
static void
bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
void *_priv)
{
struct bpf_map_priv *priv = _priv;
bpf_map_priv__purge(priv);
free(priv);
}
2016-02-22 16:10:34 +07:00
static int
bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
{
op->key_type = BPF_MAP_KEY_ALL;
if (!term)
return 0;
if (term->array.nr_ranges) {
size_t memsz = term->array.nr_ranges *
sizeof(op->k.array.ranges[0]);
op->k.array.ranges = memdup(term->array.ranges, memsz);
if (!op->k.array.ranges) {
pr_debug("No enough memory to alloc indices for map\n");
return -ENOMEM;
}
op->key_type = BPF_MAP_KEY_RANGES;
op->k.array.nr_ranges = term->array.nr_ranges;
}
return 0;
}
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
static struct bpf_map_op *
2016-02-22 16:10:34 +07:00
bpf_map_op__new(struct parse_events_term *term)
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
{
struct bpf_map_op *op;
2016-02-22 16:10:34 +07:00
int err;
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
op = zalloc(sizeof(*op));
if (!op) {
pr_debug("Failed to alloc bpf_map_op\n");
return ERR_PTR(-ENOMEM);
}
INIT_LIST_HEAD(&op->list);
2016-02-22 16:10:34 +07:00
err = bpf_map_op_setkey(op, term);
if (err) {
free(op);
return ERR_PTR(err);
}
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
return op;
}
static int
bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
{
struct bpf_map_priv *priv;
const char *map_name;
int err;
map_name = bpf_map__get_name(map);
err = bpf_map__get_private(map, (void **)&priv);
if (err) {
pr_debug("Failed to get private from map %s\n", map_name);
return err;
}
if (!priv) {
priv = zalloc(sizeof(*priv));
if (!priv) {
pr_debug("No enough memory to alloc map private\n");
return -ENOMEM;
}
INIT_LIST_HEAD(&priv->ops_list);
if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
free(priv);
return -BPF_LOADER_ERRNO__INTERNAL;
}
}
list_add_tail(&op->list, &priv->ops_list);
return 0;
}
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
static struct bpf_map_op *
2016-02-22 16:10:34 +07:00
bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
{
struct bpf_map_op *op;
int err;
2016-02-22 16:10:34 +07:00
op = bpf_map_op__new(term);
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
if (IS_ERR(op))
return op;
err = bpf_map__add_op(map, op);
if (err) {
bpf_map_op__delete(op);
return ERR_PTR(err);
}
return op;
}
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
static int
__bpf_map__config_value(struct bpf_map *map,
struct parse_events_term *term)
{
struct bpf_map_def def;
struct bpf_map_op *op;
const char *map_name;
int err;
map_name = bpf_map__get_name(map);
err = bpf_map__get_def(map, &def);
if (err) {
pr_debug("Unable to get map definition from '%s'\n",
map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
if (def.type != BPF_MAP_TYPE_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
if (def.key_size < sizeof(unsigned int)) {
pr_debug("Map %s has incorrect key size\n", map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
}
switch (def.value_size) {
case 1:
case 2:
case 4:
case 8:
break;
default:
pr_debug("Map %s has incorrect value size\n", map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
}
2016-02-22 16:10:34 +07:00
op = bpf_map__add_newop(map, term);
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
if (IS_ERR(op))
return PTR_ERR(op);
op->op_type = BPF_MAP_OP_SET_VALUE;
op->v.value = term->val.num;
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
return 0;
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
}
static int
bpf_map__config_value(struct bpf_map *map,
struct parse_events_term *term,
struct perf_evlist *evlist __maybe_unused)
{
if (!term->err_val) {
pr_debug("Config value not set\n");
return -BPF_LOADER_ERRNO__OBJCONF_CONF;
}
if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
pr_debug("ERROR: wrong value type for 'value'\n");
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
}
return __bpf_map__config_value(map, term);
}
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
static int
__bpf_map__config_event(struct bpf_map *map,
struct parse_events_term *term,
struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
struct bpf_map_def def;
struct bpf_map_op *op;
const char *map_name;
int err;
map_name = bpf_map__get_name(map);
evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
if (!evsel) {
pr_debug("Event (for '%s') '%s' doesn't exist\n",
map_name, term->val.str);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
}
err = bpf_map__get_def(map, &def);
if (err) {
pr_debug("Unable to get map definition from '%s'\n",
map_name);
return err;
}
/*
* No need to check key_size and value_size:
* kernel has already checked them.
*/
if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
2016-02-22 16:10:34 +07:00
op = bpf_map__add_newop(map, term);
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
if (IS_ERR(op))
return PTR_ERR(op);
op->op_type = BPF_MAP_OP_SET_EVSEL;
op->v.evsel = evsel;
return 0;
}
static int
bpf_map__config_event(struct bpf_map *map,
struct parse_events_term *term,
struct perf_evlist *evlist)
{
if (!term->err_val) {
pr_debug("Config value not set\n");
return -BPF_LOADER_ERRNO__OBJCONF_CONF;
}
if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
pr_debug("ERROR: wrong value type for 'event'\n");
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
}
return __bpf_map__config_event(map, term, evlist);
}
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
struct bpf_obj_config__map_func {
const char *config_opt;
int (*config_func)(struct bpf_map *, struct parse_events_term *,
struct perf_evlist *);
};
struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
{"value", bpf_map__config_value},
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
{"event", bpf_map__config_event},
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
};
2016-02-22 16:10:34 +07:00
static int
config_map_indices_range_check(struct parse_events_term *term,
struct bpf_map *map,
const char *map_name)
{
struct parse_events_array *array = &term->array;
struct bpf_map_def def;
unsigned int i;
int err;
if (!array->nr_ranges)
return 0;
if (!array->ranges) {
pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
map_name, (int)array->nr_ranges);
return -BPF_LOADER_ERRNO__INTERNAL;
}
err = bpf_map__get_def(map, &def);
if (err) {
pr_debug("ERROR: Unable to get map definition from '%s'\n",
map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
for (i = 0; i < array->nr_ranges; i++) {
unsigned int start = array->ranges[i].start;
size_t length = array->ranges[i].length;
unsigned int idx = start + length - 1;
if (idx >= def.max_entries) {
pr_debug("ERROR: index %d too large\n", idx);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
}
}
return 0;
}
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
static int
bpf__obj_config_map(struct bpf_object *obj,
struct parse_events_term *term,
struct perf_evlist *evlist,
int *key_scan_pos)
{
/* key is "map:<mapname>.<config opt>" */
char *map_name = strdup(term->config + sizeof("map:") - 1);
struct bpf_map *map;
int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
char *map_opt;
size_t i;
if (!map_name)
return -ENOMEM;
map_opt = strchr(map_name, '.');
if (!map_opt) {
pr_debug("ERROR: Invalid map config: %s\n", map_name);
goto out;
}
*map_opt++ = '\0';
if (*map_opt == '\0') {
pr_debug("ERROR: Invalid map option: %s\n", term->config);
goto out;
}
map = bpf_object__get_map_by_name(obj, map_name);
if (!map) {
pr_debug("ERROR: Map %s doesn't exist\n", map_name);
err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
goto out;
}
2016-02-22 16:10:34 +07:00
*key_scan_pos += strlen(map_opt);
err = config_map_indices_range_check(term, map, map_name);
if (err)
goto out;
*key_scan_pos -= strlen(map_opt);
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
struct bpf_obj_config__map_func *func =
&bpf_obj_config__map_funcs[i];
if (strcmp(map_opt, func->config_opt) == 0) {
err = func->config_func(map, term, evlist);
goto out;
}
}
pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
out:
free(map_name);
if (!err)
key_scan_pos += strlen(map_opt);
return err;
}
int bpf__config_obj(struct bpf_object *obj,
struct parse_events_term *term,
struct perf_evlist *evlist,
int *error_pos)
{
int key_scan_pos = 0;
int err;
if (!obj || !term || !term->config)
return -EINVAL;
if (!prefixcmp(term->config, "map:")) {
key_scan_pos = sizeof("map:") - 1;
err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
goto out;
}
err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
out:
if (error_pos)
*error_pos = key_scan_pos;
return err;
}
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
typedef int (*map_config_func_t)(const char *name, int map_fd,
struct bpf_map_def *pdef,
struct bpf_map_op *op,
void *pkey, void *arg);
static int
foreach_key_array_all(map_config_func_t func,
void *arg, const char *name,
int map_fd, struct bpf_map_def *pdef,
struct bpf_map_op *op)
{
unsigned int i;
int err;
for (i = 0; i < pdef->max_entries; i++) {
err = func(name, map_fd, pdef, op, &i, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, i);
return err;
}
}
return 0;
}
2016-02-22 16:10:34 +07:00
static int
foreach_key_array_ranges(map_config_func_t func, void *arg,
const char *name, int map_fd,
struct bpf_map_def *pdef,
struct bpf_map_op *op)
{
unsigned int i, j;
int err;
for (i = 0; i < op->k.array.nr_ranges; i++) {
unsigned int start = op->k.array.ranges[i].start;
size_t length = op->k.array.ranges[i].length;
for (j = 0; j < length; j++) {
unsigned int idx = start + j;
err = func(name, map_fd, pdef, op, &idx, arg);
if (err) {
pr_debug("ERROR: failed to insert value to %s[%u]\n",
name, idx);
return err;
}
}
}
return 0;
}
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
static int
bpf_map_config_foreach_key(struct bpf_map *map,
map_config_func_t func,
void *arg)
{
int err, map_fd;
const char *name;
struct bpf_map_op *op;
struct bpf_map_def def;
struct bpf_map_priv *priv;
name = bpf_map__get_name(map);
err = bpf_map__get_private(map, (void **)&priv);
if (err) {
pr_debug("ERROR: failed to get private from map %s\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
if (!priv || list_empty(&priv->ops_list)) {
pr_debug("INFO: nothing to config for map %s\n", name);
return 0;
}
err = bpf_map__get_def(map, &def);
if (err) {
pr_debug("ERROR: failed to get definition from map %s\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
map_fd = bpf_map__get_fd(map);
if (map_fd < 0) {
pr_debug("ERROR: failed to get fd from map %s\n", name);
return map_fd;
}
list_for_each_entry(op, &priv->ops_list, list) {
switch (def.type) {
case BPF_MAP_TYPE_ARRAY:
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
err = foreach_key_array_all(func, arg, name,
map_fd, &def, op);
2016-02-22 16:10:34 +07:00
break;
case BPF_MAP_KEY_RANGES:
err = foreach_key_array_ranges(func, arg, name,
map_fd, &def,
op);
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
break;
default:
pr_debug("ERROR: keytype for map '%s' invalid\n",
name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
2016-02-22 16:10:34 +07:00
if (err)
return err;
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
break;
default:
pr_debug("ERROR: type of '%s' incorrect\n", name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
}
return 0;
}
static int
apply_config_value_for_key(int map_fd, void *pkey,
size_t val_size, u64 val)
{
int err = 0;
switch (val_size) {
case 1: {
u8 _val = (u8)(val);
err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
break;
}
case 2: {
u16 _val = (u16)(val);
err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
break;
}
case 4: {
u32 _val = (u32)(val);
err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
break;
}
case 8: {
err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
break;
}
default:
pr_debug("ERROR: invalid value size\n");
return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
}
if (err && errno)
err = -errno;
return err;
}
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
static int
apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
struct perf_evsel *evsel)
{
struct xyarray *xy = evsel->fd;
struct perf_event_attr *attr;
unsigned int key, events;
bool check_pass = false;
int *evt_fd;
int err;
if (!xy) {
pr_debug("ERROR: evsel not ready for map %s\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
if (xy->row_size / xy->entry_size != 1) {
pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
}
attr = &evsel->attr;
if (attr->inherit) {
pr_debug("ERROR: Can't put inherit event into map %s\n", name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
}
perf tools: Introduce bpf-output event Commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adds a helper to enable a BPF program to output data to a perf ring buffer through a new type of perf event, PERF_COUNT_SW_BPF_OUTPUT. This patch enables perf to create events of that type. Now a perf user can use the following cmdline to receive output data from BPF programs: # perf record -a -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output.c/map:channel.event=evt/ ls / # perf script perf 1560 [004] 347747.086295: evt: ffffffff811fd201 sys_write ... perf 1560 [004] 347747.086300: evt: ffffffff811fd201 sys_write ... perf 1560 [004] 347747.086315: evt: ffffffff811fd201 sys_write ... ... Test result: # cat test_bpf_output.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { struct { u64 ktime; int cpuid; } __attribute__((packed)) output_data; char error_data[] = "Error: failed to output: %d\n"; output_data.cpuid = get_smp_processor_id(); output_data.ktime = ktime_get_ns(); int err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_data, sizeof(output_data)); if (err) trace_printk(error_data, sizeof(error_data), err); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************ END ***************************/ # perf record -a -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output.c/map:channel.event=evt/ ls / # perf script | grep ls ls 2242 [003] 347851.557563: evt: ffffffff811fd201 sys_write ... ls 2242 [003] 347851.557571: evt: ffffffff811fd201 sys_write ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-11-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:37 +07:00
if (perf_evsel__is_bpf_output(evsel))
check_pass = true;
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
if (attr->type == PERF_TYPE_RAW)
check_pass = true;
if (attr->type == PERF_TYPE_HARDWARE)
check_pass = true;
if (!check_pass) {
pr_debug("ERROR: Event type is wrong for map %s\n", name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
}
events = xy->entries / (xy->row_size / xy->entry_size);
key = *((unsigned int *)pkey);
if (key >= events) {
pr_debug("ERROR: there is no event %d for map %s\n",
key, name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
}
evt_fd = xyarray__entry(xy, key, 0);
err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
if (err && errno)
err = -errno;
return err;
}
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
static int
apply_obj_config_map_for_key(const char *name, int map_fd,
struct bpf_map_def *pdef __maybe_unused,
struct bpf_map_op *op,
void *pkey, void *arg __maybe_unused)
{
int err;
switch (op->op_type) {
case BPF_MAP_OP_SET_VALUE:
err = apply_config_value_for_key(map_fd, pkey,
pdef->value_size,
op->v.value);
break;
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
case BPF_MAP_OP_SET_EVSEL:
err = apply_config_evsel_for_key(name, map_fd, pkey,
op->v.evsel);
break;
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
default:
pr_debug("ERROR: unknown value type for '%s'\n", name);
err = -BPF_LOADER_ERRNO__INTERNAL;
}
return err;
}
static int
apply_obj_config_map(struct bpf_map *map)
{
return bpf_map_config_foreach_key(map,
apply_obj_config_map_for_key,
NULL);
}
static int
apply_obj_config_object(struct bpf_object *obj)
{
struct bpf_map *map;
int err;
bpf_map__for_each(map, obj) {
err = apply_obj_config_map(map);
if (err)
return err;
}
return 0;
}
int bpf__apply_obj_config(void)
{
struct bpf_object *obj, *tmp;
int err;
bpf_object__for_each_safe(obj, tmp) {
err = apply_obj_config_object(obj);
if (err)
return err;
}
return 0;
}
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START)
#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(CONFIG)] = "Invalid config string",
[ERRCODE_OFFSET(GROUP)] = "Invalid group name",
[ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string",
[ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error",
[ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet",
[ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string",
perf bpf: Add prologue for BPF programs for fetching arguments This patch generates a prologue for a BPF program which fetches arguments for it. With this patch, the program can have arguments as follow: SEC("lock_page=__lock_page page->flags") int lock_page(struct pt_regs *ctx, int err, unsigned long flags) { return 1; } This patch passes at most 3 arguments from r3, r4 and r5. r1 is still the ctx pointer. r2 is used to indicate if dereferencing was done successfully. This patch uses r6 to hold ctx (struct pt_regs) and r7 to hold stack pointer for result. Result of each arguments first store on stack: low address BPF_REG_FP - 24 ARG3 BPF_REG_FP - 16 ARG2 BPF_REG_FP - 8 ARG1 BPF_REG_FP high address Then loaded into r3, r4 and r5. The output prologue for offn(...off2(off1(reg)))) should be: r6 <- r1 // save ctx into a callee saved register r7 <- fp r7 <- r7 - stack_offset // pointer to result slot /* load r3 with the offset in pt_regs of 'reg' */ (r7) <- r3 // make slot valid r3 <- r3 + off1 // prepare to read unsafe pointer r2 <- 8 r1 <- r7 // result put onto stack call probe_read // read unsafe pointer jnei r0, 0, err // error checking r3 <- (r7) // read result r3 <- r3 + off2 // prepare to read unsafe pointer r2 <- 8 r1 <- r7 call probe_read jnei r0, 0, err ... /* load r2, r3, r4 from stack */ goto success err: r2 <- 1 /* load r3, r4, r5 with 0 */ goto usercode success: r2 <- 0 usercode: r1 <- r6 // restore ctx // original user code If all of arguments reside in register (dereferencing is not required), gen_prologue_fastpath() will be used to create fast prologue: r3 <- (r1 + offset of reg1) r4 <- (r1 + offset of reg2) r5 <- (r1 + offset of reg3) r2 <- 0 P.S. eBPF calling convention is defined as: * r0 - return value from in-kernel function, and exit value for eBPF program * r1 - r5 - arguments from eBPF program to in-kernel function * r6 - r9 - callee saved registers that in-kernel function will preserve * r10 - read-only frame pointer to access stack Committer note: At least testing if it builds and loads: # cat test_probe_arg.c struct pt_regs; __attribute__((section("lock_page=__lock_page page->flags"), used)) int func(struct pt_regs *ctx, int err, unsigned long flags) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # perf record -e ./test_probe_arg.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data ] # perf evlist perf_bpf_probe:lock_page # Signed-off-by: He Kuang <hekuang@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Wang Nan <wangnan0@huawei.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-11-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:12 +07:00
[ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue",
[ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program",
[ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue",
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
[ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object config option",
[ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (missing '=')",
[ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object map config option",
[ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map doesn't exist",
[ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map",
[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type",
[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size",
[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
[ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting",
[ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event",
[ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map",
2016-02-22 16:10:34 +07:00
[ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large",
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
};
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
static int
bpf_loader_strerror(int err, char *buf, size_t size)
{
char sbuf[STRERR_BUFSIZE];
const char *msg;
if (!buf || !size)
return -1;
err = err > 0 ? err : -err;
if (err >= __LIBBPF_ERRNO__START)
return libbpf_strerror(err, buf, size);
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
if (err >= __BPF_LOADER_ERRNO__START && err < __BPF_LOADER_ERRNO__END) {
msg = bpf_loader_strerror_table[ERRNO_OFFSET(err)];
snprintf(buf, size, "%s", msg);
buf[size - 1] = '\0';
return 0;
}
if (err >= __BPF_LOADER_ERRNO__END)
snprintf(buf, size, "Unknown bpf loader error %d", err);
else
snprintf(buf, size, "%s",
strerror_r(err, sbuf, sizeof(sbuf)));
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
buf[size - 1] = '\0';
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
return -1;
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
}
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
#define bpf__strerror_head(err, buf, size) \
char sbuf[STRERR_BUFSIZE], *emsg;\
if (!size)\
return 0;\
if (err < 0)\
err = -err;\
bpf tools: Improve libbpf error reporting In this patch, a series of libbpf specific error numbers and libbpf_strerror() are introduced to help reporting errors. Functions are updated to pass correct the error number through the CHECK_ERR() macro. All users of bpf_object__open{_buffer}() and bpf_program__title() in perf are modified accordingly. In addition, due to the error codes changing, bpf__strerror_load() is also modified to use them. bpf__strerror_head() is also changed accordingly so it can parse libbpf errors. bpf_loader_strerror() is introduced for that purpose, and will be improved by the following patch. load_program() is improved not to dump log buffer if it is empty. log buffer is also used to deduce whether the error was caused by an invalid program or other problem. v1 -> v2: - Using macro for error code. - Fetch error message based on array index, eliminate for-loop. - Use log buffer to detect the reason of failure. 3 new error code are introduced to replace LIBBPF_ERRNO__LOAD. In v1: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP # perf record -e ./test_big_program.o ls event syntax error: './test_big_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP In v2: # perf record -e ./test_ill_program.o ls event syntax error: './test_ill_program.o' \___ Kernel verifier blocks program loading SKIP # perf record -e ./test_kversion_nomatch_program.o event syntax error: './test_kversion_nomatch_program.o' \___ Incorrect kernel version SKIP (Will be further improved by following patches) # perf record -e ./test_big_program.o event syntax error: './test_big_program.o' \___ Program too big SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446817783-86722-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:49:37 +07:00
bpf_loader_strerror(err, sbuf, sizeof(sbuf));\
emsg = sbuf;\
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
switch (err) {\
default:\
scnprintf(buf, size, "%s", emsg);\
break;
#define bpf__strerror_entry(val, fmt...)\
case val: {\
scnprintf(buf, size, fmt);\
break;\
}
#define bpf__strerror_end(buf, size)\
}\
buf[size - 1] = '\0';
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
int bpf__strerror_prepare_load(const char *filename, bool source,
int err, char *buf, size_t size)
{
size_t n;
int ret;
n = snprintf(buf, size, "Failed to load %s%s: ",
filename, source ? " from source" : "");
if (n >= size) {
buf[size - 1] = '\0';
return 0;
}
buf += n;
size -= n;
ret = bpf_loader_strerror(err, buf, size);
buf[size - 1] = '\0';
return ret;
}
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
case BPF_LOADER_ERRNO__PROGCONF_TERM: {
scnprintf(buf, size, "%s (add -v to see detail)", emsg);
break;
}
perf bpf: Allow BPF program config probing options By extending the syntax of BPF object section names, this patch allows users to config probing options like what they can do in 'perf probe'. The error message in 'perf probe' is also updated. Test result: For following BPF file test_probe_glob.c: # cat test_probe_glob.c __attribute__((section("inlines=no;func=SyS_dup?"), used)) int func(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func After changing "inlines=no" to "inlines=yes": # ./perf record -e ./test_probe_glob.c ls / ... [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # ./perf evlist perf_bpf_probe:func_3 perf_bpf_probe:func_2 perf_bpf_probe:func_1 perf_bpf_probe:func Then test 'force': Use following program: # cat test_probe_force.c __attribute__((section("func=sys_write"), used)) int funca(void *ctx) { return 1; } __attribute__((section("force=yes;func=sys_write"), used)) int funcb(void *ctx) { return 1; } char _license[] __attribute__((section("license"), used)) = "GPL"; int _version __attribute__((section("version"), used)) = 0x40300; # # perf record -e ./test_probe_force.c usleep 1 Error: event "func" already exists. Hint: Remove existing event by 'perf probe -d' or force duplicates by 'perf probe -f' or set 'force=yes' in BPF source. event syntax error: './test_probe_force.c' \___ Probe point exist. Try 'perf probe -d "*"' and set 'force=yes' (add -v to see detail) ... Then replace 'force=no' to 'force=yes': # vim test_probe_force.c # perf record -e ./test_probe_force.c usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.017 MB perf.data ] # perf evlist perf_bpf_probe:func_1 perf_bpf_probe:func # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447675815-166222-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-16 19:10:08 +07:00
bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
bpf__strerror_entry(EACCES, "You need to be root");
bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
perf tools: Create probe points for BPF programs This patch introduces bpf__{un,}probe() functions to enable callers to create kprobe points based on section names a BPF program. It parses the section names in the program and creates corresponding 'struct perf_probe_event' structures. The parse_perf_probe_command() function is used to do the main parsing work. The resuling 'struct perf_probe_event' is stored into program private data for further using. By utilizing the new probing API, this patch creates probe points during event parsing. To ensure probe points be removed correctly, register an atexit hook so even perf quit through exit() bpf__clear() is still called, so probing points are cleared. Note that bpf_clear() should be registered before bpf__probe() is called, so failure of bpf__probe() can still trigger bpf__clear() to remove probe points which are already probed. strerror style error reporting scaffold is created by this patch. bpf__strerror_probe() is the first error reporting function in bpf-loader.c. Committer note: Trying it: To build a test eBPF object file: I am testing using a script I built from the 'perf test -v LLVM' output: $ cat ~/bin/hello-ebpf export KERNEL_INC_OPTIONS="-nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/4.8.3/include -I/home/acme/git/linux/arch/x86/include -Iarch/x86/include/generated/uapi -Iarch/x86/include/generated -I/home/acme/git/linux/include -Iinclude -I/home/acme/git/linux/arch/x86/include/uapi -Iarch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -Iinclude/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h" export WORKING_DIR=/lib/modules/4.2.0/build export CLANG_SOURCE=- export CLANG_OPTIONS=-xc OBJ=/tmp/foo.o rm -f $OBJ echo '__attribute__((section("fork=do_fork"), used)) int fork(void *ctx) {return 0;} char _license[] __attribute__((section("license"), used)) = "GPL";int _version __attribute__((section("version"), used)) = 0x40100;' | \ clang -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o /tmp/foo.o && file $OBJ --- First asking to put a probe in a function not present in the kernel (misses the initial _): $ perf record --event /tmp/foo.o sleep 1 Probe point 'do_fork' not found. event syntax error: '/tmp/foo.o' \___ You need to check probing points in BPF file (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events $ --- Now, with "__attribute__((section("fork=_do_fork"), used)): $ grep _do_fork /proc/kallsyms ffffffff81099ab0 T _do_fork $ perf record --event /tmp/foo.o sleep 1 Failed to open kprobe_events: Permission denied event syntax error: '/tmp/foo.o' \___ Permission denied --- Cool, we need to provide some better hints, "kprobe_events" is too low level, one doesn't strictly need to know the precise details of how these things are put in place, so something that shows the command needed to fix the permissions would be more helpful. Lets try as root instead: # perf record --event /tmp/foo.o sleep 1 Lowering default frequency rate to 1000. Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data ] # perf evlist /tmp/foo.o [root@felicio ~]# perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 --- Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:15 +07:00
bpf__strerror_end(buf, size);
return 0;
}
perf tools: Load eBPF object into kernel This patch utilizes bpf_object__load() provided by libbpf to load all objects into kernel. Committer notes: Testing it: When using an incorrect kernel version number, i.e., having this in your eBPF proggie: int _version __attribute__((section("version"), used)) = 0x40100; For a 4.3.0-rc6+ kernel, say, this happens and needs checking at event parsing time, to provide a better error report to the user: # perf record --event /tmp/foo.o sleep 1 libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: libbpf: -- END LOG -- libbpf: failed to load program 'fork=_do_fork' libbpf: failed to load object '/tmp/foo.o' event syntax error: '/tmp/foo.o' \___ Invalid argument: Are you root and runing a CONFIG_BPF_SYSCALL kernel? (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events If we instead make it match, i.e. use 0x40300 on this v4.3.0-rc6+ kernel, the whole process goes thru: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.202 MB perf.data ] # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:16 +07:00
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
int bpf__strerror_load(struct bpf_object *obj,
perf tools: Load eBPF object into kernel This patch utilizes bpf_object__load() provided by libbpf to load all objects into kernel. Committer notes: Testing it: When using an incorrect kernel version number, i.e., having this in your eBPF proggie: int _version __attribute__((section("version"), used)) = 0x40100; For a 4.3.0-rc6+ kernel, say, this happens and needs checking at event parsing time, to provide a better error report to the user: # perf record --event /tmp/foo.o sleep 1 libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: libbpf: -- END LOG -- libbpf: failed to load program 'fork=_do_fork' libbpf: failed to load object '/tmp/foo.o' event syntax error: '/tmp/foo.o' \___ Invalid argument: Are you root and runing a CONFIG_BPF_SYSCALL kernel? (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events If we instead make it match, i.e. use 0x40300 on this v4.3.0-rc6+ kernel, the whole process goes thru: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.202 MB perf.data ] # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:16 +07:00
int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
perf bpf: Improve BPF related error messages A series of bpf loader related error codes were introduced to help error reporting. Functions were improved to return these new error codes. Functions which return pointers were adjusted to encode error codes into return value using the ERR_PTR() interface. bpf_loader_strerror() was improved to convert these error messages to strings. It checks the error codes and calls libbpf_strerror() and strerror_r() accordingly, so caller don't need to consider checking the range of the error code. In bpf__strerror_load(), print kernel version of running kernel and the object's 'version' section to notify user how to fix his/her program. v1 -> v2: Use macro for error code. Fetch error message based on array index, eliminate for-loop. Print version strings. Before: # perf record -e ./test_kversion_nomatch_program.o sleep 1 event syntax error: './test_kversion_nomatch_program.o' \___ Failed to load program: Validate your program and check 'license'/'version' sections in your object SKIP After: # perf record -e ./test_kversion_nomatch_program.o ls event syntax error: './test_kversion_nomatch_program.o' \___ 'version' (4.4.0) doesn't match running kernel (4.3.0) SKIP Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1446818289-87444-1-git-send-email-wangnan0@huawei.com [ Add 'static inline' to bpf__strerror_prepare_load() when LIBBPF is disabled ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-11-06 20:58:09 +07:00
case LIBBPF_ERRNO__KVER: {
unsigned int obj_kver = bpf_object__get_kversion(obj);
unsigned int real_kver;
if (fetch_kernel_version(&real_kver, NULL, 0)) {
scnprintf(buf, size, "Unable to fetch kernel version");
break;
}
if (obj_kver != real_kver) {
scnprintf(buf, size,
"'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")",
KVER_PARAM(obj_kver),
KVER_PARAM(real_kver));
break;
}
scnprintf(buf, size, "Failed to load program for unknown reason");
break;
}
perf tools: Load eBPF object into kernel This patch utilizes bpf_object__load() provided by libbpf to load all objects into kernel. Committer notes: Testing it: When using an incorrect kernel version number, i.e., having this in your eBPF proggie: int _version __attribute__((section("version"), used)) = 0x40100; For a 4.3.0-rc6+ kernel, say, this happens and needs checking at event parsing time, to provide a better error report to the user: # perf record --event /tmp/foo.o sleep 1 libbpf: load bpf program failed: Invalid argument libbpf: -- BEGIN DUMP LOG --- libbpf: libbpf: -- END LOG -- libbpf: failed to load program 'fork=_do_fork' libbpf: failed to load object '/tmp/foo.o' event syntax error: '/tmp/foo.o' \___ Invalid argument: Are you root and runing a CONFIG_BPF_SYSCALL kernel? (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events If we instead make it match, i.e. use 0x40300 on this v4.3.0-rc6+ kernel, the whole process goes thru: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.202 MB perf.data ] # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 19:41:16 +07:00
bpf__strerror_end(buf, size);
return 0;
}
perf bpf: Add API to set values to map entries in a bpf object bpf__config_obj() is introduced as a core API to config BPF object after loading. One configuration option of maps is introduced. After this patch BPF object can accept assignments like: map:my_map.value=1234 (map.my_map.value looks pretty. However, there's a small but hard to fix problem related to flex's greedy matching. Please see [1]. Choose ':' to avoid it in a simpler way.) This patch is more complex than the work it does because the consideration of extension. In designing BPF map configuration, the following things should be considered: 1. Array indices selection: perf should allow user setting different value for different slots in an array, with syntax like: map:my_map.value[0,3...6]=1234; 2. A map should be set by different config terms, each for a part of it. For example, set each slot to the pid of a thread; 3. Type of value: integer is not the only valid value type. A perf counter can also be put into a map after commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter") 4. For a hash table, it should be possible to use a string or other value as a key; 5. It is possible that map configuration is unable to be setup during parsing. A perf counter is an example. Therefore, this patch does the following: 1. Instead of updating map element during parsing, this patch stores map config options in 'struct bpf_map_priv'. Following patches will apply those configs at an appropriate time; 2. Link map operations in a list so a map can have multiple config terms attached, so different parts can be configured separately; 3. Make 'struct bpf_map_priv' extensible so that the following patches can add new types of keys and operations; 4. Use bpf_obj_config__map_funcs array to support more map config options. Since the patch changing the event parser to parse BPF object config is relative large, I've put it in another commit. Code in this patch can be tested after applying the next patch. [1] http://lkml.kernel.org/g/564ED621.4050500@huawei.com Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> [ Changes "maps:my_map.value" to "map:my_map.value", improved error messages ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:30 +07:00
int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
struct parse_events_term *term __maybe_unused,
struct perf_evlist *evlist __maybe_unused,
int *error_pos __maybe_unused, int err,
char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
"Can't use this config term with this map type");
bpf__strerror_end(buf, size);
return 0;
}
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
perf tools: Enable passing event to BPF object A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:33 +07:00
bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
"Cannot set event to BPF map in multi-thread tracing");
bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
"%s (Hint: use -i to turn off inherit)", emsg);
bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
"Can only put raw, hardware and BPF output event into a BPF map");
perf record: Apply config to BPF objects before recording bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 16:10:32 +07:00
bpf__strerror_end(buf, size);
return 0;
}