mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-15 03:06:41 +07:00
Merge branch 'bpf-task-fd-query'
Yonghong Song says: ==================== Currently, suppose a userspace application has loaded a bpf program and attached it to a tracepoint/kprobe/uprobe, and a bpf introspection tool, e.g., bpftool, wants to show which bpf program is attached to which tracepoint/kprobe/uprobe. Such attachment information will be really useful to understand the overall bpf deployment in the system. There is a name field (16 bytes) for each program, which could be used to encode the attachment point. There are some drawbacks to this approach. First, a bpftool user (e.g., an admin) may not really understand the association between the name and the attachment point. Second, if one program is attached to multiple places, encoding a proper name which can imply all these attachments becomes difficult. This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY. Given a pid and fd, this command will return bpf related information to user space. Right now it only supports tracepoint/kprobe/uprobe perf event fd's. For such a fd, BPF_TASK_FD_QUERY will return . prog_id . tracepoint name, or . k[ret]probe funcname + offset or kernel addr, or . u[ret]probe filename + offset to the userspace. The user can use "bpftool prog" to find more information about the bpf program itself with prog_id. Patch #1 adds function perf_get_event() in kernel/events/core.c. Patch #2 implements the bpf subcommand BPF_TASK_FD_QUERY. Patch #3 syncs the tools bpf.h header and also adds bpf_task_fd_query() in the libbpf library for samples/selftests/bpftool to use. Patch #4 adds the ksym_get_addr() utility function. Patch #5 adds a test in samples/bpf for querying k[ret]probes and u[ret]probes. Patch #6 adds a test in tools/testing/selftests/bpf for querying raw_tracepoint and tracepoint. Patch #7 adds a new subcommand "perf" to bpftool. Changelogs: v4 -> v5: . return strlen(buf) instead of strlen(buf) + 1 in the attr.buf_len. 
As long as the user provides a non-empty buffer, it will be filled with an empty string, a truncated string, or the full string based on the buffer size and the length of the to-be-copied string. v3 -> v4: . made attr buf_len input/output. The length of the actual buffer is written to buf_len so user space knows what is actually needed. If the user provides a buffer with length >= 1 but less than required, do a partial copy and return -ENOSPC. . code simplification with put_user. . changed query result attach_info to fd_type. . add tests at selftests/bpf to test zero len, null buf and insufficient buf. v2 -> v3: . made perf_get_event() return perf_event pointer const. this was to ensure that event fields are not meddled with. . detect whether the new BPF_TASK_FD_QUERY is supported or not in "bpftool perf" and warn users if it is not. v1 -> v2: . changed bpf subcommand name from BPF_PERF_EVENT_QUERY to BPF_TASK_FD_QUERY. . fixed various "bpftool perf" issues and added documentation and auto-completion. ==================== Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
commit
f80acbd233
@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
|
||||
extern void perf_event_free_task(struct task_struct *task);
|
||||
extern void perf_event_delayed_put(struct task_struct *task);
|
||||
extern struct file *perf_event_get(unsigned int fd);
|
||||
extern const struct perf_event *perf_get_event(struct file *file);
|
||||
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
|
||||
extern void perf_event_print_debug(void);
|
||||
extern void perf_pmu_disable(struct pmu *pmu);
|
||||
@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
|
||||
static inline void perf_event_free_task(struct task_struct *task) { }
|
||||
static inline void perf_event_delayed_put(struct task_struct *task) { }
|
||||
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
|
||||
static inline const struct perf_event *perf_get_event(struct file *file)
|
||||
{
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
|
||||
{
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
|
||||
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
|
||||
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
|
||||
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
|
||||
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
|
||||
u32 *fd_type, const char **buf,
|
||||
u64 *probe_offset, u64 *probe_addr);
|
||||
#else
|
||||
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
|
||||
{
|
||||
@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
/*
 * Stub variant of bpf_get_perf_event_info() from the #else branch: no
 * output parameter is written and the call always fails with -EOPNOTSUPP.
 * NOTE(review): the condition selecting this branch is outside this view.
 */
static inline int bpf_get_perf_event_info(const struct perf_event *event,
					  u32 *prog_id, u32 *fd_type,
					  const char **buf, u64 *probe_offset,
					  u64 *probe_addr)
{
	return -EOPNOTSUPP;
}
|
||||
#endif
|
||||
|
||||
enum {
|
||||
@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
|
||||
#ifdef CONFIG_KPROBE_EVENTS
|
||||
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
|
||||
extern void perf_kprobe_destroy(struct perf_event *event);
|
||||
extern int bpf_get_kprobe_info(const struct perf_event *event,
|
||||
u32 *fd_type, const char **symbol,
|
||||
u64 *probe_offset, u64 *probe_addr,
|
||||
bool perf_type_tracepoint);
|
||||
#endif
|
||||
#ifdef CONFIG_UPROBE_EVENTS
|
||||
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
|
||||
extern void perf_uprobe_destroy(struct perf_event *event);
|
||||
extern int bpf_get_uprobe_info(const struct perf_event *event,
|
||||
u32 *fd_type, const char **filename,
|
||||
u64 *probe_offset, bool perf_type_tracepoint);
|
||||
#endif
|
||||
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
|
||||
char *filter_str);
|
||||
|
@ -97,6 +97,7 @@ enum bpf_cmd {
|
||||
BPF_RAW_TRACEPOINT_OPEN,
|
||||
BPF_BTF_LOAD,
|
||||
BPF_BTF_GET_FD_BY_ID,
|
||||
BPF_TASK_FD_QUERY,
|
||||
};
|
||||
|
||||
enum bpf_map_type {
|
||||
@ -380,6 +381,22 @@ union bpf_attr {
|
||||
__u32 btf_log_size;
|
||||
__u32 btf_log_level;
|
||||
};
|
||||
|
||||
/* anonymous struct used by the BPF_TASK_FD_QUERY command */
struct {
|
||||
__u32 pid; /* input: pid of the task owning fd */
|
||||
__u32 fd; /* input: fd to look up in that task */
|
||||
__u32 flags; /* input: flags, must be 0 */
|
||||
__u32 buf_len; /* input: size of buf; output: length of the string (without the terminating NUL) */
|
||||
__aligned_u64 buf; /* input/output:
|
||||
* tp_name for tracepoint
|
||||
* symbol for kprobe
|
||||
* filename for uprobe
|
||||
*/
|
||||
__u32 prog_id; /* output: prog_id */
|
||||
__u32 fd_type; /* output: BPF_FD_TYPE_* */
|
||||
__u64 probe_offset; /* output: probe_offset */
|
||||
__u64 probe_addr; /* output: probe_addr */
|
||||
} task_fd_query;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
/* The description below is an attempt at providing documentation to eBPF
|
||||
@ -2557,4 +2574,13 @@ struct bpf_fib_lookup {
|
||||
__u8 dmac[6]; /* ETH_ALEN */
|
||||
};
|
||||
|
||||
/* Kind of attachment reported in task_fd_query.fd_type by BPF_TASK_FD_QUERY.
 * The comment on each value names what the query's buf / probe_offset /
 * probe_addr outputs describe for that kind.
 */
enum bpf_task_fd_type {
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
|
||||
BPF_FD_TYPE_TRACEPOINT, /* tp name */
|
||||
BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
|
||||
BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
|
||||
BPF_FD_TYPE_UPROBE, /* filename + offset */
|
||||
BPF_FD_TYPE_URETPROBE, /* filename + offset */
|
||||
};
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
@ -18,7 +18,9 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/license.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/version.h>
|
||||
@ -2178,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
|
||||
return btf_get_fd_by_id(attr->btf_id);
|
||||
}
|
||||
|
||||
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr,
|
||||
u32 prog_id, u32 fd_type,
|
||||
const char *buf, u64 probe_offset,
|
||||
u64 probe_addr)
|
||||
{
|
||||
char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
|
||||
u32 len = buf ? strlen(buf) : 0, input_len;
|
||||
int err = 0;
|
||||
|
||||
if (put_user(len, &uattr->task_fd_query.buf_len))
|
||||
return -EFAULT;
|
||||
input_len = attr->task_fd_query.buf_len;
|
||||
if (input_len && ubuf) {
|
||||
if (!len) {
|
||||
/* nothing to copy, just make ubuf NULL terminated */
|
||||
char zero = '\0';
|
||||
|
||||
if (put_user(zero, ubuf))
|
||||
return -EFAULT;
|
||||
} else if (input_len >= len + 1) {
|
||||
/* ubuf can hold the string with NULL terminator */
|
||||
if (copy_to_user(ubuf, buf, len + 1))
|
||||
return -EFAULT;
|
||||
} else {
|
||||
/* ubuf cannot hold the string with NULL terminator,
|
||||
* do a partial copy with NULL terminator.
|
||||
*/
|
||||
char zero = '\0';
|
||||
|
||||
err = -ENOSPC;
|
||||
if (copy_to_user(ubuf, buf, input_len - 1))
|
||||
return -EFAULT;
|
||||
if (put_user(zero, ubuf + input_len - 1))
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
|
||||
put_user(fd_type, &uattr->task_fd_query.fd_type) ||
|
||||
put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
|
||||
put_user(probe_addr, &uattr->task_fd_query.probe_addr))
|
||||
return -EFAULT;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
|
||||
|
||||
static int bpf_task_fd_query(const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
pid_t pid = attr->task_fd_query.pid;
|
||||
u32 fd = attr->task_fd_query.fd;
|
||||
const struct perf_event *event;
|
||||
struct files_struct *files;
|
||||
struct task_struct *task;
|
||||
struct file *file;
|
||||
int err;
|
||||
|
||||
if (CHECK_ATTR(BPF_TASK_FD_QUERY))
|
||||
return -EINVAL;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (attr->task_fd_query.flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
|
||||
if (!task)
|
||||
return -ENOENT;
|
||||
|
||||
files = get_files_struct(task);
|
||||
put_task_struct(task);
|
||||
if (!files)
|
||||
return -ENOENT;
|
||||
|
||||
err = 0;
|
||||
spin_lock(&files->file_lock);
|
||||
file = fcheck_files(files, fd);
|
||||
if (!file)
|
||||
err = -EBADF;
|
||||
else
|
||||
get_file(file);
|
||||
spin_unlock(&files->file_lock);
|
||||
put_files_struct(files);
|
||||
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_op == &bpf_raw_tp_fops) {
|
||||
struct bpf_raw_tracepoint *raw_tp = file->private_data;
|
||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||
|
||||
err = bpf_task_fd_query_copy(attr, uattr,
|
||||
raw_tp->prog->aux->id,
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT,
|
||||
btp->tp->name, 0, 0);
|
||||
goto put_file;
|
||||
}
|
||||
|
||||
event = perf_get_event(file);
|
||||
if (!IS_ERR(event)) {
|
||||
u64 probe_offset, probe_addr;
|
||||
u32 prog_id, fd_type;
|
||||
const char *buf;
|
||||
|
||||
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
|
||||
&buf, &probe_offset,
|
||||
&probe_addr);
|
||||
if (!err)
|
||||
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
|
||||
fd_type, buf,
|
||||
probe_offset,
|
||||
probe_addr);
|
||||
goto put_file;
|
||||
}
|
||||
|
||||
err = -ENOTSUPP;
|
||||
put_file:
|
||||
fput(file);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
|
||||
{
|
||||
union bpf_attr attr = {};
|
||||
@ -2264,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
|
||||
case BPF_BTF_GET_FD_BY_ID:
|
||||
err = bpf_btf_get_fd_by_id(&attr);
|
||||
break;
|
||||
case BPF_TASK_FD_QUERY:
|
||||
err = bpf_task_fd_query(&attr, uattr);
|
||||
break;
|
||||
default:
|
||||
err = -EINVAL;
|
||||
break;
|
||||
|
@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
|
||||
return file;
|
||||
}
|
||||
|
||||
const struct perf_event *perf_get_event(struct file *file)
|
||||
{
|
||||
if (file->f_op != &perf_fops)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
return file->private_data;
|
||||
}
|
||||
|
||||
const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
|
||||
{
|
||||
if (!event)
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/error-injection.h>
|
||||
|
||||
#include "trace_probe.h"
|
||||
@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
|
||||
mutex_unlock(&bpf_event_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Describe the bpf program attached to a perf event.
 *
 * @event:        perf event to inspect
 * @prog_id:      out: id of the attached program
 * @fd_type:      out: BPF_FD_TYPE_* classification
 * @buf:          out: tracepoint name, or whatever the kprobe/uprobe
 *                helper reports
 * @probe_offset: out: probe offset (0 for tracepoints)
 * @probe_addr:   out: probe address (0 for tracepoints; not written on the
 *                uprobe path)
 *
 * Return: 0 on success, -ENOENT when no program is attached, -EOPNOTSUPP
 * for BPF_PROG_TYPE_PERF_EVENT programs or when the event is neither a
 * tracepoint nor a k/uprobe handled by the configured helpers, otherwise
 * the helper's result.  *prog_id is written before the event kind is
 * examined, so it is set even on the later error paths.
 */
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog = event->prog;
	int flags, err;

	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		if (is_tracepoint)
			*buf = event->tp_event->tp->name;
		else
			*buf = event->tp_event->name;
		*fd_type = BPF_FD_TYPE_TRACEPOINT;
		*probe_offset = 0x0;
		*probe_addr = 0x0;
		return 0;
	}

	/* kprobe/uprobe: stays -EOPNOTSUPP unless a helper below claims it */
	err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
	if (flags & TRACE_EVENT_FL_KPROBE)
		err = bpf_get_kprobe_info(event, fd_type, buf,
					  probe_offset, probe_addr,
					  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
	if (flags & TRACE_EVENT_FL_UPROBE)
		err = bpf_get_uprobe_info(event, fd_type, buf,
					  probe_offset,
					  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif

	return err;
}
|
||||
|
@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
head, NULL);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_perf_func);
|
||||
|
||||
/*
 * Report where the kprobe behind a perf event is placed.
 *
 * @event:                perf event backed by a kprobe trace event
 * @fd_type:              out: BPF_FD_TYPE_KRETPROBE or BPF_FD_TYPE_KPROBE
 * @symbol:               out: probed symbol name, or NULL when the probe
 *                        has no symbol
 * @probe_offset:         out: offset from @symbol (0 without a symbol)
 * @probe_addr:           out: probe address when there is no symbol,
 *                        0 otherwise
 * @perf_type_tracepoint: true to look the probe up by event/group name,
 *                        false to take it from tp_event->data
 *
 * Return: 0 on success, -EINVAL if no trace_kprobe could be found.
 */
int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **symbol, u64 *probe_offset,
			u64 *probe_addr, bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_kprobe *tk;

	tk = perf_type_tracepoint ? find_trace_kprobe(pevent, group)
				  : event->tp_event->data;
	if (!tk)
		return -EINVAL;

	if (trace_kprobe_is_return(tk))
		*fd_type = BPF_FD_TYPE_KRETPROBE;
	else
		*fd_type = BPF_FD_TYPE_KPROBE;

	if (!tk->symbol) {
		/* no symbol recorded: report the raw address instead */
		*symbol = NULL;
		*probe_offset = 0;
		*probe_addr = (unsigned long)tk->rp.kp.addr;
		return 0;
	}

	*symbol = tk->symbol;
	*probe_offset = tk->rp.kp.offset;
	*probe_addr = 0;
	return 0;
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
|
||||
/*
|
||||
|
@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
|
||||
{
|
||||
__uprobe_perf_func(tu, func, regs, ucb, dsize);
|
||||
}
|
||||
|
||||
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
|
||||
const char **filename, u64 *probe_offset,
|
||||
bool perf_type_tracepoint)
|
||||
{
|
||||
const char *pevent = trace_event_name(event->tp_event);
|
||||
const char *group = event->tp_event->class->system;
|
||||
struct trace_uprobe *tu;
|
||||
|
||||
if (perf_type_tracepoint)
|
||||
tu = find_probe_event(pevent, group);
|
||||
else
|
||||
tu = event->tp_event->data;
|
||||
if (!tu)
|
||||
return -EINVAL;
|
||||
|
||||
*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
|
||||
: BPF_FD_TYPE_UPROBE;
|
||||
*filename = tu->filename;
|
||||
*probe_offset = tu->offset;
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
|
||||
static int
|
||||
|
@ -51,6 +51,7 @@ hostprogs-y += cpustat
|
||||
hostprogs-y += xdp_adjust_tail
|
||||
hostprogs-y += xdpsock
|
||||
hostprogs-y += xdp_fwd
|
||||
hostprogs-y += task_fd_query
|
||||
|
||||
# Libbpf dependencies
|
||||
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
|
||||
@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
|
||||
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
|
||||
xdpsock-objs := bpf_load.o xdpsock_user.o
|
||||
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
|
||||
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
|
||||
|
||||
# Tell kbuild to always build the programs
|
||||
always := $(hostprogs-y)
|
||||
@ -160,6 +162,7 @@ always += cpustat_kern.o
|
||||
always += xdp_adjust_tail_kern.o
|
||||
always += xdpsock_kern.o
|
||||
always += xdp_fwd_kern.o
|
||||
always += task_fd_query_kern.o
|
||||
|
||||
HOSTCFLAGS += -I$(objtree)/usr/include
|
||||
HOSTCFLAGS += -I$(srctree)/tools/lib/
|
||||
@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
|
||||
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
|
||||
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
|
||||
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
|
||||
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
|
||||
|
||||
HOST_LOADLIBES += $(LIBBPF) -lelf
|
||||
HOSTLOADLIBES_tracex4 += -lrt
|
||||
|
19
samples/bpf/task_fd_query_kern.c
Normal file
19
samples/bpf/task_fd_query_kern.c
Normal file
@ -0,0 +1,19 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/version.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
/*
 * Empty program placed in the "kprobe/blk_start_request" section.  It does
 * no work; the user-space half of this sample only needs something attached
 * there so that the attachment can be queried.
 */
SEC("kprobe/blk_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
	return 0;
}
|
||||
|
||||
/*
 * Empty program placed in the "kretprobe/blk_account_io_completion"
 * section.  It does no work; it exists so the user-space half of this
 * sample has a return-probe attachment to query.
 */
SEC("kretprobe/blk_account_io_completion")
int bpf_prog2(struct pt_regs *ctx)
{
	return 0;
}
|
||||
char _license[] SEC("license") = "GPL";
|
||||
u32 _version SEC("version") = LINUX_VERSION_CODE;
|
382
samples/bpf/task_fd_query_user.c
Normal file
382
samples/bpf/task_fd_query_user.c
Normal file
@ -0,0 +1,382 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <linux/bpf.h>
|
||||
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
#include "bpf_util.h"
|
||||
#include "perf-sys.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
/*
 * Statement macro: when `condition` is true, print "FAIL: <function>:"
 * followed by the errno description and make the *calling function*
 * return -1.  Only usable inside functions returning int.
 *
 * errno is captured before printf() runs and restored afterwards, because
 * printf() is allowed to modify errno and perror() must describe the call
 * that actually failed.
 */
#define CHECK_PERROR_RET(condition) do {			\
	if (condition) {					\
		int saved_errno_ = errno;			\
								\
		printf("FAIL: %s:\n", __func__);		\
		errno = saved_errno_;				\
		perror(" ");					\
		return -1;					\
	}							\
} while (0)
|
||||
|
||||
/*
 * Statement macro: make the *calling function* return -1 when `condition`
 * is true.  Prints nothing; only usable inside functions returning int.
 */
#define CHECK_AND_RET(condition) do {	\
	if (condition)			\
		return -1;		\
} while (0)
|
||||
|
||||
/*
 * Convert a pointer to a __u64 by way of unsigned long, the form in which
 * this file stores a pointer into perf_event_attr.config1.
 */
static __u64 ptr_to_u64(void *ptr)
{
	unsigned long raw = (unsigned long)ptr;

	return (__u64)raw;
}
|
||||
|
||||
#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
|
||||
static int bpf_find_probe_type(const char *event_type)
|
||||
{
|
||||
char buf[256];
|
||||
int fd, ret;
|
||||
|
||||
ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
|
||||
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
|
||||
|
||||
fd = open(buf, O_RDONLY);
|
||||
CHECK_PERROR_RET(fd < 0);
|
||||
|
||||
ret = read(fd, buf, sizeof(buf));
|
||||
close(fd);
|
||||
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
|
||||
|
||||
errno = 0;
|
||||
ret = (int)strtol(buf, NULL, 10);
|
||||
CHECK_PERROR_RET(errno);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
|
||||
static int bpf_get_retprobe_bit(const char *event_type)
|
||||
{
|
||||
char buf[256];
|
||||
int fd, ret;
|
||||
|
||||
ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
|
||||
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
|
||||
|
||||
fd = open(buf, O_RDONLY);
|
||||
CHECK_PERROR_RET(fd < 0);
|
||||
|
||||
ret = read(fd, buf, sizeof(buf));
|
||||
close(fd);
|
||||
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
|
||||
CHECK_PERROR_RET(strlen(buf) < strlen("config:"));
|
||||
|
||||
errno = 0;
|
||||
ret = (int)strtol(buf + strlen("config:"), NULL, 10);
|
||||
CHECK_PERROR_RET(errno);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
|
||||
__u32 expected_fd_type)
|
||||
{
|
||||
__u64 probe_offset, probe_addr;
|
||||
__u32 len, prog_id, fd_type;
|
||||
char buf[256];
|
||||
int err;
|
||||
|
||||
len = sizeof(buf);
|
||||
err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len,
|
||||
&prog_id, &fd_type, &probe_offset,
|
||||
&probe_addr);
|
||||
if (err < 0) {
|
||||
printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
|
||||
__func__, prog_fd_idx, fn_name);
|
||||
perror(" :");
|
||||
return -1;
|
||||
}
|
||||
if (strcmp(buf, fn_name) != 0 ||
|
||||
fd_type != expected_fd_type ||
|
||||
probe_offset != 0x0 || probe_addr != 0x0) {
|
||||
printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
|
||||
prog_fd_idx);
|
||||
printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
|
||||
" probe_addr: 0x%llx\n",
|
||||
buf, fd_type, probe_offset, probe_addr);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_nondebug_fs_kuprobe_common(const char *event_type,
|
||||
const char *name, __u64 offset, __u64 addr, bool is_return,
|
||||
char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
|
||||
__u64 *probe_offset, __u64 *probe_addr)
|
||||
{
|
||||
int is_return_bit = bpf_get_retprobe_bit(event_type);
|
||||
int type = bpf_find_probe_type(event_type);
|
||||
struct perf_event_attr attr = {};
|
||||
int fd;
|
||||
|
||||
if (type < 0 || is_return_bit < 0) {
|
||||
printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
|
||||
__func__, type, is_return_bit);
|
||||
return -1;
|
||||
}
|
||||
|
||||
attr.sample_period = 1;
|
||||
attr.wakeup_events = 1;
|
||||
if (is_return)
|
||||
attr.config |= 1 << is_return_bit;
|
||||
|
||||
if (name) {
|
||||
attr.config1 = ptr_to_u64((void *)name);
|
||||
attr.config2 = offset;
|
||||
} else {
|
||||
attr.config1 = 0;
|
||||
attr.config2 = addr;
|
||||
}
|
||||
attr.size = sizeof(attr);
|
||||
attr.type = type;
|
||||
|
||||
fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
|
||||
CHECK_PERROR_RET(fd < 0);
|
||||
|
||||
CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
|
||||
CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
|
||||
CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
|
||||
prog_id, fd_type, probe_offset, probe_addr) < 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Create a k/uprobe via test_nondebug_fs_kuprobe_common() and validate
 * what the task-fd query reported for it.
 *
 * @event_type:           PMU name handed to the common helper
 * @name, @offset, @addr: probe location (by name + offset, or by address
 *                        when @name is NULL)
 * @is_return:            whether a return probe was requested
 * @expected_fd_type:     fd_type expected when !@is_return
 * @expected_ret_fd_type: fd_type expected when @is_return
 * @buf, @buf_len:        query buffer and its size
 *
 * With a name, the returned string and offset must match @name/@offset.
 * Without one, the reported string length must be 0 and the address must
 * match @addr.
 *
 * Return: 0 when everything matches, -1 after printing a diagnostic.
 */
static int test_nondebug_fs_probe(const char *event_type, const char *name,
				  __u64 offset, __u64 addr, bool is_return,
				  __u32 expected_fd_type,
				  __u32 expected_ret_fd_type,
				  char *buf, __u32 buf_len)
{
	__u32 wanted_type = is_return ? expected_ret_fd_type
				      : expected_fd_type;
	__u64 got_offset, got_addr;
	__u32 got_prog_id, got_type;

	if (test_nondebug_fs_kuprobe_common(event_type, name,
					    offset, addr, is_return,
					    buf, &buf_len, &got_prog_id,
					    &got_type, &got_offset,
					    &got_addr) < 0) {
		printf("FAIL: %s, "
		       "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
		       __func__, name ? name : "", offset, addr, is_return);
		perror(" :");
		return -1;
	}

	if (got_type != wanted_type) {
		printf("FAIL: %s, incorrect fd_type %u\n",
		       __func__, got_type);
		return -1;
	}

	if (!name) {
		/* address-based probe: no string expected, address must match */
		if (buf_len != 0) {
			printf("FAIL: %s, incorrect buf %p\n",
			       __func__, buf);
			return -1;
		}

		if (got_addr != addr) {
			printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
			       __func__, got_addr);
			return -1;
		}
		return 0;
	}

	if (strcmp(name, buf) != 0) {
		printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
		return -1;
	}
	if (got_offset != offset) {
		printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
		       __func__, got_offset);
		return -1;
	}
	return 0;
}
|
||||
|
||||
static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
|
||||
{
|
||||
const char *event_type = "uprobe";
|
||||
struct perf_event_attr attr = {};
|
||||
char buf[256], event_alias[256];
|
||||
__u64 probe_offset, probe_addr;
|
||||
__u32 len, prog_id, fd_type;
|
||||
int err, res, kfd, efd;
|
||||
ssize_t bytes;
|
||||
|
||||
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
|
||||
event_type);
|
||||
kfd = open(buf, O_WRONLY | O_APPEND, 0);
|
||||
CHECK_PERROR_RET(kfd < 0);
|
||||
|
||||
res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
|
||||
CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));
|
||||
|
||||
res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
|
||||
is_return ? 'r' : 'p', event_type, event_alias,
|
||||
binary_path, offset);
|
||||
CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
|
||||
CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);
|
||||
|
||||
close(kfd);
|
||||
kfd = -1;
|
||||
|
||||
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
|
||||
event_type, event_alias);
|
||||
efd = open(buf, O_RDONLY, 0);
|
||||
CHECK_PERROR_RET(efd < 0);
|
||||
|
||||
bytes = read(efd, buf, sizeof(buf));
|
||||
CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
|
||||
close(efd);
|
||||
buf[bytes] = '\0';
|
||||
|
||||
attr.config = strtol(buf, NULL, 0);
|
||||
attr.type = PERF_TYPE_TRACEPOINT;
|
||||
attr.sample_period = 1;
|
||||
attr.wakeup_events = 1;
|
||||
kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
|
||||
CHECK_PERROR_RET(kfd < 0);
|
||||
CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
|
||||
CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);
|
||||
|
||||
len = sizeof(buf);
|
||||
err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
|
||||
&prog_id, &fd_type, &probe_offset,
|
||||
&probe_addr);
|
||||
if (err < 0) {
|
||||
printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
|
||||
perror(" :");
|
||||
return -1;
|
||||
}
|
||||
if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) ||
|
||||
(!is_return && fd_type != BPF_FD_TYPE_UPROBE)) {
|
||||
printf("FAIL: %s, incorrect fd_type %u\n", __func__,
|
||||
fd_type);
|
||||
return -1;
|
||||
}
|
||||
if (strcmp(binary_path, buf) != 0) {
|
||||
printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
|
||||
return -1;
|
||||
}
|
||||
if (probe_offset != offset) {
|
||||
printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
|
||||
probe_offset);
|
||||
return -1;
|
||||
}
|
||||
|
||||
close(kfd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct rlimit r = {1024*1024, RLIM_INFINITY};
|
||||
extern char __executable_start;
|
||||
char filename[256], buf[256];
|
||||
__u64 uprobe_file_offset;
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
|
||||
perror("setrlimit(RLIMIT_MEMLOCK)");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (load_kallsyms()) {
|
||||
printf("failed to process /proc/kallsyms\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (load_bpf_file(filename)) {
|
||||
printf("%s", bpf_log_buf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* test two functions in the corresponding *_kern.c file */
|
||||
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
|
||||
BPF_FD_TYPE_KPROBE));
|
||||
CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
|
||||
BPF_FD_TYPE_KRETPROBE));
|
||||
|
||||
/* test nondebug fs kprobe */
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
|
||||
false, BPF_FD_TYPE_KPROBE,
|
||||
BPF_FD_TYPE_KRETPROBE,
|
||||
buf, sizeof(buf)));
|
||||
#ifdef __x86_64__
|
||||
/* set a kprobe on "bpf_check + 0x5", which is x64 specific */
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
|
||||
false, BPF_FD_TYPE_KPROBE,
|
||||
BPF_FD_TYPE_KRETPROBE,
|
||||
buf, sizeof(buf)));
|
||||
#endif
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
|
||||
true, BPF_FD_TYPE_KPROBE,
|
||||
BPF_FD_TYPE_KRETPROBE,
|
||||
buf, sizeof(buf)));
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
|
||||
ksym_get_addr("bpf_check"), false,
|
||||
BPF_FD_TYPE_KPROBE,
|
||||
BPF_FD_TYPE_KRETPROBE,
|
||||
buf, sizeof(buf)));
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
|
||||
ksym_get_addr("bpf_check"), false,
|
||||
BPF_FD_TYPE_KPROBE,
|
||||
BPF_FD_TYPE_KRETPROBE,
|
||||
NULL, 0));
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
|
||||
ksym_get_addr("bpf_check"), true,
|
||||
BPF_FD_TYPE_KPROBE,
|
||||
BPF_FD_TYPE_KRETPROBE,
|
||||
buf, sizeof(buf)));
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
|
||||
ksym_get_addr("bpf_check"), true,
|
||||
BPF_FD_TYPE_KPROBE,
|
||||
BPF_FD_TYPE_KRETPROBE,
|
||||
0, 0));
|
||||
|
||||
/* test nondebug fs uprobe */
|
||||
/* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
|
||||
* and the default linker script, which defines __executable_start as
|
||||
* the start of the .text section. The calculation could be different
|
||||
* on different systems with different compilers. The right way is
|
||||
* to parse the ELF file. We took a shortcut here.
|
||||
*/
|
||||
uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
|
||||
uprobe_file_offset, 0x0, false,
|
||||
BPF_FD_TYPE_UPROBE,
|
||||
BPF_FD_TYPE_URETPROBE,
|
||||
buf, sizeof(buf)));
|
||||
CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
|
||||
uprobe_file_offset, 0x0, true,
|
||||
BPF_FD_TYPE_UPROBE,
|
||||
BPF_FD_TYPE_URETPROBE,
|
||||
buf, sizeof(buf)));
|
||||
|
||||
/* test debug fs uprobe */
|
||||
CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
|
||||
false));
|
||||
CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
|
||||
true));
|
||||
|
||||
return 0;
|
||||
}
|
81
tools/bpf/bpftool/Documentation/bpftool-perf.rst
Normal file
81
tools/bpf/bpftool/Documentation/bpftool-perf.rst
Normal file
@ -0,0 +1,81 @@
|
||||
================
|
||||
bpftool-perf
|
||||
================
|
||||
-------------------------------------------------------------------------------
|
||||
tool for inspection of perf related bpf prog attachments
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
:Manual section: 8
|
||||
|
||||
SYNOPSIS
|
||||
========
|
||||
|
||||
**bpftool** [*OPTIONS*] **perf** *COMMAND*
|
||||
|
||||
*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
|
||||
|
||||
*COMMANDS* :=
|
||||
{ **show** | **list** | **help** }
|
||||
|
||||
PERF COMMANDS
|
||||
=============
|
||||
|
||||
| **bpftool** **perf { show | list }**
|
||||
| **bpftool** **perf help**
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
**bpftool perf { show | list }**
|
||||
List all raw_tracepoint, tracepoint, kprobe and uprobe attachments in the system.
|
||||
|
||||
Output will start with process id and file descriptor in that process,
|
||||
followed by bpf program id, attachment information, and attachment point.
|
||||
The attachment point for raw_tracepoint/tracepoint is the trace probe name.
|
||||
The attachment point for k[ret]probe is either symbol name and offset,
|
||||
or a kernel virtual address.
|
||||
The attachment point for u[ret]probe is the file name and the file offset.
|
||||
|
||||
**bpftool perf help**
|
||||
Print short help message.
|
||||
|
||||
OPTIONS
|
||||
=======
|
||||
-h, --help
|
||||
Print short generic help message (similar to **bpftool help**).
|
||||
|
||||
-v, --version
|
||||
Print version number (similar to **bpftool version**).
|
||||
|
||||
-j, --json
|
||||
Generate JSON output. For commands that cannot produce JSON, this
|
||||
option has no effect.
|
||||
|
||||
-p, --pretty
|
||||
Generate human-readable JSON output. Implies **-j**.
|
||||
|
||||
EXAMPLES
|
||||
========
|
||||
|
||||
| **# bpftool perf**
|
||||
|
||||
::
|
||||
|
||||
pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
|
||||
pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
|
||||
pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
|
||||
pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
|
||||
|
||||
|
|
||||
| **# bpftool -j perf**
|
||||
|
||||
::
|
||||
|
||||
[{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
|
||||
{"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
|
||||
{"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
|
||||
{"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
|
||||
|
||||
|
||||
SEE ALSO
|
||||
========
|
||||
**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
|
@ -16,7 +16,7 @@ SYNOPSIS
|
||||
|
||||
**bpftool** **version**
|
||||
|
||||
*OBJECT* := { **map** | **program** | **cgroup** }
|
||||
*OBJECT* := { **map** | **program** | **cgroup** | **perf** }
|
||||
|
||||
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
|
||||
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
|
||||
@ -30,6 +30,8 @@ SYNOPSIS
|
||||
|
||||
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
|
||||
|
||||
*PERF-COMMANDS* := { **show** | **list** | **help** }
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
*bpftool* allows for inspection and simple modification of BPF objects
|
||||
@ -56,3 +58,4 @@ OPTIONS
|
||||
SEE ALSO
|
||||
========
|
||||
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
|
||||
**bpftool-perf**\ (8)
|
||||
|
@ -448,6 +448,15 @@ _bpftool()
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
perf)
|
||||
case $command in
|
||||
*)
|
||||
[[ $prev == $object ]] && \
|
||||
COMPREPLY=( $( compgen -W 'help \
|
||||
show list' -- "$cur" ) )
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
} &&
|
||||
complete -F _bpftool bpftool
|
||||
|
@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
|
||||
" %s batch file FILE\n"
|
||||
" %s version\n"
|
||||
"\n"
|
||||
" OBJECT := { prog | map | cgroup }\n"
|
||||
" OBJECT := { prog | map | cgroup | perf }\n"
|
||||
" " HELP_SPEC_OPTIONS "\n"
|
||||
"",
|
||||
bin_name, bin_name, bin_name);
|
||||
@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
|
||||
{ "prog", do_prog },
|
||||
{ "map", do_map },
|
||||
{ "cgroup", do_cgroup },
|
||||
{ "perf", do_perf },
|
||||
{ "version", do_version },
|
||||
{ 0 }
|
||||
};
|
||||
|
@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
|
||||
int do_map(int argc, char **arg);
|
||||
int do_event_pipe(int argc, char **argv);
|
||||
int do_cgroup(int argc, char **arg);
|
||||
int do_perf(int argc, char **arg);
|
||||
|
||||
int prog_parse_fd(int *argc, char ***argv);
|
||||
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
|
||||
|
246
tools/bpf/bpftool/perf.c
Normal file
246
tools/bpf/bpftool/perf.c
Normal file
@ -0,0 +1,246 @@
|
||||
// SPDX-License-Identifier: GPL-2.0+
|
||||
// Copyright (C) 2018 Facebook
|
||||
// Author: Yonghong Song <yhs@fb.com>
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <ftw.h>
|
||||
|
||||
#include <bpf.h>
|
||||
|
||||
#include "main.h"
|
||||
|
||||
/* 0: undecided, 1: supported, 2: not supported */
|
||||
static int perf_query_supported;
|
||||
static bool has_perf_query_support(void)
|
||||
{
|
||||
__u64 probe_offset, probe_addr;
|
||||
__u32 len, prog_id, fd_type;
|
||||
char buf[256];
|
||||
int fd;
|
||||
|
||||
if (perf_query_supported)
|
||||
goto out;
|
||||
|
||||
fd = open(bin_name, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
p_err("perf_query_support: %s", strerror(errno));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* the following query will fail as no bpf attachment,
|
||||
* the expected errno is ENOTSUPP
|
||||
*/
|
||||
errno = 0;
|
||||
len = sizeof(buf);
|
||||
bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
|
||||
&fd_type, &probe_offset, &probe_addr);
|
||||
|
||||
if (errno == 524 /* ENOTSUPP */) {
|
||||
perf_query_supported = 1;
|
||||
goto close_fd;
|
||||
}
|
||||
|
||||
perf_query_supported = 2;
|
||||
p_err("perf_query_support: %s", strerror(errno));
|
||||
fprintf(stderr,
|
||||
"HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
|
||||
|
||||
close_fd:
|
||||
close(fd);
|
||||
out:
|
||||
return perf_query_supported == 1;
|
||||
}
|
||||
|
||||
/*
 * Write one JSON object describing the bpf program attached to (pid, fd).
 *
 * Every object carries "pid", "fd" and "prog_id"; the remaining fields
 * depend on fd_type:
 *   - [raw_]tracepoint: "fd_type" and "tracepoint" (taken from buf)
 *   - k[ret]probe:      "fd_type" and either "func" + "offset" when buf
 *                       holds a name, or "addr" when buf is empty
 *   - u[ret]probe:      "fd_type", "filename" (taken from buf) and "offset"
 * An fd_type outside these values produces only the three common fields.
 */
static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
			    char *buf, __u64 probe_offset, __u64 probe_addr)
{
	jsonw_start_object(json_wtr);
	jsonw_int_field(json_wtr, "pid", pid);
	jsonw_int_field(json_wtr, "fd", fd);
	jsonw_uint_field(json_wtr, "prog_id", prog_id);

	switch (fd_type) {
	case BPF_FD_TYPE_RAW_TRACEPOINT:
	case BPF_FD_TYPE_TRACEPOINT:
		jsonw_string_field(json_wtr, "fd_type",
				   fd_type == BPF_FD_TYPE_TRACEPOINT ?
				   "tracepoint" : "raw_tracepoint");
		jsonw_string_field(json_wtr, "tracepoint", buf);
		break;
	case BPF_FD_TYPE_KPROBE:
	case BPF_FD_TYPE_KRETPROBE:
		jsonw_string_field(json_wtr, "fd_type",
				   fd_type == BPF_FD_TYPE_KPROBE ?
				   "kprobe" : "kretprobe");
		/* no symbol name: only the address identifies the probe */
		if (buf[0] == '\0') {
			jsonw_lluint_field(json_wtr, "addr", probe_addr);
			break;
		}
		jsonw_string_field(json_wtr, "func", buf);
		jsonw_lluint_field(json_wtr, "offset", probe_offset);
		break;
	case BPF_FD_TYPE_UPROBE:
	case BPF_FD_TYPE_URETPROBE:
		jsonw_string_field(json_wtr, "fd_type",
				   fd_type == BPF_FD_TYPE_UPROBE ?
				   "uprobe" : "uretprobe");
		jsonw_string_field(json_wtr, "filename", buf);
		jsonw_lluint_field(json_wtr, "offset", probe_offset);
		break;
	}

	jsonw_end_object(json_wtr);
}
|
||||
|
||||
/*
 * Print one human-readable line for the bpf program attached to (pid, fd).
 *
 * The line starts with "pid <pid> fd <fd>: prog_id <id> " and continues
 * with the attachment type and point:
 *   - [raw_]tracepoint: the name held in buf
 *   - k[ret]probe:      "func <buf> offset <probe_offset>" when buf holds
 *                       a name, otherwise "addr <probe_addr>"
 *   - u[ret]probe:      "filename <buf> offset <probe_offset>"
 * For any other fd_type only the prefix is printed (without a newline).
 */
static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
			     char *buf, __u64 probe_offset, __u64 probe_addr)
{
	printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);

	if (fd_type == BPF_FD_TYPE_RAW_TRACEPOINT) {
		printf("raw_tracepoint %s\n", buf);
	} else if (fd_type == BPF_FD_TYPE_TRACEPOINT) {
		printf("tracepoint %s\n", buf);
	} else if (fd_type == BPF_FD_TYPE_KPROBE) {
		/* an empty name means the probe is known by address only */
		if (buf[0] == '\0')
			printf("kprobe addr %llu\n", probe_addr);
		else
			printf("kprobe func %s offset %llu\n", buf,
			       probe_offset);
	} else if (fd_type == BPF_FD_TYPE_KRETPROBE) {
		if (buf[0] == '\0')
			printf("kretprobe addr %llu\n", probe_addr);
		else
			printf("kretprobe func %s offset %llu\n", buf,
			       probe_offset);
	} else if (fd_type == BPF_FD_TYPE_UPROBE) {
		printf("uprobe filename %s offset %llu\n", buf, probe_offset);
	} else if (fd_type == BPF_FD_TYPE_URETPROBE) {
		printf("uretprobe filename %s offset %llu\n", buf,
		       probe_offset);
	}
}
|
||||
|
||||
static int show_proc(const char *fpath, const struct stat *sb,
|
||||
int tflag, struct FTW *ftwbuf)
|
||||
{
|
||||
__u64 probe_offset, probe_addr;
|
||||
__u32 len, prog_id, fd_type;
|
||||
int err, pid = 0, fd = 0;
|
||||
const char *pch;
|
||||
char buf[4096];
|
||||
|
||||
/* prefix always /proc */
|
||||
pch = fpath + 5;
|
||||
if (*pch == '\0')
|
||||
return 0;
|
||||
|
||||
/* pid should be all numbers */
|
||||
pch++;
|
||||
while (isdigit(*pch)) {
|
||||
pid = pid * 10 + *pch - '0';
|
||||
pch++;
|
||||
}
|
||||
if (*pch == '\0')
|
||||
return 0;
|
||||
if (*pch != '/')
|
||||
return FTW_SKIP_SUBTREE;
|
||||
|
||||
/* check /proc/<pid>/fd directory */
|
||||
pch++;
|
||||
if (strncmp(pch, "fd", 2))
|
||||
return FTW_SKIP_SUBTREE;
|
||||
pch += 2;
|
||||
if (*pch == '\0')
|
||||
return 0;
|
||||
if (*pch != '/')
|
||||
return FTW_SKIP_SUBTREE;
|
||||
|
||||
/* check /proc/<pid>/fd/<fd_num> */
|
||||
pch++;
|
||||
while (isdigit(*pch)) {
|
||||
fd = fd * 10 + *pch - '0';
|
||||
pch++;
|
||||
}
|
||||
if (*pch != '\0')
|
||||
return FTW_SKIP_SUBTREE;
|
||||
|
||||
/* query (pid, fd) for potential perf events */
|
||||
len = sizeof(buf);
|
||||
err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
|
||||
&probe_offset, &probe_addr);
|
||||
if (err < 0)
|
||||
return 0;
|
||||
|
||||
if (json_output)
|
||||
print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
|
||||
probe_addr);
|
||||
else
|
||||
print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
|
||||
probe_addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_show(int argc, char **argv)
|
||||
{
|
||||
int flags = FTW_ACTIONRETVAL | FTW_PHYS;
|
||||
int err = 0, nopenfd = 16;
|
||||
|
||||
if (!has_perf_query_support())
|
||||
return -1;
|
||||
|
||||
if (json_output)
|
||||
jsonw_start_array(json_wtr);
|
||||
if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
|
||||
p_err("%s", strerror(errno));
|
||||
err = -1;
|
||||
}
|
||||
if (json_output)
|
||||
jsonw_end_array(json_wtr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int do_help(int argc, char **argv)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: %s %s { show | list | help }\n"
|
||||
"",
|
||||
bin_name, argv[-2]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Command table handed to cmd_select() by do_perf().  "show" and "list"
 * are two names for the same handler.  The trailing all-zero entry is
 * presumably the end-of-table marker cmd_select() looks for.
 */
static const struct cmd cmds[] = {
	{ "show",	do_show },
	{ "list",	do_show },
	{ "help",	do_help },
	{ 0 }
};
|
||||
|
||||
/*
 * Entry point for the "perf" object: passes argc/argv to cmd_select()
 * together with this file's command table and do_help as the help
 * handler, and returns whatever cmd_select() returns.
 */
int do_perf(int argc, char **argv)
{
	return cmd_select(cmds, argc, argv, do_help);
}
|
@ -97,6 +97,7 @@ enum bpf_cmd {
|
||||
BPF_RAW_TRACEPOINT_OPEN,
|
||||
BPF_BTF_LOAD,
|
||||
BPF_BTF_GET_FD_BY_ID,
|
||||
BPF_TASK_FD_QUERY,
|
||||
};
|
||||
|
||||
enum bpf_map_type {
|
||||
@ -380,6 +381,22 @@ union bpf_attr {
|
||||
__u32 btf_log_size;
|
||||
__u32 btf_log_level;
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_TASK_FD_QUERY */
	__u32		pid;		/* input: pid */
	__u32		fd;		/* input: fd */
	__u32		flags;		/* input: flags */
	__u32		buf_len;	/* input/output: buf len */
	__aligned_u64	buf;		/* input/output:
					 *   tp_name for tracepoint
					 *   symbol for kprobe
					 *   filename for uprobe
					 */
	__u32		prog_id;	/* output: prog_id */
	__u32		fd_type;	/* output: BPF_FD_TYPE_* */
	__u64		probe_offset;	/* output: probe_offset */
	__u64		probe_addr;	/* output: probe_addr */
} task_fd_query;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
/* The description below is an attempt at providing documentation to eBPF
|
||||
@ -2557,4 +2574,13 @@ struct bpf_fib_lookup {
|
||||
__u8 dmac[6]; /* ETH_ALEN */
|
||||
};
|
||||
|
||||
/*
 * Values reported in the fd_type output field of BPF_TASK_FD_QUERY.
 * The comment after each value names what identifies the attachment
 * point for that type.
 */
enum bpf_task_fd_type {
	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
	BPF_FD_TYPE_KPROBE,		/* (symbol + offset) or addr */
	BPF_FD_TYPE_KRETPROBE,		/* (symbol + offset) or addr */
	BPF_FD_TYPE_UPROBE,		/* filename + offset */
	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
};
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
@ -643,3 +643,26 @@ int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper around the BPF_TASK_FD_QUERY command.
 *
 * Inputs:  pid, fd, flags, buf and *buf_len are placed in the
 *          task_fd_query member of the attribute union.
 * Outputs: *buf_len, *prog_id, *fd_type, *probe_offset and *probe_addr
 *          are copied back from the attribute union after the call,
 *          whether or not the call succeeded.
 *
 * Returns the value returned by sys_bpf().
 */
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
		      __u64 *probe_addr)
{
	union bpf_attr attr = {};
	int ret;

	/* fill in the input side of the request */
	attr.task_fd_query.buf = ptr_to_u64(buf);
	attr.task_fd_query.buf_len = *buf_len;
	attr.task_fd_query.flags = flags;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.pid = pid;

	ret = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));

	/* hand every output field back, even when ret signals an error */
	*probe_addr = attr.task_fd_query.probe_addr;
	*probe_offset = attr.task_fd_query.probe_offset;
	*fd_type = attr.task_fd_query.fd_type;
	*prog_id = attr.task_fd_query.prog_id;
	*buf_len = attr.task_fd_query.buf_len;

	return ret;
}
|
||||
|
@ -107,4 +107,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
|
||||
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
|
||||
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
|
||||
bool do_log);
|
||||
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
|
||||
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
|
||||
__u64 *probe_addr);
|
||||
#endif
|
||||
|
@ -1542,6 +1542,162 @@ static void test_get_stack_raw_tp(void)
|
||||
bpf_object__close(obj);
|
||||
}
|
||||
|
||||
static void test_task_fd_query_rawtp(void)
|
||||
{
|
||||
const char *file = "./test_get_stack_rawtp.o";
|
||||
__u64 probe_offset, probe_addr;
|
||||
__u32 len, prog_id, fd_type;
|
||||
struct bpf_object *obj;
|
||||
int efd, err, prog_fd;
|
||||
__u32 duration = 0;
|
||||
char buf[256];
|
||||
|
||||
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
|
||||
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
|
||||
return;
|
||||
|
||||
efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
|
||||
if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
|
||||
goto close_prog;
|
||||
|
||||
/* query (getpid(), efd) */
|
||||
len = sizeof(buf);
|
||||
err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
|
||||
&fd_type, &probe_offset, &probe_addr);
|
||||
if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
|
||||
errno))
|
||||
goto close_prog;
|
||||
|
||||
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
|
||||
strcmp(buf, "sys_enter") == 0;
|
||||
if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
|
||||
fd_type, buf))
|
||||
goto close_prog;
|
||||
|
||||
/* test zero len */
|
||||
len = 0;
|
||||
err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
|
||||
&fd_type, &probe_offset, &probe_addr);
|
||||
if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
|
||||
err, errno))
|
||||
goto close_prog;
|
||||
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
|
||||
len == strlen("sys_enter");
|
||||
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
|
||||
goto close_prog;
|
||||
|
||||
/* test empty buffer */
|
||||
len = sizeof(buf);
|
||||
err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
|
||||
&fd_type, &probe_offset, &probe_addr);
|
||||
if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
|
||||
err, errno))
|
||||
goto close_prog;
|
||||
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
|
||||
len == strlen("sys_enter");
|
||||
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
|
||||
goto close_prog;
|
||||
|
||||
/* test smaller buffer */
|
||||
len = 3;
|
||||
err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
|
||||
&fd_type, &probe_offset, &probe_addr);
|
||||
if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
|
||||
"err %d errno %d\n", err, errno))
|
||||
goto close_prog;
|
||||
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
|
||||
len == strlen("sys_enter") &&
|
||||
strcmp(buf, "sy") == 0;
|
||||
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
|
||||
goto close_prog;
|
||||
|
||||
goto close_prog_noerr;
|
||||
close_prog:
|
||||
error_cnt++;
|
||||
close_prog_noerr:
|
||||
bpf_object__close(obj);
|
||||
}
|
||||
|
||||
static void test_task_fd_query_tp_core(const char *probe_name,
|
||||
const char *tp_name)
|
||||
{
|
||||
const char *file = "./test_tracepoint.o";
|
||||
int err, bytes, efd, prog_fd, pmu_fd;
|
||||
struct perf_event_attr attr = {};
|
||||
__u64 probe_offset, probe_addr;
|
||||
__u32 len, prog_id, fd_type;
|
||||
struct bpf_object *obj;
|
||||
__u32 duration = 0;
|
||||
char buf[256];
|
||||
|
||||
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
|
||||
if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
|
||||
goto close_prog;
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
"/sys/kernel/debug/tracing/events/%s/id", probe_name);
|
||||
efd = open(buf, O_RDONLY, 0);
|
||||
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
|
||||
goto close_prog;
|
||||
bytes = read(efd, buf, sizeof(buf));
|
||||
close(efd);
|
||||
if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
|
||||
"bytes %d errno %d\n", bytes, errno))
|
||||
goto close_prog;
|
||||
|
||||
attr.config = strtol(buf, NULL, 0);
|
||||
attr.type = PERF_TYPE_TRACEPOINT;
|
||||
attr.sample_type = PERF_SAMPLE_RAW;
|
||||
attr.sample_period = 1;
|
||||
attr.wakeup_events = 1;
|
||||
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
|
||||
0 /* cpu 0 */, -1 /* group id */,
|
||||
0 /* flags */);
|
||||
if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
|
||||
goto close_pmu;
|
||||
|
||||
err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||
if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
|
||||
errno))
|
||||
goto close_pmu;
|
||||
|
||||
err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
|
||||
if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
|
||||
errno))
|
||||
goto close_pmu;
|
||||
|
||||
/* query (getpid(), pmu_fd) */
|
||||
len = sizeof(buf);
|
||||
err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
|
||||
&fd_type, &probe_offset, &probe_addr);
|
||||
if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
|
||||
errno))
|
||||
goto close_pmu;
|
||||
|
||||
err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
|
||||
if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
|
||||
fd_type, buf))
|
||||
goto close_pmu;
|
||||
|
||||
close(pmu_fd);
|
||||
goto close_prog_noerr;
|
||||
|
||||
close_pmu:
|
||||
close(pmu_fd);
|
||||
close_prog:
|
||||
error_cnt++;
|
||||
close_prog_noerr:
|
||||
bpf_object__close(obj);
|
||||
}
|
||||
|
||||
/*
 * Run the tracepoint fd-query check once per tracepoint listed below.
 * Each row pairs the "<subsystem>/<event>" lookup path with the name
 * the query is expected to report.
 */
static void test_task_fd_query_tp(void)
{
	static const char * const cases[][2] = {
		{ "sched/sched_switch",		"sched_switch" },
		{ "syscalls/sys_enter_read",	"sys_enter_read" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		test_task_fd_query_tp_core(cases[i][0], cases[i][1]);
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
jit_enabled = is_jit_enabled();
|
||||
@ -1561,6 +1717,8 @@ int main(void)
|
||||
test_stacktrace_build_id_nmi();
|
||||
test_stacktrace_map_raw_tp();
|
||||
test_get_stack_raw_tp();
|
||||
test_task_fd_query_rawtp();
|
||||
test_task_fd_query_tp();
|
||||
|
||||
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
|
||||
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
|
||||
|
@ -72,6 +72,18 @@ struct ksym *ksym_search(long key)
|
||||
return &syms[0];
|
||||
}
|
||||
|
||||
long ksym_get_addr(const char *name)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < sym_cnt; i++) {
|
||||
if (strcmp(syms[i].name, name) == 0)
|
||||
return syms[i].addr;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int page_size;
|
||||
static int page_cnt = 8;
|
||||
static struct perf_event_mmap_page *header;
|
||||
|
@ -11,6 +11,7 @@ struct ksym {
|
||||
|
||||
int load_kallsyms(void);
|
||||
struct ksym *ksym_search(long key);
|
||||
long ksym_get_addr(const char *name);
|
||||
|
||||
typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user