mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
Merge branch 'bpf_get_stack'
Yonghong Song says: ==================== Currently, stackmap and bpf_get_stackid helper are provided for bpf program to get the stack trace. This approach has a limitation though. If two stack traces have the same hash, only one will get stored in the stackmap table regardless of whether BPF_F_REUSE_STACKID is specified or not, so some stack traces may be missing from user perspective. This patch implements a new helper, bpf_get_stack, will send stack traces directly to bpf program. The bpf program is able to see all stack traces, and then can do in-kernel processing or send stack traces to user space through shared map or bpf_perf_event_output. Patches #1 and #2 implemented the core kernel support. Patch #3 removes two never-hit branches in verifier. Patches #4 and #5 are two verifier improves to make bpf programming easier. Patch #6 synced the new helper to tools headers. Patch #7 moved perf_event polling code and ksym lookup code from samples/bpf to tools/testing/selftests/bpf. Patch #8 added a verifier test in tools/bpf for new verifier change. Patches #9 and #10 added tests for raw tracepoint prog and tracepoint prog respectively. Changelogs: v8 -> v9: . make function perf_event_mmap (in trace_helpers.c) extern to decouple perf_event_mmap and perf_event_poller. . add jit enabled handling for kernel stack verification in Patch #9. Since we did not have a good way to verify jit enabled kernel stack, just return true if the kernel stack is not empty. . In path #9, using raw_syscalls/sys_enter instead of sched/sched_switch, removed calling cmd "task 1 dd if=/dev/zero of=/dev/null" which is left with dangling process after the program exited. v7 -> v8: . rebase on top of latest bpf-next . simplify BPF_ARSH dst_reg->smin_val/smax_value tracking . rewrite the description of bpf_get_stack() in uapi bpf.h based on new format. v6 -> v7: . do perf callchain buffer allocation inside the verifier. so if the prog->has_callchain_buf is set, it is guaranteed that the buffer has been allocated. . change condition "trace_nr <= skip" to "trace_nr < skip" so that for zero size buffer, return 0 instead of -EFAULT v5 -> v6: . after refining return register smax_value and umax_value for helpers bpf_get_stack and bpf_probe_read_str, bounds and var_off of the return register are further refined. . added missing commit message for tools header sync commit. . removed one unnecessary empty line. v4 -> v5: . relied on dst_reg->var_off to refine umin_val/umax_val in verifier handling BPF_ARSH value range tracking, suggested by Edward. v3 -> v4: . fixed a bug when meta ptr is set to NULL in check_func_arg. . introduced tnum_arshift and added detailed comments for the underlying implementation . avoided using VLA in tools/bpf test_progs. v2 -> v3: . used meta to track helper memory size argument . implemented range checking for ARSH in verifier . moved perf event polling and ksym related functions from samples/bpf to tools/bpf . added test to compare build id's between bpf_get_stackid and bpf_get_stack v1 -> v2: . fixed compilation error when CONFIG_PERF_EVENTS is not enabled ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
commit
f60ad0a0c4
@ -692,6 +692,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
|
||||
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
|
||||
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
|
||||
extern const struct bpf_func_proto bpf_get_stackid_proto;
|
||||
extern const struct bpf_func_proto bpf_get_stack_proto;
|
||||
extern const struct bpf_func_proto bpf_sock_map_update_proto;
|
||||
|
||||
/* Shared helpers among cBPF and eBPF. */
|
||||
|
@ -468,7 +468,8 @@ struct bpf_prog {
|
||||
dst_needed:1, /* Do we need dst entry? */
|
||||
blinded:1, /* Was blinded */
|
||||
is_func:1, /* program is a bpf function */
|
||||
kprobe_override:1; /* Do we override a kprobe? */
|
||||
kprobe_override:1, /* Do we override a kprobe? */
|
||||
has_callchain_buf:1; /* callchain buffer allocated? */
|
||||
enum bpf_prog_type type; /* Type of BPF program */
|
||||
enum bpf_attach_type expected_attach_type; /* For some prog types */
|
||||
u32 len; /* Number of filter blocks */
|
||||
|
@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max);
|
||||
/* Arithmetic and logical ops */
|
||||
/* Shift a tnum left (by a fixed shift) */
|
||||
struct tnum tnum_lshift(struct tnum a, u8 shift);
|
||||
/* Shift a tnum right (by a fixed shift) */
|
||||
/* Shift (rsh) a tnum right (by a fixed shift) */
|
||||
struct tnum tnum_rshift(struct tnum a, u8 shift);
|
||||
/* Shift (arsh) a tnum right (by a fixed min_shift) */
|
||||
struct tnum tnum_arshift(struct tnum a, u8 min_shift);
|
||||
/* Add two tnums, return @a + @b */
|
||||
struct tnum tnum_add(struct tnum a, struct tnum b);
|
||||
/* Subtract two tnums, return @a - @b */
|
||||
|
@ -1767,6 +1767,40 @@ union bpf_attr {
|
||||
* **CONFIG_XFRM** configuration option.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
|
||||
* Description
|
||||
* Return a user or a kernel stack in bpf program provided buffer.
|
||||
* To achieve this, the helper needs *ctx*, which is a pointer
|
||||
* to the context on which the tracing program is executed.
|
||||
* To store the stacktrace, the bpf program provides *buf* with
|
||||
* a nonnegative *size*.
|
||||
*
|
||||
* The last argument, *flags*, holds the number of stack frames to
|
||||
* skip (from 0 to 255), masked with
|
||||
* **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
|
||||
* the following flags:
|
||||
*
|
||||
* **BPF_F_USER_STACK**
|
||||
* Collect a user space stack instead of a kernel stack.
|
||||
* **BPF_F_USER_BUILD_ID**
|
||||
* Collect buildid+offset instead of ips for user stack,
|
||||
* only valid if **BPF_F_USER_STACK** is also specified.
|
||||
*
|
||||
* **bpf_get_stack**\ () can collect up to
|
||||
* **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
|
||||
* to sufficient large buffer size. Note that
|
||||
* this limit can be controlled with the **sysctl** program, and
|
||||
* that it should be manually increased in order to profile long
|
||||
* user stacks (such as stacks for Java programs). To do so, use:
|
||||
*
|
||||
* ::
|
||||
*
|
||||
* # sysctl kernel.perf_event_max_stack=<new value>
|
||||
*
|
||||
* Return
|
||||
* a non-negative value equal to or less than size on success, or
|
||||
* a negative error in case of failure.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@ -1835,7 +1869,8 @@ union bpf_attr {
|
||||
FN(msg_pull_data), \
|
||||
FN(bind), \
|
||||
FN(xdp_adjust_tail), \
|
||||
FN(skb_get_xfrm_state),
|
||||
FN(skb_get_xfrm_state), \
|
||||
FN(get_stack),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
@ -1869,11 +1904,14 @@ enum bpf_func_id {
|
||||
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
|
||||
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
|
||||
|
||||
/* BPF_FUNC_get_stackid flags. */
|
||||
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
|
||||
#define BPF_F_SKIP_FIELD_MASK 0xffULL
|
||||
#define BPF_F_USER_STACK (1ULL << 8)
|
||||
/* flags used by BPF_FUNC_get_stackid only. */
|
||||
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
|
||||
#define BPF_F_REUSE_STACKID (1ULL << 10)
|
||||
/* flags used by BPF_FUNC_get_stack only. */
|
||||
#define BPF_F_USER_BUILD_ID (1ULL << 11)
|
||||
|
||||
/* BPF_FUNC_skb_set_tunnel_key flags. */
|
||||
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <linux/rbtree_latch.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
@ -1722,6 +1723,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
|
||||
aux = container_of(work, struct bpf_prog_aux, work);
|
||||
if (bpf_prog_is_dev_bound(aux))
|
||||
bpf_prog_offload_destroy(aux->prog);
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
if (aux->prog->has_callchain_buf)
|
||||
put_callchain_buffers();
|
||||
#endif
|
||||
for (i = 0; i < aux->func_cnt; i++)
|
||||
bpf_jit_free(aux->func[i]);
|
||||
if (aux->func_cnt) {
|
||||
|
@ -262,16 +262,11 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void stack_map_get_build_id_offset(struct bpf_map *map,
|
||||
struct stack_map_bucket *bucket,
|
||||
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
|
||||
u64 *ips, u32 trace_nr, bool user)
|
||||
{
|
||||
int i;
|
||||
struct vm_area_struct *vma;
|
||||
struct bpf_stack_build_id *id_offs;
|
||||
|
||||
bucket->nr = trace_nr;
|
||||
id_offs = (struct bpf_stack_build_id *)bucket->data;
|
||||
|
||||
/*
|
||||
* We cannot do up_read() in nmi context, so build_id lookup is
|
||||
@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
pcpu_freelist_pop(&smap->freelist);
|
||||
if (unlikely(!new_bucket))
|
||||
return -ENOMEM;
|
||||
stack_map_get_build_id_offset(map, new_bucket, ips,
|
||||
trace_nr, user);
|
||||
new_bucket->nr = trace_nr;
|
||||
stack_map_get_build_id_offset(
|
||||
(struct bpf_stack_build_id *)new_bucket->data,
|
||||
ips, trace_nr, user);
|
||||
trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
|
||||
if (hash_matches && bucket->nr == trace_nr &&
|
||||
memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
|
||||
@ -405,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
|
||||
u64, flags)
|
||||
{
|
||||
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
|
||||
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
|
||||
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
|
||||
bool user = flags & BPF_F_USER_STACK;
|
||||
struct perf_callchain_entry *trace;
|
||||
bool kernel = !user;
|
||||
int err = -EINVAL;
|
||||
u64 *ips;
|
||||
|
||||
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
|
||||
BPF_F_USER_BUILD_ID)))
|
||||
goto clear;
|
||||
if (kernel && user_build_id)
|
||||
goto clear;
|
||||
|
||||
elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
|
||||
: sizeof(u64);
|
||||
if (unlikely(size % elem_size))
|
||||
goto clear;
|
||||
|
||||
num_elem = size / elem_size;
|
||||
if (sysctl_perf_event_max_stack < num_elem)
|
||||
init_nr = 0;
|
||||
else
|
||||
init_nr = sysctl_perf_event_max_stack - num_elem;
|
||||
trace = get_perf_callchain(regs, init_nr, kernel, user,
|
||||
sysctl_perf_event_max_stack, false, false);
|
||||
if (unlikely(!trace))
|
||||
goto err_fault;
|
||||
|
||||
trace_nr = trace->nr - init_nr;
|
||||
if (trace_nr < skip)
|
||||
goto err_fault;
|
||||
|
||||
trace_nr -= skip;
|
||||
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
|
||||
copy_len = trace_nr * elem_size;
|
||||
ips = trace->ip + skip + init_nr;
|
||||
if (user && user_build_id)
|
||||
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
|
||||
else
|
||||
memcpy(buf, ips, copy_len);
|
||||
|
||||
if (size > copy_len)
|
||||
memset(buf + copy_len, 0, size - copy_len);
|
||||
return copy_len;
|
||||
|
||||
err_fault:
|
||||
err = -EFAULT;
|
||||
clear:
|
||||
memset(buf, 0, size);
|
||||
return err;
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_get_stack_proto = {
|
||||
.func = bpf_get_stack,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
/* Called from eBPF program */
|
||||
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
|
@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
|
||||
return TNUM(a.value >> shift, a.mask >> shift);
|
||||
}
|
||||
|
||||
struct tnum tnum_arshift(struct tnum a, u8 min_shift)
|
||||
{
|
||||
/* if a.value is negative, arithmetic shifting by minimum shift
|
||||
* will have larger negative offset compared to more shifting.
|
||||
* If a.value is nonnegative, arithmetic shifting by minimum shift
|
||||
* will have larger positive offset compare to more shifting.
|
||||
*/
|
||||
return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
|
||||
}
|
||||
|
||||
struct tnum tnum_add(struct tnum a, struct tnum b)
|
||||
{
|
||||
u64 sm, sv, sigma, chi, mu;
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <linux/stringify.h>
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#include "disasm.h"
|
||||
|
||||
@ -164,6 +165,8 @@ struct bpf_call_arg_meta {
|
||||
bool pkt_access;
|
||||
int regno;
|
||||
int access_size;
|
||||
s64 msize_smax_value;
|
||||
u64 msize_umax_value;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(bpf_verifier_lock);
|
||||
@ -1984,6 +1987,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
|
||||
} else if (arg_type_is_mem_size(arg_type)) {
|
||||
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
|
||||
|
||||
/* remember the mem_size which may be used later
|
||||
* to refine return values.
|
||||
*/
|
||||
meta->msize_smax_value = reg->smax_value;
|
||||
meta->msize_umax_value = reg->umax_value;
|
||||
|
||||
/* The register is SCALAR_VALUE; the access check
|
||||
* happens using its boundaries.
|
||||
*/
|
||||
@ -2323,6 +2332,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
|
||||
int func_id,
|
||||
struct bpf_call_arg_meta *meta)
|
||||
{
|
||||
struct bpf_reg_state *ret_reg = ®s[BPF_REG_0];
|
||||
|
||||
if (ret_type != RET_INTEGER ||
|
||||
(func_id != BPF_FUNC_get_stack &&
|
||||
func_id != BPF_FUNC_probe_read_str))
|
||||
return;
|
||||
|
||||
ret_reg->smax_value = meta->msize_smax_value;
|
||||
ret_reg->umax_value = meta->msize_umax_value;
|
||||
__reg_deduce_bounds(ret_reg);
|
||||
__reg_bound_offset(ret_reg);
|
||||
}
|
||||
|
||||
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
|
||||
{
|
||||
const struct bpf_func_proto *fn = NULL;
|
||||
@ -2446,10 +2472,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
|
||||
|
||||
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
|
||||
const char *err_str;
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
err = get_callchain_buffers(sysctl_perf_event_max_stack);
|
||||
err_str = "cannot get callchain buffer for func %s#%d\n";
|
||||
#else
|
||||
err = -ENOTSUPP;
|
||||
err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
|
||||
#endif
|
||||
if (err) {
|
||||
verbose(env, err_str, func_id_name(func_id), func_id);
|
||||
return err;
|
||||
}
|
||||
|
||||
env->prog->has_callchain_buf = true;
|
||||
}
|
||||
|
||||
if (changes_data)
|
||||
clear_all_pkt_pointers(env);
|
||||
return 0;
|
||||
@ -2894,10 +2940,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
|
||||
dst_reg->umin_value <<= umin_val;
|
||||
dst_reg->umax_value <<= umax_val;
|
||||
}
|
||||
if (src_known)
|
||||
dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
|
||||
else
|
||||
dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
|
||||
/* We may learn something more from the var_off */
|
||||
__update_reg_bounds(dst_reg);
|
||||
break;
|
||||
@ -2925,16 +2968,35 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
|
||||
*/
|
||||
dst_reg->smin_value = S64_MIN;
|
||||
dst_reg->smax_value = S64_MAX;
|
||||
if (src_known)
|
||||
dst_reg->var_off = tnum_rshift(dst_reg->var_off,
|
||||
umin_val);
|
||||
else
|
||||
dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
|
||||
dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
|
||||
dst_reg->umin_value >>= umax_val;
|
||||
dst_reg->umax_value >>= umin_val;
|
||||
/* We may learn something more from the var_off */
|
||||
__update_reg_bounds(dst_reg);
|
||||
break;
|
||||
case BPF_ARSH:
|
||||
if (umax_val >= insn_bitness) {
|
||||
/* Shifts greater than 31 or 63 are undefined.
|
||||
* This includes shifts by a negative number.
|
||||
*/
|
||||
mark_reg_unknown(env, regs, insn->dst_reg);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Upon reaching here, src_known is true and
|
||||
* umax_val is equal to umin_val.
|
||||
*/
|
||||
dst_reg->smin_value >>= umin_val;
|
||||
dst_reg->smax_value >>= umin_val;
|
||||
dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
|
||||
|
||||
/* blow away the dst_reg umin_value/umax_value and rely on
|
||||
* dst_reg var_off to refine the result.
|
||||
*/
|
||||
dst_reg->umin_value = 0;
|
||||
dst_reg->umax_value = U64_MAX;
|
||||
__update_reg_bounds(dst_reg);
|
||||
break;
|
||||
default:
|
||||
mark_reg_unknown(env, regs, insn->dst_reg);
|
||||
break;
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "trace.h"
|
||||
|
||||
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
|
||||
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
|
||||
|
||||
/**
|
||||
* trace_call_bpf - invoke BPF program
|
||||
@ -577,6 +578,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto;
|
||||
case BPF_FUNC_perf_event_read_value:
|
||||
return &bpf_perf_event_read_value_proto;
|
||||
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
|
||||
@ -664,6 +667,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
|
||||
u64, flags)
|
||||
{
|
||||
struct pt_regs *regs = *(struct pt_regs **)tp_buff;
|
||||
|
||||
return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
|
||||
(unsigned long) size, flags, 0);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_get_stack_proto_tp = {
|
||||
.func = bpf_get_stack_tp,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
@ -672,6 +694,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto_tp;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto_tp;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto_tp;
|
||||
default:
|
||||
return tracing_func_proto(func_id, prog);
|
||||
}
|
||||
@ -734,6 +758,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto_tp;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto_tp;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto_tp;
|
||||
case BPF_FUNC_perf_prog_read_value:
|
||||
return &bpf_perf_prog_read_value_proto;
|
||||
default:
|
||||
@ -744,7 +770,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
/*
|
||||
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
|
||||
* to avoid potential recursive reuse issue when/if tracepoints are added
|
||||
* inside bpf_*_event_output and/or bpf_get_stack_id
|
||||
* inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
|
||||
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
|
||||
@ -787,6 +813,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
|
||||
void *, buf, u32, size, u64, flags)
|
||||
{
|
||||
struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
|
||||
|
||||
perf_fetch_caller_regs(regs);
|
||||
return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
|
||||
(unsigned long) size, flags, 0);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
|
||||
.func = bpf_get_stack_raw_tp,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_PTR_TO_MEM,
|
||||
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
@ -795,6 +841,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto_raw_tp;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto_raw_tp;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto_raw_tp;
|
||||
default:
|
||||
return tracing_func_proto(func_id, prog);
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ hostprogs-y += xdp_adjust_tail
|
||||
# Libbpf dependencies
|
||||
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
|
||||
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
|
||||
TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
|
||||
|
||||
test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
|
||||
sock_example-objs := sock_example.o $(LIBBPF)
|
||||
@ -65,10 +66,10 @@ tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
|
||||
tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
|
||||
load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
|
||||
test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
|
||||
trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
|
||||
trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o $(TRACE_HELPERS)
|
||||
lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
|
||||
offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
|
||||
spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
|
||||
offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS)
|
||||
spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS)
|
||||
map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
|
||||
test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
|
||||
test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
|
||||
@ -82,8 +83,8 @@ xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
|
||||
xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
|
||||
test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
|
||||
test_current_task_under_cgroup_user.o
|
||||
trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
|
||||
sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
|
||||
trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o $(TRACE_HELPERS)
|
||||
sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o $(TRACE_HELPERS)
|
||||
tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
|
||||
lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
|
||||
xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
|
||||
|
@ -648,66 +648,3 @@ void read_trace_pipe(void)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_SYMS 300000
|
||||
static struct ksym syms[MAX_SYMS];
|
||||
static int sym_cnt;
|
||||
|
||||
static int ksym_cmp(const void *p1, const void *p2)
|
||||
{
|
||||
return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
|
||||
}
|
||||
|
||||
int load_kallsyms(void)
|
||||
{
|
||||
FILE *f = fopen("/proc/kallsyms", "r");
|
||||
char func[256], buf[256];
|
||||
char symbol;
|
||||
void *addr;
|
||||
int i = 0;
|
||||
|
||||
if (!f)
|
||||
return -ENOENT;
|
||||
|
||||
while (!feof(f)) {
|
||||
if (!fgets(buf, sizeof(buf), f))
|
||||
break;
|
||||
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
|
||||
break;
|
||||
if (!addr)
|
||||
continue;
|
||||
syms[i].addr = (long) addr;
|
||||
syms[i].name = strdup(func);
|
||||
i++;
|
||||
}
|
||||
sym_cnt = i;
|
||||
qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ksym *ksym_search(long key)
|
||||
{
|
||||
int start = 0, end = sym_cnt;
|
||||
int result;
|
||||
|
||||
while (start < end) {
|
||||
size_t mid = start + (end - start) / 2;
|
||||
|
||||
result = key - syms[mid].addr;
|
||||
if (result < 0)
|
||||
end = mid;
|
||||
else if (result > 0)
|
||||
start = mid + 1;
|
||||
else
|
||||
return &syms[mid];
|
||||
}
|
||||
|
||||
if (start >= 1 && syms[start - 1].addr < key &&
|
||||
key < syms[start].addr)
|
||||
/* valid ksym */
|
||||
return &syms[start - 1];
|
||||
|
||||
/* out of range. return _stext */
|
||||
return &syms[0];
|
||||
}
|
||||
|
||||
|
@ -54,12 +54,5 @@ int load_bpf_file(char *path);
|
||||
int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
|
||||
|
||||
void read_trace_pipe(void);
|
||||
struct ksym {
|
||||
long addr;
|
||||
char *name;
|
||||
};
|
||||
|
||||
int load_kallsyms(void);
|
||||
struct ksym *ksym_search(long key);
|
||||
int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
|
||||
#endif
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <sys/resource.h>
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
#define PRINT_RAW_ADDR 0
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
#include "perf-sys.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
#define DEFAULT_FREQ 99
|
||||
#define DEFAULT_SECS 5
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <sys/resource.h>
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
int main(int ac, char **argv)
|
||||
{
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
#include "perf-sys.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
#define SAMPLE_FREQ 50
|
||||
|
||||
|
@ -21,100 +21,10 @@
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
#include "perf-sys.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
static int pmu_fd;
|
||||
|
||||
int page_size;
|
||||
int page_cnt = 8;
|
||||
volatile struct perf_event_mmap_page *header;
|
||||
|
||||
typedef void (*print_fn)(void *data, int size);
|
||||
|
||||
static int perf_event_mmap(int fd)
|
||||
{
|
||||
void *base;
|
||||
int mmap_size;
|
||||
|
||||
page_size = getpagesize();
|
||||
mmap_size = page_size * (page_cnt + 1);
|
||||
|
||||
base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if (base == MAP_FAILED) {
|
||||
printf("mmap err\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
header = base;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_event_poll(int fd)
|
||||
{
|
||||
struct pollfd pfd = { .fd = fd, .events = POLLIN };
|
||||
|
||||
return poll(&pfd, 1, 1000);
|
||||
}
|
||||
|
||||
struct perf_event_sample {
|
||||
struct perf_event_header header;
|
||||
__u32 size;
|
||||
char data[];
|
||||
};
|
||||
|
||||
static void perf_event_read(print_fn fn)
|
||||
{
|
||||
__u64 data_tail = header->data_tail;
|
||||
__u64 data_head = header->data_head;
|
||||
__u64 buffer_size = page_cnt * page_size;
|
||||
void *base, *begin, *end;
|
||||
char buf[256];
|
||||
|
||||
asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
|
||||
if (data_head == data_tail)
|
||||
return;
|
||||
|
||||
base = ((char *)header) + page_size;
|
||||
|
||||
begin = base + data_tail % buffer_size;
|
||||
end = base + data_head % buffer_size;
|
||||
|
||||
while (begin != end) {
|
||||
struct perf_event_sample *e;
|
||||
|
||||
e = begin;
|
||||
if (begin + e->header.size > base + buffer_size) {
|
||||
long len = base + buffer_size - begin;
|
||||
|
||||
assert(len < e->header.size);
|
||||
memcpy(buf, begin, len);
|
||||
memcpy(buf + len, base, e->header.size - len);
|
||||
e = (void *) buf;
|
||||
begin = base + e->header.size - len;
|
||||
} else if (begin + e->header.size == base + buffer_size) {
|
||||
begin = base;
|
||||
} else {
|
||||
begin += e->header.size;
|
||||
}
|
||||
|
||||
if (e->header.type == PERF_RECORD_SAMPLE) {
|
||||
fn(e->data, e->size);
|
||||
} else if (e->header.type == PERF_RECORD_LOST) {
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
__u64 id;
|
||||
__u64 lost;
|
||||
} *lost = (void *) e;
|
||||
printf("lost %lld events\n", lost->lost);
|
||||
} else {
|
||||
printf("unknown event type=%d size=%d\n",
|
||||
e->header.type, e->header.size);
|
||||
}
|
||||
}
|
||||
|
||||
__sync_synchronize(); /* smp_mb() */
|
||||
header->data_tail = data_head;
|
||||
}
|
||||
|
||||
static __u64 time_get_ns(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
@ -127,7 +37,7 @@ static __u64 start_time;
|
||||
|
||||
#define MAX_CNT 100000ll
|
||||
|
||||
static void print_bpf_output(void *data, int size)
|
||||
static int print_bpf_output(void *data, int size)
|
||||
{
|
||||
static __u64 cnt;
|
||||
struct {
|
||||
@ -138,7 +48,7 @@ static void print_bpf_output(void *data, int size)
|
||||
if (e->cookie != 0x12345678) {
|
||||
printf("BUG pid %llx cookie %llx sized %d\n",
|
||||
e->pid, e->cookie, size);
|
||||
kill(0, SIGINT);
|
||||
return PERF_EVENT_ERROR;
|
||||
}
|
||||
|
||||
cnt++;
|
||||
@ -146,8 +56,10 @@ static void print_bpf_output(void *data, int size)
|
||||
if (cnt == MAX_CNT) {
|
||||
printf("recv %lld events per sec\n",
|
||||
MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
|
||||
kill(0, SIGINT);
|
||||
return PERF_EVENT_DONE;
|
||||
}
|
||||
|
||||
return PERF_EVENT_CONT;
|
||||
}
|
||||
|
||||
static void test_bpf_perf_event(void)
|
||||
@ -170,6 +82,7 @@ int main(int argc, char **argv)
|
||||
{
|
||||
char filename[256];
|
||||
FILE *f;
|
||||
int ret;
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||
|
||||
@ -187,10 +100,7 @@ int main(int argc, char **argv)
|
||||
(void) f;
|
||||
|
||||
start_time = time_get_ns();
|
||||
for (;;) {
|
||||
perf_event_poll(pmu_fd);
|
||||
perf_event_read(print_bpf_output);
|
||||
}
|
||||
|
||||
return 0;
|
||||
ret = perf_event_poller(pmu_fd, print_bpf_output);
|
||||
kill(0, SIGINT);
|
||||
return ret;
|
||||
}
|
||||
|
@ -1767,6 +1767,40 @@ union bpf_attr {
|
||||
* **CONFIG_XFRM** configuration option.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
|
||||
* Description
|
||||
* Return a user or a kernel stack in bpf program provided buffer.
|
||||
* To achieve this, the helper needs *ctx*, which is a pointer
|
||||
* to the context on which the tracing program is executed.
|
||||
* To store the stacktrace, the bpf program provides *buf* with
|
||||
* a nonnegative *size*.
|
||||
*
|
||||
* The last argument, *flags*, holds the number of stack frames to
|
||||
* skip (from 0 to 255), masked with
|
||||
* **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
|
||||
* the following flags:
|
||||
*
|
||||
* **BPF_F_USER_STACK**
|
||||
* Collect a user space stack instead of a kernel stack.
|
||||
* **BPF_F_USER_BUILD_ID**
|
||||
* Collect buildid+offset instead of ips for user stack,
|
||||
* only valid if **BPF_F_USER_STACK** is also specified.
|
||||
*
|
||||
* **bpf_get_stack**\ () can collect up to
|
||||
* **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
|
||||
* to sufficient large buffer size. Note that
|
||||
* this limit can be controlled with the **sysctl** program, and
|
||||
* that it should be manually increased in order to profile long
|
||||
* user stacks (such as stacks for Java programs). To do so, use:
|
||||
*
|
||||
* ::
|
||||
*
|
||||
* # sysctl kernel.perf_event_max_stack=<new value>
|
||||
*
|
||||
* Return
|
||||
* a non-negative value equal to or less than size on success, or
|
||||
* a negative error in case of failure.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@ -1835,7 +1869,8 @@ union bpf_attr {
|
||||
FN(msg_pull_data), \
|
||||
FN(bind), \
|
||||
FN(xdp_adjust_tail), \
|
||||
FN(skb_get_xfrm_state),
|
||||
FN(skb_get_xfrm_state), \
|
||||
FN(get_stack),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
@ -1869,11 +1904,14 @@ enum bpf_func_id {
|
||||
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
|
||||
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
|
||||
|
||||
/* BPF_FUNC_get_stackid flags. */
|
||||
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
|
||||
#define BPF_F_SKIP_FIELD_MASK 0xffULL
|
||||
#define BPF_F_USER_STACK (1ULL << 8)
|
||||
/* flags used by BPF_FUNC_get_stackid only. */
|
||||
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
|
||||
#define BPF_F_REUSE_STACKID (1ULL << 10)
|
||||
/* flags used by BPF_FUNC_get_stack only. */
|
||||
#define BPF_F_USER_BUILD_ID (1ULL << 11)
|
||||
|
||||
/* BPF_FUNC_skb_set_tunnel_key flags. */
|
||||
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
|
||||
|
@ -32,7 +32,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
|
||||
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
|
||||
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
|
||||
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
|
||||
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o
|
||||
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
|
||||
test_get_stack_rawtp.o
|
||||
|
||||
# Order correspond to 'make run_tests' order
|
||||
TEST_PROGS := test_kmod.sh \
|
||||
@ -58,6 +59,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
|
||||
$(OUTPUT)/test_sock: cgroup_helpers.c
|
||||
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
|
||||
$(OUTPUT)/test_sockmap: cgroup_helpers.c
|
||||
$(OUTPUT)/test_progs: trace_helpers.c
|
||||
|
||||
.PHONY: force
|
||||
|
||||
|
@ -101,6 +101,8 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
|
||||
static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
|
||||
int size, int flags) =
|
||||
(void *) BPF_FUNC_skb_get_xfrm_state;
|
||||
static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
|
||||
(void *) BPF_FUNC_get_stack;
|
||||
|
||||
/* llvm builtin functions that eBPF C program may use to
|
||||
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
||||
|
102
tools/testing/selftests/bpf/test_get_stack_rawtp.c
Normal file
102
tools/testing/selftests/bpf/test_get_stack_rawtp.c
Normal file
@ -0,0 +1,102 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
/* Permit pretty deep stack traces */
|
||||
#define MAX_STACK_RAWTP 100
|
||||
struct stack_trace_t {
|
||||
int pid;
|
||||
int kern_stack_size;
|
||||
int user_stack_size;
|
||||
int user_stack_buildid_size;
|
||||
__u64 kern_stack[MAX_STACK_RAWTP];
|
||||
__u64 user_stack[MAX_STACK_RAWTP];
|
||||
struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") perfmap = {
|
||||
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(__u32),
|
||||
.max_entries = 2,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") stackdata_map = {
|
||||
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(struct stack_trace_t),
|
||||
.max_entries = 1,
|
||||
};
|
||||
|
||||
/* Allocate per-cpu space twice the needed. For the code below
|
||||
* usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
|
||||
* if (usize < 0)
|
||||
* return 0;
|
||||
* ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
|
||||
*
|
||||
* If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
|
||||
* verifier will complain that access "raw_data + usize"
|
||||
* with size "max_len - usize" may be out of bound.
|
||||
* The maximum "raw_data + usize" is "raw_data + max_len"
|
||||
* and the maximum "max_len - usize" is "max_len", verifier
|
||||
* concludes that the maximum buffer access range is
|
||||
* "raw_data[0...max_len * 2 - 1]" and hence reject the program.
|
||||
*
|
||||
* Doubling the to-be-used max buffer size can fix this verifier
|
||||
* issue and avoid complicated C programming massaging.
|
||||
* This is an acceptable workaround since there is one entry here.
|
||||
*/
|
||||
struct bpf_map_def SEC("maps") rawdata_map = {
|
||||
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
|
||||
.max_entries = 1,
|
||||
};
|
||||
|
||||
SEC("tracepoint/raw_syscalls/sys_enter")
|
||||
int bpf_prog1(void *ctx)
|
||||
{
|
||||
int max_len, max_buildid_len, usize, ksize, total_size;
|
||||
struct stack_trace_t *data;
|
||||
void *raw_data;
|
||||
__u32 key = 0;
|
||||
|
||||
data = bpf_map_lookup_elem(&stackdata_map, &key);
|
||||
if (!data)
|
||||
return 0;
|
||||
|
||||
max_len = MAX_STACK_RAWTP * sizeof(__u64);
|
||||
max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
|
||||
data->pid = bpf_get_current_pid_tgid();
|
||||
data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
|
||||
max_len, 0);
|
||||
data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
|
||||
BPF_F_USER_STACK);
|
||||
data->user_stack_buildid_size = bpf_get_stack(
|
||||
ctx, data->user_stack_buildid, max_buildid_len,
|
||||
BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
|
||||
bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
|
||||
|
||||
/* write both kernel and user stacks to the same buffer */
|
||||
raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
|
||||
if (!raw_data)
|
||||
return 0;
|
||||
|
||||
usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
|
||||
if (usize < 0)
|
||||
return 0;
|
||||
|
||||
ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
|
||||
if (ksize < 0)
|
||||
return 0;
|
||||
|
||||
total_size = usize + ksize;
|
||||
if (total_size > 0 && total_size <= max_len)
|
||||
bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
|
@ -38,8 +38,10 @@ typedef __u16 __sum16;
|
||||
#include "bpf_util.h"
|
||||
#include "bpf_endian.h"
|
||||
#include "bpf_rlimit.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
static int error_cnt, pass_cnt;
|
||||
static bool jit_enabled;
|
||||
|
||||
#define MAGIC_BYTES 123
|
||||
|
||||
@ -391,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr)
|
||||
return (__u64) (unsigned long) ptr;
|
||||
}
|
||||
|
||||
static bool is_jit_enabled(void)
|
||||
{
|
||||
const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
|
||||
bool enabled = false;
|
||||
int sysctl_fd;
|
||||
|
||||
sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
|
||||
if (sysctl_fd != -1) {
|
||||
char tmpc;
|
||||
|
||||
if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
|
||||
enabled = (tmpc != '0');
|
||||
close(sysctl_fd);
|
||||
}
|
||||
|
||||
return enabled;
|
||||
}
|
||||
|
||||
static void test_bpf_obj_id(void)
|
||||
{
|
||||
const __u64 array_magic_value = 0xfaceb00c;
|
||||
const __u32 array_key = 0;
|
||||
const int nr_iters = 2;
|
||||
const char *file = "./test_obj_id.o";
|
||||
const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
|
||||
const char *expected_prog_name = "test_obj_id";
|
||||
const char *expected_map_name = "test_map_id";
|
||||
const __u64 nsec_per_sec = 1000000000;
|
||||
@ -414,20 +433,11 @@ static void test_bpf_obj_id(void)
|
||||
char jited_insns[128], xlated_insns[128], zeros[128];
|
||||
__u32 i, next_id, info_len, nr_id_found, duration = 0;
|
||||
struct timespec real_time_ts, boot_time_ts;
|
||||
int sysctl_fd, jit_enabled = 0, err = 0;
|
||||
int err = 0;
|
||||
__u64 array_value;
|
||||
uid_t my_uid = getuid();
|
||||
time_t now, load_time;
|
||||
|
||||
sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
|
||||
if (sysctl_fd != -1) {
|
||||
char tmpc;
|
||||
|
||||
if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
|
||||
jit_enabled = (tmpc != '0');
|
||||
close(sysctl_fd);
|
||||
}
|
||||
|
||||
err = bpf_prog_get_fd_by_id(0);
|
||||
CHECK(err >= 0 || errno != ENOENT,
|
||||
"get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
|
||||
@ -896,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
|
||||
{
|
||||
__u32 key, next_key, *cur_key_p, *next_key_p;
|
||||
char *val_buf1, *val_buf2;
|
||||
int i, err = 0;
|
||||
|
||||
val_buf1 = malloc(stack_trace_len);
|
||||
val_buf2 = malloc(stack_trace_len);
|
||||
cur_key_p = NULL;
|
||||
next_key_p = &key;
|
||||
while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
|
||||
err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
|
||||
if (err)
|
||||
goto out;
|
||||
err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
|
||||
if (err)
|
||||
goto out;
|
||||
for (i = 0; i < stack_trace_len; i++) {
|
||||
if (val_buf1[i] != val_buf2[i]) {
|
||||
err = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
key = *next_key_p;
|
||||
cur_key_p = &key;
|
||||
next_key_p = &next_key;
|
||||
}
|
||||
if (errno != ENOENT)
|
||||
err = -1;
|
||||
|
||||
out:
|
||||
free(val_buf1);
|
||||
free(val_buf2);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void test_stacktrace_map()
|
||||
{
|
||||
int control_map_fd, stackid_hmap_fd, stackmap_fd;
|
||||
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
|
||||
const char *file = "./test_stacktrace_map.o";
|
||||
int bytes, efd, err, pmu_fd, prog_fd;
|
||||
int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
|
||||
struct perf_event_attr attr = {};
|
||||
__u32 key, val, duration = 0;
|
||||
struct bpf_object *obj;
|
||||
@ -956,6 +1002,10 @@ static void test_stacktrace_map()
|
||||
if (stackmap_fd < 0)
|
||||
goto disable_pmu;
|
||||
|
||||
stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
|
||||
if (stack_amap_fd < 0)
|
||||
goto disable_pmu;
|
||||
|
||||
/* give some time for bpf program run */
|
||||
sleep(1);
|
||||
|
||||
@ -977,6 +1027,12 @@ static void test_stacktrace_map()
|
||||
"err %d errno %d\n", err, errno))
|
||||
goto disable_pmu_noerr;
|
||||
|
||||
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
|
||||
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
|
||||
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
|
||||
"err %d errno %d\n", err, errno))
|
||||
goto disable_pmu_noerr;
|
||||
|
||||
goto disable_pmu_noerr;
|
||||
disable_pmu:
|
||||
error_cnt++;
|
||||
@ -1070,9 +1126,9 @@ static int extract_build_id(char *build_id, size_t size)
|
||||
|
||||
static void test_stacktrace_build_id(void)
|
||||
{
|
||||
int control_map_fd, stackid_hmap_fd, stackmap_fd;
|
||||
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
|
||||
const char *file = "./test_stacktrace_build_id.o";
|
||||
int bytes, efd, err, pmu_fd, prog_fd;
|
||||
int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
|
||||
struct perf_event_attr attr = {};
|
||||
__u32 key, previous_key, val, duration = 0;
|
||||
struct bpf_object *obj;
|
||||
@ -1137,6 +1193,11 @@ static void test_stacktrace_build_id(void)
|
||||
err, errno))
|
||||
goto disable_pmu;
|
||||
|
||||
stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
|
||||
if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
|
||||
"err %d errno %d\n", err, errno))
|
||||
goto disable_pmu;
|
||||
|
||||
assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
|
||||
== 0);
|
||||
assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
|
||||
@ -1188,8 +1249,15 @@ static void test_stacktrace_build_id(void)
|
||||
previous_key = key;
|
||||
} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
|
||||
|
||||
CHECK(build_id_matches < 1, "build id match",
|
||||
"Didn't find expected build ID from the map");
|
||||
if (CHECK(build_id_matches < 1, "build id match",
|
||||
"Didn't find expected build ID from the map"))
|
||||
goto disable_pmu;
|
||||
|
||||
stack_trace_len = PERF_MAX_STACK_DEPTH
|
||||
* sizeof(struct bpf_stack_build_id);
|
||||
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
|
||||
CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
|
||||
"err %d errno %d\n", err, errno);
|
||||
|
||||
disable_pmu:
|
||||
ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
|
||||
@ -1204,8 +1272,147 @@ static void test_stacktrace_build_id(void)
|
||||
return;
|
||||
}
|
||||
|
||||
#define MAX_CNT_RAWTP 10ull
|
||||
#define MAX_STACK_RAWTP 100
|
||||
struct get_stack_trace_t {
|
||||
int pid;
|
||||
int kern_stack_size;
|
||||
int user_stack_size;
|
||||
int user_stack_buildid_size;
|
||||
__u64 kern_stack[MAX_STACK_RAWTP];
|
||||
__u64 user_stack[MAX_STACK_RAWTP];
|
||||
struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
|
||||
};
|
||||
|
||||
static int get_stack_print_output(void *data, int size)
|
||||
{
|
||||
bool good_kern_stack = false, good_user_stack = false;
|
||||
const char *nonjit_func = "___bpf_prog_run";
|
||||
struct get_stack_trace_t *e = data;
|
||||
int i, num_stack;
|
||||
static __u64 cnt;
|
||||
struct ksym *ks;
|
||||
|
||||
cnt++;
|
||||
|
||||
if (size < sizeof(struct get_stack_trace_t)) {
|
||||
__u64 *raw_data = data;
|
||||
bool found = false;
|
||||
|
||||
num_stack = size / sizeof(__u64);
|
||||
/* If jit is enabled, we do not have a good way to
|
||||
* verify the sanity of the kernel stack. So we
|
||||
* just assume it is good if the stack is not empty.
|
||||
* This could be improved in the future.
|
||||
*/
|
||||
if (jit_enabled) {
|
||||
found = num_stack > 0;
|
||||
} else {
|
||||
for (i = 0; i < num_stack; i++) {
|
||||
ks = ksym_search(raw_data[i]);
|
||||
if (strcmp(ks->name, nonjit_func) == 0) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
good_kern_stack = true;
|
||||
good_user_stack = true;
|
||||
}
|
||||
} else {
|
||||
num_stack = e->kern_stack_size / sizeof(__u64);
|
||||
if (jit_enabled) {
|
||||
good_kern_stack = num_stack > 0;
|
||||
} else {
|
||||
for (i = 0; i < num_stack; i++) {
|
||||
ks = ksym_search(e->kern_stack[i]);
|
||||
if (strcmp(ks->name, nonjit_func) == 0) {
|
||||
good_kern_stack = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
|
||||
good_user_stack = true;
|
||||
}
|
||||
if (!good_kern_stack || !good_user_stack)
|
||||
return PERF_EVENT_ERROR;
|
||||
|
||||
if (cnt == MAX_CNT_RAWTP)
|
||||
return PERF_EVENT_DONE;
|
||||
|
||||
return PERF_EVENT_CONT;
|
||||
}
|
||||
|
||||
static void test_get_stack_raw_tp(void)
|
||||
{
|
||||
const char *file = "./test_get_stack_rawtp.o";
|
||||
int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
|
||||
struct perf_event_attr attr = {};
|
||||
struct timespec tv = {0, 10};
|
||||
__u32 key = 0, duration = 0;
|
||||
struct bpf_object *obj;
|
||||
|
||||
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
|
||||
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
|
||||
return;
|
||||
|
||||
efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
|
||||
if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
|
||||
goto close_prog;
|
||||
|
||||
perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
|
||||
if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
|
||||
perfmap_fd, errno))
|
||||
goto close_prog;
|
||||
|
||||
err = load_kallsyms();
|
||||
if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
|
||||
goto close_prog;
|
||||
|
||||
attr.sample_type = PERF_SAMPLE_RAW;
|
||||
attr.type = PERF_TYPE_SOFTWARE;
|
||||
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
|
||||
pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
|
||||
-1/*group_fd*/, 0);
|
||||
if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
|
||||
errno))
|
||||
goto close_prog;
|
||||
|
||||
err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
|
||||
if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
|
||||
errno))
|
||||
goto close_prog;
|
||||
|
||||
err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||
if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
|
||||
err, errno))
|
||||
goto close_prog;
|
||||
|
||||
err = perf_event_mmap(pmu_fd);
|
||||
if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
|
||||
goto close_prog;
|
||||
|
||||
/* trigger some syscall action */
|
||||
for (i = 0; i < MAX_CNT_RAWTP; i++)
|
||||
nanosleep(&tv, NULL);
|
||||
|
||||
err = perf_event_poller(pmu_fd, get_stack_print_output);
|
||||
if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
|
||||
goto close_prog;
|
||||
|
||||
goto close_prog_noerr;
|
||||
close_prog:
|
||||
error_cnt++;
|
||||
close_prog_noerr:
|
||||
bpf_object__close(obj);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
jit_enabled = is_jit_enabled();
|
||||
|
||||
test_pkt_access();
|
||||
test_xdp();
|
||||
test_xdp_adjust_tail();
|
||||
@ -1219,6 +1426,7 @@ int main(void)
|
||||
test_stacktrace_map();
|
||||
test_stacktrace_build_id();
|
||||
test_stacktrace_map_raw_tp();
|
||||
test_get_stack_raw_tp();
|
||||
|
||||
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
|
||||
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
|
||||
|
@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(__u32),
|
||||
.max_entries = 10000,
|
||||
.max_entries = 16384,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") stackmap = {
|
||||
@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = {
|
||||
.map_flags = BPF_F_STACK_BUILD_ID,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") stack_amap = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(struct bpf_stack_build_id)
|
||||
* PERF_MAX_STACK_DEPTH,
|
||||
.max_entries = 128,
|
||||
};
|
||||
|
||||
/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
|
||||
struct random_urandom_args {
|
||||
unsigned long long pad;
|
||||
@ -42,7 +50,10 @@ struct random_urandom_args {
|
||||
SEC("tracepoint/random/urandom_read")
|
||||
int oncpu(struct random_urandom_args *args)
|
||||
{
|
||||
__u32 max_len = sizeof(struct bpf_stack_build_id)
|
||||
* PERF_MAX_STACK_DEPTH;
|
||||
__u32 key = 0, val = 0, *value_p;
|
||||
void *stack_p;
|
||||
|
||||
value_p = bpf_map_lookup_elem(&control_map, &key);
|
||||
if (value_p && *value_p)
|
||||
@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args)
|
||||
|
||||
/* The size of stackmap and stackid_hmap should be the same */
|
||||
key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
|
||||
if ((int)key >= 0)
|
||||
if ((int)key >= 0) {
|
||||
bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
|
||||
stack_p = bpf_map_lookup_elem(&stack_amap, &key);
|
||||
if (stack_p)
|
||||
bpf_get_stack(args, stack_p, max_len,
|
||||
BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
|
||||
.type = BPF_MAP_TYPE_HASH,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(__u32),
|
||||
.max_entries = 10000,
|
||||
.max_entries = 16384,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") stackmap = {
|
||||
.type = BPF_MAP_TYPE_STACK_TRACE,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
|
||||
.max_entries = 10000,
|
||||
.max_entries = 16384,
|
||||
};
|
||||
|
||||
struct bpf_map_def SEC("maps") stack_amap = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
|
||||
.max_entries = 16384,
|
||||
};
|
||||
|
||||
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
|
||||
@ -44,7 +51,9 @@ struct sched_switch_args {
|
||||
SEC("tracepoint/sched/sched_switch")
|
||||
int oncpu(struct sched_switch_args *ctx)
|
||||
{
|
||||
__u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
|
||||
__u32 key = 0, val = 0, *value_p;
|
||||
void *stack_p;
|
||||
|
||||
value_p = bpf_map_lookup_elem(&control_map, &key);
|
||||
if (value_p && *value_p)
|
||||
@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx)
|
||||
|
||||
/* The size of stackmap and stackid_hmap should be the same */
|
||||
key = bpf_get_stackid(ctx, &stackmap, 0);
|
||||
if ((int)key >= 0)
|
||||
if ((int)key >= 0) {
|
||||
bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
|
||||
stack_p = bpf_map_lookup_elem(&stack_amap, &key);
|
||||
if (stack_p)
|
||||
bpf_get_stack(ctx, stack_p, max_len, 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -11680,6 +11680,51 @@ static struct bpf_test tests[] = {
|
||||
.errstr = "BPF_XADD stores into R2 packet",
|
||||
.prog_type = BPF_PROG_TYPE_XDP,
|
||||
},
|
||||
{
|
||||
"bpf_get_stack return R0 within range",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
|
||||
BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
|
||||
BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
|
||||
BPF_MOV64_IMM(BPF_REG_4, 256),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_stack),
|
||||
BPF_MOV64_IMM(BPF_REG_1, 0),
|
||||
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
|
||||
BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
|
||||
BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
|
||||
BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
|
||||
BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
|
||||
BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
|
||||
BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
|
||||
BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
|
||||
BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
|
||||
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
|
||||
BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
|
||||
BPF_MOV64_IMM(BPF_REG_4, 0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_get_stack),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map2 = { 4 },
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
},
|
||||
};
|
||||
|
||||
static int probe_filter_length(const struct bpf_insn *fp)
|
||||
|
180
tools/testing/selftests/bpf/trace_helpers.c
Normal file
180
tools/testing/selftests/bpf/trace_helpers.c
Normal file
@ -0,0 +1,180 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <poll.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <sys/mman.h>
|
||||
#include "trace_helpers.h"
|
||||
|
||||
#define MAX_SYMS 300000
|
||||
static struct ksym syms[MAX_SYMS];
|
||||
static int sym_cnt;
|
||||
|
||||
static int ksym_cmp(const void *p1, const void *p2)
|
||||
{
|
||||
return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
|
||||
}
|
||||
|
||||
int load_kallsyms(void)
|
||||
{
|
||||
FILE *f = fopen("/proc/kallsyms", "r");
|
||||
char func[256], buf[256];
|
||||
char symbol;
|
||||
void *addr;
|
||||
int i = 0;
|
||||
|
||||
if (!f)
|
||||
return -ENOENT;
|
||||
|
||||
while (!feof(f)) {
|
||||
if (!fgets(buf, sizeof(buf), f))
|
||||
break;
|
||||
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
|
||||
break;
|
||||
if (!addr)
|
||||
continue;
|
||||
syms[i].addr = (long) addr;
|
||||
syms[i].name = strdup(func);
|
||||
i++;
|
||||
}
|
||||
sym_cnt = i;
|
||||
qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ksym *ksym_search(long key)
|
||||
{
|
||||
int start = 0, end = sym_cnt;
|
||||
int result;
|
||||
|
||||
while (start < end) {
|
||||
size_t mid = start + (end - start) / 2;
|
||||
|
||||
result = key - syms[mid].addr;
|
||||
if (result < 0)
|
||||
end = mid;
|
||||
else if (result > 0)
|
||||
start = mid + 1;
|
||||
else
|
||||
return &syms[mid];
|
||||
}
|
||||
|
||||
if (start >= 1 && syms[start - 1].addr < key &&
|
||||
key < syms[start].addr)
|
||||
/* valid ksym */
|
||||
return &syms[start - 1];
|
||||
|
||||
/* out of range. return _stext */
|
||||
return &syms[0];
|
||||
}
|
||||
|
||||
static int page_size;
|
||||
static int page_cnt = 8;
|
||||
static volatile struct perf_event_mmap_page *header;
|
||||
|
||||
int perf_event_mmap(int fd)
|
||||
{
|
||||
void *base;
|
||||
int mmap_size;
|
||||
|
||||
page_size = getpagesize();
|
||||
mmap_size = page_size * (page_cnt + 1);
|
||||
|
||||
base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if (base == MAP_FAILED) {
|
||||
printf("mmap err\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
header = base;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_event_poll(int fd)
|
||||
{
|
||||
struct pollfd pfd = { .fd = fd, .events = POLLIN };
|
||||
|
||||
return poll(&pfd, 1, 1000);
|
||||
}
|
||||
|
||||
struct perf_event_sample {
|
||||
struct perf_event_header header;
|
||||
__u32 size;
|
||||
char data[];
|
||||
};
|
||||
|
||||
static int perf_event_read(perf_event_print_fn fn)
|
||||
{
|
||||
__u64 data_tail = header->data_tail;
|
||||
__u64 data_head = header->data_head;
|
||||
__u64 buffer_size = page_cnt * page_size;
|
||||
void *base, *begin, *end;
|
||||
char buf[256];
|
||||
int ret;
|
||||
|
||||
asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
|
||||
if (data_head == data_tail)
|
||||
return PERF_EVENT_CONT;
|
||||
|
||||
base = ((char *)header) + page_size;
|
||||
|
||||
begin = base + data_tail % buffer_size;
|
||||
end = base + data_head % buffer_size;
|
||||
|
||||
while (begin != end) {
|
||||
struct perf_event_sample *e;
|
||||
|
||||
e = begin;
|
||||
if (begin + e->header.size > base + buffer_size) {
|
||||
long len = base + buffer_size - begin;
|
||||
|
||||
assert(len < e->header.size);
|
||||
memcpy(buf, begin, len);
|
||||
memcpy(buf + len, base, e->header.size - len);
|
||||
e = (void *) buf;
|
||||
begin = base + e->header.size - len;
|
||||
} else if (begin + e->header.size == base + buffer_size) {
|
||||
begin = base;
|
||||
} else {
|
||||
begin += e->header.size;
|
||||
}
|
||||
|
||||
if (e->header.type == PERF_RECORD_SAMPLE) {
|
||||
ret = fn(e->data, e->size);
|
||||
if (ret != PERF_EVENT_CONT)
|
||||
return ret;
|
||||
} else if (e->header.type == PERF_RECORD_LOST) {
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
__u64 id;
|
||||
__u64 lost;
|
||||
} *lost = (void *) e;
|
||||
printf("lost %lld events\n", lost->lost);
|
||||
} else {
|
||||
printf("unknown event type=%d size=%d\n",
|
||||
e->header.type, e->header.size);
|
||||
}
|
||||
}
|
||||
|
||||
__sync_synchronize(); /* smp_mb() */
|
||||
header->data_tail = data_head;
|
||||
return PERF_EVENT_CONT;
|
||||
}
|
||||
|
||||
int perf_event_poller(int fd, perf_event_print_fn output_fn)
|
||||
{
|
||||
int ret;
|
||||
|
||||
for (;;) {
|
||||
perf_event_poll(fd);
|
||||
ret = perf_event_read(output_fn);
|
||||
if (ret != PERF_EVENT_CONT)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return PERF_EVENT_DONE;
|
||||
}
|
23
tools/testing/selftests/bpf/trace_helpers.h
Normal file
23
tools/testing/selftests/bpf/trace_helpers.h
Normal file
@ -0,0 +1,23 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __TRACE_HELPER_H
|
||||
#define __TRACE_HELPER_H
|
||||
|
||||
struct ksym {
|
||||
long addr;
|
||||
char *name;
|
||||
};
|
||||
|
||||
int load_kallsyms(void);
|
||||
struct ksym *ksym_search(long key);
|
||||
|
||||
typedef int (*perf_event_print_fn)(void *data, int size);
|
||||
|
||||
/* return code for perf_event_print_fn */
|
||||
#define PERF_EVENT_DONE 0
|
||||
#define PERF_EVENT_ERROR -1
|
||||
#define PERF_EVENT_CONT -2
|
||||
|
||||
int perf_event_mmap(int fd);
|
||||
/* return PERF_EVENT_DONE or PERF_EVENT_ERROR */
|
||||
int perf_event_poller(int fd, perf_event_print_fn output_fn);
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user