Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2019-12-27

The following pull-request contains BPF updates for your *net-next* tree.

We've added 127 non-merge commits during the last 17 day(s) which contain
a total of 110 files changed, 6901 insertions(+), 2721 deletions(-).

There are three merge conflicts. Conflicts and resolutions look as follows:

1) Merge conflict in net/bpf/test_run.c:

There was a tree-wide cleanup c593642c8b ("treewide: Use sizeof_field() macro")
which gets in the way with b590cb5f80 ("bpf: Switch to offsetofend in
BPF_PROG_TEST_RUN"):

  <<<<<<< HEAD
          if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
                             sizeof_field(struct __sk_buff, priority),
  =======
          if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
  >>>>>>> 7c8dce4b16

There are a few occasions that look similar to this. Always take the chunk with
offsetofend(). Note that there is one where the fields differ in here:

  <<<<<<< HEAD
          if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
                             sizeof_field(struct __sk_buff, tstamp),
  =======
          if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
  >>>>>>> 7c8dce4b16

Just take the one with offsetofend() /and/ gso_segs. Latter is correct due to
850a88cc40 ("bpf: Expose __sk_buff wire_len/gso_segs to BPF_PROG_TEST_RUN").

2) Merge conflict in arch/riscv/net/bpf_jit_comp.c:

(I'm keeping Bjorn in Cc here for a double-check in case I got it wrong.)

  <<<<<<< HEAD
          if (is_13b_check(off, insn))
                  return -1;
          emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx);
  =======
          emit_branch(BPF_JSLT, RV_REG_T1, RV_REG_ZERO, off, ctx);
  >>>>>>> 7c8dce4b16

Result should look like:

          emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);

3) Merge conflict in arch/riscv/include/asm/pgtable.h:

  <<<<<<< HEAD
  =======
  #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
  #define VMALLOC_END      (PAGE_OFFSET - 1)
  #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)

  #define BPF_JIT_REGION_SIZE     (SZ_128M)
  #define BPF_JIT_REGION_START    (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
  #define BPF_JIT_REGION_END      (VMALLOC_END)

  /*
   * Roughly size the vmemmap space to be large enough to fit enough
   * struct pages to map half the virtual address space. Then
   * position vmemmap directly below the VMALLOC region.
   */
  #define VMEMMAP_SHIFT \
          (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
  #define VMEMMAP_SIZE    BIT(VMEMMAP_SHIFT)
  #define VMEMMAP_END     (VMALLOC_START - 1)
  #define VMEMMAP_START   (VMALLOC_START - VMEMMAP_SIZE)

  #define vmemmap         ((struct page *)VMEMMAP_START)

  >>>>>>> 7c8dce4b16

Only take the BPF_* defines from there and move them higher up in the
same file. Remove the rest from the chunk. The VMALLOC_* etc defines
got moved via 01f52e16b8 ("riscv: define vmemmap before pfn_to_page
calls"). Result:

  [...]
  #define __S101  PAGE_READ_EXEC
  #define __S110  PAGE_SHARED_EXEC
  #define __S111  PAGE_SHARED_EXEC

  #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
  #define VMALLOC_END      (PAGE_OFFSET - 1)
  #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)

  #define BPF_JIT_REGION_SIZE     (SZ_128M)
  #define BPF_JIT_REGION_START    (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
  #define BPF_JIT_REGION_END      (VMALLOC_END)

  /*
   * Roughly size the vmemmap space to be large enough to fit enough
   * struct pages to map half the virtual address space. Then
   * position vmemmap directly below the VMALLOC region.
   */
  #define VMEMMAP_SHIFT \
          (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
  #define VMEMMAP_SIZE    BIT(VMEMMAP_SHIFT)
  #define VMEMMAP_END     (VMALLOC_START - 1)
  #define VMEMMAP_START   (VMALLOC_START - VMEMMAP_SIZE)

  [...]

Let me know if there are any other issues.

Anyway, the main changes are:

1) Extend bpftool to produce a struct (aka "skeleton") tailored and specific
   to a provided BPF object file. This provides an alternative, simplified API
   compared to standard libbpf interaction. Also, add libbpf extern variable
   resolution for .kconfig section to import Kconfig data, from Andrii Nakryiko.

2) Add BPF dispatcher for XDP which is a mechanism to avoid indirect calls by
   generating a branch funnel as discussed back in bpfconf'19 at LSF/MM. Also,
   add various BPF riscv JIT improvements, from Björn Töpel.

3) Extend bpftool to allow matching BPF programs and maps by name,
   from Paul Chaignon.

4) Support for replacing cgroup BPF programs attached with BPF_F_ALLOW_MULTI
   flag for allowing updates without service interruption, from Andrey Ignatov.

5) Cleanup and simplification of ring access functions for AF_XDP with a
   bonus of 0-5% performance improvement, from Magnus Karlsson.

6) Enable BPF JITs for x86-64 and arm64 by default. Also, final version of
   audit support for BPF, from Daniel Borkmann and latter with Jiri Olsa.

7) Move and extend test_select_reuseport into BPF program tests under
   BPF selftests, from Jakub Sitnicki.

8) Various BPF sample improvements for xdpsock for customizing parameters
   to set up and benchmark AF_XDP, from Jay Jayatheerthan.

9) Improve libbpf to provide a ulimit hint on permission denied errors.
   Also change XDP sample programs to attach in driver mode by default,
   from Toke Høiland-Jørgensen.

10) Extend BPF test infrastructure to allow changing skb mark from tc BPF
    programs, from Nikita V. Shirokov.

11) Optimize prologue code sequence in BPF arm32 JIT, from Russell King.

12) Fix xdp_redirect_cpu BPF sample to manually attach to tracepoints after
    libbpf conversion, from Jesper Dangaard Brouer.

13) Minor misc improvements from various others.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-12-27 14:20:10 -08:00
commit 2bbc078f81
110 changed files with 6923 additions and 2743 deletions

View File

@ -1260,12 +1260,9 @@ static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
static void build_prologue(struct jit_ctx *ctx)
{
const s8 r0 = bpf2a32[BPF_REG_0][1];
const s8 r2 = bpf2a32[BPF_REG_1][1];
const s8 r3 = bpf2a32[BPF_REG_1][0];
const s8 r4 = bpf2a32[BPF_REG_6][1];
const s8 fplo = bpf2a32[BPF_REG_FP][1];
const s8 fphi = bpf2a32[BPF_REG_FP][0];
const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
const s8 *tcc = bpf2a32[TCALL_CNT];
/* Save callee saved registers. */
@ -1278,8 +1275,10 @@ static void build_prologue(struct jit_ctx *ctx)
emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
#endif
/* Save frame pointer for later */
emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
/* mov r3, #0 */
/* sub r2, sp, #SCRATCH_SIZE */
emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);
ctx->stack_size = imm8m(STACK_SIZE);
@ -1287,18 +1286,15 @@ static void build_prologue(struct jit_ctx *ctx)
emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
/* Set up BPF prog stack base register */
emit_a32_mov_r(fplo, ARM_IP, ctx);
emit_a32_mov_i(fphi, 0, ctx);
emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);
/* mov r4, 0 */
emit(ARM_MOV_I(r4, 0), ctx);
/* Initialize Tail Count */
emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
emit_a32_mov_r64(true, tcc, bpf_r1, ctx);
/* Move BPF_CTX to BPF_R1 */
emit(ARM_MOV_R(r3, r4), ctx);
emit(ARM_MOV_R(r2, r0), ctx);
/* Initialize Tail Count */
emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx);
emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx);
emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);
/* end of prologue */
}

View File

@ -69,6 +69,7 @@ config ARM64
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)

View File

@ -82,4 +82,8 @@ struct riscv_pmu {
int irq;
};
#ifdef CONFIG_PERF_EVENTS
/*
 * Reinterpret a register-set pointer as a struct user_regs_struct pointer.
 *
 * Both the argument and the whole expansion are parenthesized so that the
 * cast applies to the complete argument expression (e.g. "p + 1") and so
 * that a following "->" or "[]" binds to the cast result, not to the
 * un-cast argument.
 */
#define perf_arch_bpf_user_pt_regs(regs) ((struct user_regs_struct *)(regs))
#endif
#endif /* _ASM_RISCV_PERF_EVENT_H */

View File

@ -94,6 +94,10 @@ extern pgd_t swapper_pg_dir[];
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
#define BPF_JIT_REGION_SIZE (SZ_128M)
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END (VMALLOC_END)
/*
* Roughly size the vmemmap space to be large enough to fit enough
* struct pages to map half the virtual address space. Then

View File

@ -0,0 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
#define _UAPI__ASM_BPF_PERF_EVENT_H__
/* NOTE(review): presumably the header that declares struct user_regs_struct. */
#include <asm/ptrace.h>
/* bpf_user_pt_regs_t is an alias for struct user_regs_struct. */
typedef struct user_regs_struct bpf_user_pt_regs_t;
#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */

View File

@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx)
return false;
}
/*
 * Set the RV_CTX_F_SEEN_S5 bit in ctx->flags.
 * NOTE(review): the name suggests S5 is the register backing the BPF frame
 * pointer -- confirm against the register mapping table.
 */
static void mark_fp(struct rv_jit_context *ctx)
{
__set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
}
static void mark_call(struct rv_jit_context *ctx)
{
__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
@ -456,6 +461,11 @@ static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
/*
 * Build the instruction word for auipc: a U-type encoding (rv_u_insn) with
 * opcode 0x17, destination register @rd and immediate @imm31_12.
 * NOTE(review): @imm31_12 is presumably the upper 20 bits of the offset,
 * already shifted down by 12 -- callers in this file pass
 * (off + (1 << 11)) >> 12.
 */
static u32 rv_auipc(u8 rd, u32 imm31_12)
{
return rv_u_insn(imm31_12, rd, 0x17);
}
static bool is_12b_int(s64 val)
{
return -(1 << 11) <= val && val < (1 << 11);
@ -479,27 +489,7 @@ static bool is_32b_int(s64 val)
static int is_12b_check(int off, int insn)
{
if (!is_12b_int(off)) {
pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
insn, (int)off);
return -1;
}
return 0;
}
static int is_13b_check(int off, int insn)
{
if (!is_13b_int(off)) {
pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
insn, (int)off);
return -1;
}
return 0;
}
static int is_21b_check(int off, int insn)
{
if (!is_21b_int(off)) {
pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
insn, (int)off);
return -1;
}
@ -545,10 +535,13 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
emit(rv_addi(rd, rd, lower), ctx);
}
static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx)
static int rv_offset(int insn, int off, struct rv_jit_context *ctx)
{
int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to];
int from, to;
off++; /* BPF branch is from PC+1, RV is from PC */
from = (insn > 0) ? ctx->offset[insn - 1] : 0;
to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
return (to - from) << 2;
}
@ -559,7 +552,7 @@ static int epilogue_offset(struct rv_jit_context *ctx)
return (to - from) << 2;
}
static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
{
int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
@ -596,8 +589,114 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
/* Set return value. */
emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx);
if (!is_tail_call)
emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
is_tail_call ? 4 : 0), /* skip TCC init */
ctx);
}
/*
 * Map a BPF conditional-jump operation to its logical negation.
 *
 * Returns the opposite BPF_J* condition, or -1 when @cond is not one of
 * the ten comparisons handled below.
 */
static int invert_bpf_cond(u8 cond)
{
	int inverted = -1;

	switch (cond) {
	/* equality */
	case BPF_JEQ:
		inverted = BPF_JNE;
		break;
	case BPF_JNE:
		inverted = BPF_JEQ;
		break;
	/* unsigned ordering */
	case BPF_JGT:
		inverted = BPF_JLE;
		break;
	case BPF_JLE:
		inverted = BPF_JGT;
		break;
	case BPF_JLT:
		inverted = BPF_JGE;
		break;
	case BPF_JGE:
		inverted = BPF_JLT;
		break;
	/* signed ordering */
	case BPF_JSGT:
		inverted = BPF_JSLE;
		break;
	case BPF_JSLE:
		inverted = BPF_JSGT;
		break;
	case BPF_JSLT:
		inverted = BPF_JSGE;
		break;
	case BPF_JSGE:
		inverted = BPF_JSLT;
		break;
	default:
		break;
	}

	return inverted;
}
/*
 * Emit exactly one conditional branch instruction implementing
 * "if (rd COND rs) goto PC + rvoff".
 *
 * @cond:  BPF_J* comparison to encode
 * @rd:    left-hand operand register
 * @rs:    right-hand operand register
 * @rvoff: branch offset in bytes; it is passed to the encoders shifted
 *         right by one
 * @ctx:   JIT context receiving the instruction
 *
 * The "greater than" and "less or equal" forms are expressed by swapping
 * the operands of the blt/bltu/bge/bgeu encoders. Nothing is emitted for a
 * condition that is not listed.
 */
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
		     struct rv_jit_context *ctx)
{
	const int imm = rvoff >> 1;

	switch (cond) {
	case BPF_JEQ:
		emit(rv_beq(rd, rs, imm), ctx);
		break;
	case BPF_JNE:
		emit(rv_bne(rd, rs, imm), ctx);
		break;
	case BPF_JGT:
		emit(rv_bltu(rs, rd, imm), ctx);
		break;
	case BPF_JLT:
		emit(rv_bltu(rd, rs, imm), ctx);
		break;
	case BPF_JGE:
		emit(rv_bgeu(rd, rs, imm), ctx);
		break;
	case BPF_JLE:
		emit(rv_bgeu(rs, rd, imm), ctx);
		break;
	case BPF_JSGT:
		emit(rv_blt(rs, rd, imm), ctx);
		break;
	case BPF_JSLT:
		emit(rv_blt(rd, rs, imm), ctx);
		break;
	case BPF_JSGE:
		emit(rv_bge(rd, rs, imm), ctx);
		break;
	case BPF_JSLE:
		emit(rv_bge(rs, rd, imm), ctx);
		break;
	default:
		/* Unknown condition: emit nothing. */
		break;
	}
}
/*
 * Emit "if (rd COND rs) goto PC + rvoff", picking a sequence by offset size:
 *   - rvoff passes is_13b_int():     one conditional branch (emit_bcc);
 *   - rvoff - 4 passes is_21b_int(): inverted branch skipping over a jal;
 *   - otherwise:                     inverted branch skipping over an
 *                                    auipc + jalr pair.
 *
 * @cond:  BPF_J* comparison; inverted via invert_bpf_cond() for the two
 *         long forms
 * @rd:    left-hand operand register
 * @rs:    right-hand operand register
 * @rvoff: branch offset in bytes
 * @ctx:   JIT context receiving the instructions
 *
 * RV_REG_T1 is overwritten in the auipc + jalr case.
 * NOTE(review): invert_bpf_cond() returns -1 for conditions it does not
 * know; that value is stored into the u8 @cond here. The visible callers
 * only pass conditions it handles (JSET is rewritten to JNE first) --
 * confirm for any new caller.
 */
static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
struct rv_jit_context *ctx)
{
s64 upper, lower;
/* Short form: the offset fits a conditional branch directly. */
if (is_13b_int(rvoff)) {
emit_bcc(cond, rd, rs, rvoff, ctx);
return;
}
/* Adjust for jal: the jump is emitted 4 bytes after the inverted branch. */
rvoff -= 4;
/* Transform, e.g.:
* bne rd,rs,foo
* to
* beq rd,rs,<.L1>
* (auipc foo)
* jal(r) foo
* .L1
*/
cond = invert_bpf_cond(cond);
if (is_21b_int(rvoff)) {
/* The inverted branch skips 8 bytes: itself plus the jal. */
emit_bcc(cond, rd, rs, 8, ctx);
emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
return;
}
/* 32-bit case: no additional rvoff adjustment is needed, since we
* get that from the auipc at PC', where PC = PC' + 4.
*/
/* Adding 1 << 11 before the shift rounds "upper" to pair with the low 12 bits kept in "lower". */
upper = (rvoff + (1 << 11)) >> 12;
lower = rvoff & 0xfff;
/* The inverted branch skips 12 bytes: itself, the auipc and the jalr. */
emit_bcc(cond, rd, rs, 12, ctx);
emit(rv_auipc(RV_REG_T1, upper), ctx);
emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
}
static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
@ -627,18 +726,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
return -1;
emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
if (is_13b_check(off, insn))
return -1;
emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx);
emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
/* if (TCC-- < 0)
* goto out;
*/
emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
if (is_13b_check(off, insn))
return -1;
emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx);
emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
/* prog = array->ptrs[index];
* if (!prog)
@ -651,18 +746,15 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
return -1;
emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
if (is_13b_check(off, insn))
return -1;
emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx);
emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
/* goto *(prog->bpf_func + 4); */
off = offsetof(struct bpf_prog, bpf_func);
if (is_12b_check(off, insn))
return -1;
emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx);
emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
__build_epilogue(RV_REG_T3, ctx);
__build_epilogue(true, ctx);
return 0;
}
@ -687,13 +779,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}
static int rv_offset_check(int *rvoff, s16 off, int insn,
struct rv_jit_context *ctx)
{
*rvoff = rv_offset(insn + off, insn, ctx);
return is_13b_check(*rvoff, insn);
}
static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
@ -726,13 +811,57 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
*rd = RV_REG_T2;
}
/*
 * Emit a jump to PC + rvoff that writes the link address to @rd.
 *
 * A single jal is emitted when @rvoff is non-zero, passes is_21b_int() and
 * @force_jalr is false. In every other case the two-instruction
 * auipc + jalr sequence is emitted, which overwrites RV_REG_T1.
 */
static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
			       struct rv_jit_context *ctx)
{
	s64 hi20, lo12;

	if (!rvoff || !is_21b_int(rvoff) || force_jalr) {
		/* Long form: split the offset into auipc/jalr immediates. */
		hi20 = (rvoff + (1 << 11)) >> 12;
		lo12 = rvoff & 0xfff;
		emit(rv_auipc(RV_REG_T1, hi20), ctx);
		emit(rv_jalr(rd, RV_REG_T1, lo12), ctx);
		return;
	}

	/* Short form: the offset fits a single jal. */
	emit(rv_jal(rd, rvoff >> 1), ctx);
}
/*
 * Return true when @cond is one of the signed BPF comparisons
 * (BPF_JSGT, BPF_JSLT, BPF_JSGE, BPF_JSLE), false otherwise.
 */
static bool is_signed_bpf_cond(u8 cond)
{
	switch (cond) {
	case BPF_JSGT:
	case BPF_JSLT:
	case BPF_JSGE:
	case BPF_JSLE:
		return true;
	default:
		return false;
	}
}
/*
 * Emit a call to @addr followed by a move of RV_REG_A0 into the register
 * mapped to BPF_REG_0.
 *
 * @fixed: when false, the auipc + jalr form is forced in
 *         emit_jump_and_link()
 * @addr:  absolute target address
 * @ctx:   JIT context
 *
 * The PC-relative offset is only computed when @addr is non-zero and an
 * image buffer (ctx->insns) exists; otherwise an offset of 0 is used.
 *
 * Returns 0 on success, or -ERANGE when the offset fails is_32b_int().
 */
static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
{
	s64 rel = 0;
	u8 ret_reg;

	if (addr && ctx->insns) {
		/* Address at which the next instruction will be placed. */
		u64 here = (u64)(long)(ctx->insns + ctx->ninsns);

		rel = addr - here;
		if (!is_32b_int(rel)) {
			pr_err("bpf-jit: target call addr %pK is out of range\n",
			       (void *)addr);
			return -ERANGE;
		}
	}

	emit_jump_and_link(RV_REG_RA, rel, !fixed, ctx);

	/* Copy the return value from A0 into the BPF_REG_0 register. */
	ret_reg = bpf_to_rv_reg(BPF_REG_0, ctx);
	emit(rv_addi(ret_reg, RV_REG_A0, 0), ctx);

	return 0;
}
static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
{
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
BPF_CLASS(insn->code) == BPF_JMP;
int s, e, rvoff, i = insn - ctx->prog->insnsi;
struct bpf_prog_aux *aux = ctx->prog->aux;
int rvoff, i = insn - ctx->prog->insnsi;
u8 rd = -1, rs = -1, code = insn->code;
s16 off = insn->off;
s32 imm = insn->imm;
@ -1000,214 +1129,110 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* JUMP off */
case BPF_JMP | BPF_JA:
rvoff = rv_offset(i + off, i, ctx);
if (!is_21b_int(rvoff)) {
pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
i, rvoff);
return -1;
}
emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
rvoff = rv_offset(i, off, ctx);
emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
break;
/* IF (dst COND src) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_zext_32_rd_rs(&rd, &rs, ctx);
emit(rv_beq(rd, rs, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_zext_32_rd_rs(&rd, &rs, ctx);
emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_zext_32_rd_rs(&rd, &rs, ctx);
emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_zext_32_rd_rs(&rd, &rs, ctx);
emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_zext_32_rd_rs(&rd, &rs, ctx);
emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_zext_32_rd_rs(&rd, &rs, ctx);
emit(rv_bne(rd, rs, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_sext_32_rd_rs(&rd, &rs, ctx);
emit(rv_blt(rs, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_sext_32_rd_rs(&rd, &rs, ctx);
emit(rv_blt(rd, rs, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_sext_32_rd_rs(&rd, &rs, ctx);
emit(rv_bge(rd, rs, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_sext_32_rd_rs(&rd, &rs, ctx);
emit(rv_bge(rs, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
if (!is64)
emit_zext_32_rd_rs(&rd, &rs, ctx);
emit(rv_and(RV_REG_T1, rd, rs), ctx);
emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
rvoff = rv_offset(i, off, ctx);
if (!is64) {
s = ctx->ninsns;
if (is_signed_bpf_cond(BPF_OP(code)))
emit_sext_32_rd_rs(&rd, &rs, ctx);
else
emit_zext_32_rd_rs(&rd, &rs, ctx);
e = ctx->ninsns;
/* Adjust for extra insns */
rvoff -= (e - s) << 2;
}
if (BPF_OP(code) == BPF_JSET) {
/* Adjust for and */
rvoff -= 4;
emit(rv_and(RV_REG_T1, rd, rs), ctx);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
ctx);
} else {
emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
}
break;
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_zext_32_rd_t1(&rd, ctx);
emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_zext_32_rd_t1(&rd, ctx);
emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_zext_32_rd_t1(&rd, ctx);
emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_zext_32_rd_t1(&rd, ctx);
emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_zext_32_rd_t1(&rd, ctx);
emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_zext_32_rd_t1(&rd, ctx);
emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_sext_32_rd(&rd, ctx);
emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_sext_32_rd(&rd, ctx);
emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_sext_32_rd(&rd, ctx);
emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_sext_32_rd(&rd, ctx);
emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx);
break;
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
if (rv_offset_check(&rvoff, off, i, ctx))
return -1;
rvoff = rv_offset(i, off, ctx);
s = ctx->ninsns;
emit_imm(RV_REG_T1, imm, ctx);
if (!is64)
emit_zext_32_rd_t1(&rd, ctx);
emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
if (!is64) {
if (is_signed_bpf_cond(BPF_OP(code)))
emit_sext_32_rd(&rd, ctx);
else
emit_zext_32_rd_t1(&rd, ctx);
}
e = ctx->ninsns;
/* Adjust for extra insns */
rvoff -= (e - s) << 2;
if (BPF_OP(code) == BPF_JSET) {
/* Adjust for and */
rvoff -= 4;
emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
ctx);
} else {
emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx);
}
break;
/* function call */
case BPF_JMP | BPF_CALL:
{
bool fixed;
int i, ret;
int ret;
u64 addr;
mark_call(ctx);
@ -1215,20 +1240,9 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
&fixed);
if (ret < 0)
return ret;
if (fixed) {
emit_imm(RV_REG_T1, addr, ctx);
} else {
i = ctx->ninsns;
emit_imm(RV_REG_T1, addr, ctx);
for (i = ctx->ninsns - i; i < 8; i++) {
/* nop */
emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0),
ctx);
}
}
emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx);
rd = bpf_to_rv_reg(BPF_REG_0, ctx);
emit(rv_addi(rd, RV_REG_A0, 0), ctx);
ret = emit_call(fixed, addr, ctx);
if (ret)
return ret;
break;
}
/* tail call */
@ -1243,9 +1257,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
break;
rvoff = epilogue_offset(ctx);
if (is_21b_check(rvoff, i))
return -1;
emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
break;
/* dst = imm64 */
@ -1426,6 +1438,10 @@ static void build_prologue(struct rv_jit_context *ctx)
{
int stack_adjust = 0, store_offset, bpf_stack_adjust;
bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
if (bpf_stack_adjust)
mark_fp(ctx);
if (seen_reg(RV_REG_RA, ctx))
stack_adjust += 8;
stack_adjust += 8; /* RV_REG_FP */
@ -1443,7 +1459,6 @@ static void build_prologue(struct rv_jit_context *ctx)
stack_adjust += 8;
stack_adjust = round_up(stack_adjust, 16);
bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
stack_adjust += bpf_stack_adjust;
store_offset = stack_adjust - 8;
@ -1502,10 +1517,10 @@ static void build_prologue(struct rv_jit_context *ctx)
static void build_epilogue(struct rv_jit_context *ctx)
{
__build_epilogue(RV_REG_RA, ctx);
__build_epilogue(false, ctx);
}
static int build_body(struct rv_jit_context *ctx, bool extra_pass)
static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
{
const struct bpf_prog *prog = ctx->prog;
int i;
@ -1517,12 +1532,12 @@ static int build_body(struct rv_jit_context *ctx, bool extra_pass)
ret = emit_insn(insn, ctx, extra_pass);
if (ret > 0) {
i++;
if (ctx->insns == NULL)
ctx->offset[i] = ctx->ninsns;
if (offset)
offset[i] = ctx->ninsns;
continue;
}
if (ctx->insns == NULL)
ctx->offset[i] = ctx->ninsns;
if (offset)
offset[i] = ctx->ninsns;
if (ret)
return ret;
}
@ -1548,9 +1563,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
int pass = 0, prev_ninsns = 0, i;
struct rv_jit_data *jit_data;
unsigned int image_size = 0;
struct rv_jit_context *ctx;
unsigned int image_size;
if (!prog->jit_requested)
return orig_prog;
@ -1587,33 +1603,59 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = orig_prog;
goto out_offset;
}
/* First pass generates the ctx->offset, but does not emit an image. */
if (build_body(ctx, extra_pass)) {
prog = orig_prog;
goto out_offset;
for (i = 0; i < prog->len; i++) {
prev_ninsns += 32;
ctx->offset[i] = prev_ninsns;
}
build_prologue(ctx);
ctx->epilogue_offset = ctx->ninsns;
build_epilogue(ctx);
/* Allocate image, now that we know the size. */
image_size = sizeof(u32) * ctx->ninsns;
jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
sizeof(u32),
bpf_fill_ill_insns);
if (!jit_data->header) {
for (i = 0; i < 16; i++) {
pass++;
ctx->ninsns = 0;
if (build_body(ctx, extra_pass, ctx->offset)) {
prog = orig_prog;
goto out_offset;
}
build_prologue(ctx);
ctx->epilogue_offset = ctx->ninsns;
build_epilogue(ctx);
if (ctx->ninsns == prev_ninsns) {
if (jit_data->header)
break;
image_size = sizeof(u32) * ctx->ninsns;
jit_data->header =
bpf_jit_binary_alloc(image_size,
&jit_data->image,
sizeof(u32),
bpf_fill_ill_insns);
if (!jit_data->header) {
prog = orig_prog;
goto out_offset;
}
ctx->insns = (u32 *)jit_data->image;
/* Now, when the image is allocated, the image
* can potentially shrink more (auipc/jalr ->
* jal).
*/
}
prev_ninsns = ctx->ninsns;
}
if (i == 16) {
pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
/* Second, real pass, that actually emits the image. */
ctx->insns = (u32 *)jit_data->image;
skip_init_ctx:
pass++;
ctx->ninsns = 0;
build_prologue(ctx);
if (build_body(ctx, extra_pass)) {
if (build_body(ctx, extra_pass, NULL)) {
bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
@ -1621,7 +1663,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
build_epilogue(ctx);
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, image_size, 2, ctx->insns);
bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
prog->bpf_func = (void *)ctx->insns;
prog->jited = 1;
@ -1641,3 +1683,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
tmp : orig_prog);
return prog;
}
/*
 * Allocate @size bytes for JIT code with __vmalloc_node_range(), restricted
 * to the address range [BPF_JIT_REGION_START, BPF_JIT_REGION_END], using
 * PAGE_SIZE alignment, GFP_KERNEL and PAGE_KERNEL_EXEC protection.
 *
 * Returns whatever __vmalloc_node_range() returns.
 * NOTE(review): presumably NULL on failure -- confirm against the
 * allocator's documentation.
 */
void *bpf_jit_alloc_exec(unsigned long size)
{
return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
BPF_JIT_REGION_END, GFP_KERNEL,
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
/*
 * Release a region obtained from bpf_jit_alloc_exec() by handing @addr to
 * vfree().
 */
void bpf_jit_free_exec(void *addr)
{
	/*
	 * Plain call rather than "return vfree(addr);": ISO C does not allow
	 * a return statement with an expression in a void function.
	 */
	vfree(addr);
}

View File

@ -93,6 +93,7 @@ config X86
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANTS_THP_SWAP if X86_64

View File

@ -10,10 +10,12 @@
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
#include <asm/asm-prototypes.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@ -1530,6 +1532,154 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
return 0;
}
/*
 * Emit a conditional jump with a 32-bit displacement: the bytes 0x0F and
 * (jmp_cond + 0x10), followed by a 32-bit offset to @func.
 *
 * @pprog:    in/out cursor into the output buffer; advanced on success
 * @func:     jump target
 * @ip:       address the instruction will execute from
 * @jmp_cond: short-form condition opcode (e.g. X86_JE)
 *
 * Returns 0 on success, or -EINVAL (with *pprog left unchanged) when the
 * displacement fails is_simm32().
 */
static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
u8 *prog = *pprog;
/* NOTE(review): prog and cnt are presumably referenced by name inside the EMIT* macros. */
int cnt = 0;
s64 offset;
/* Displacement measured from the end of the instruction: 2 opcode bytes + 4 offset bytes. */
offset = func - (ip + 2 + 4);
if (!is_simm32(offset)) {
pr_err("Target %p is out of range\n", func);
return -EINVAL;
}
EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
*pprog = prog;
return 0;
}
/*
 * Emit @len bytes of padding at *pprog and advance the cursor.
 *
 * The padding is written in chunks of at most ASM_NOP_MAX bytes; the bytes
 * of each chunk are copied from ideal_nops[<chunk length>].
 */
static void emit_nops(u8 **pprog, unsigned int len)
{
	unsigned int chunk, idx;
	/* prog and cnt keep their names: the EMIT* macros appear to use them. */
	u8 *prog = *pprog;
	int cnt = 0;

	for (; len > 0; len -= chunk) {
		chunk = len > ASM_NOP_MAX ? ASM_NOP_MAX : len;

		for (idx = 0; idx < chunk; idx++)
			EMIT1(ideal_nops[chunk][idx]);
	}

	*pprog = prog;
}
/*
 * Emit the fallback jump taken when a dispatcher comparison does not match.
 *
 * With CONFIG_RETPOLINE this is a jump to __x86_indirect_thunk_rdx emitted
 * through emit_jump(); otherwise it is the two-byte "jmp rdx" (0xFF 0xE2).
 *
 * @pprog: in/out cursor into the output buffer; always written back
 *
 * Returns the emit_jump() result under CONFIG_RETPOLINE, 0 otherwise.
 */
static int emit_fallback_jump(u8 **pprog)
{
u8 *prog = *pprog;
int err = 0;
#ifdef CONFIG_RETPOLINE
/* Note that this assumes the compiler uses external
* thunks for indirect calls. Both clang and GCC use the same
* naming convention for external thunks.
*/
err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
#else
/* NOTE(review): cnt is presumably referenced by name inside the EMIT* macros. */
int cnt = 0;
EMIT2(0xFF, 0xE2); /* jmp rdx */
#endif
*pprog = prog;
return err;
}
/*
 * Recursively emit a binary-search tree of compare-and-jump instructions
 * over progs[a..b] (inclusive indices).
 *
 * @pprog: in/out cursor into the output buffer; updated only on success
 * @a:     first index of the range
 * @b:     last index of the range
 * @progs: array of target addresses; arch_prepare_bpf_dispatcher() sorts it
 *         before calling here
 *
 * Leaf (a == b): "cmp rdx, progs[a]; je progs[a]", then the fallback jump.
 * Otherwise: "cmp rdx, progs[a + pivot]; jg upper_part", the lower half
 * [a, a + pivot], nop padding up to a 16-byte boundary, then the upper half
 * [a + pivot + 1, b]. The jg displacement is emitted as 0 and patched once
 * the size of the lower half is known.
 *
 * Returns 0 on success, -1 when an address fails is_simm32(), or the error
 * from a nested emitter.
 */
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *jg_target, *prog = *pprog;
/* jg_bytes: width of the jg displacement field (1 = short jump, 4 = near jump). */
int pivot, err, jg_bytes = 1, cnt = 0;
s64 jg_offset;
if (a == b) {
/* Leaf node of recursion, i.e. not a range of indices
* anymore.
*/
EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */
if (!is_simm32(progs[a]))
return -1;
EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
progs[a]);
err = emit_cond_near_jump(&prog, /* je func */
(void *)progs[a], prog,
X86_JE);
if (err)
return err;
err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
if (err)
return err;
*pprog = prog;
return 0;
}
/* Not a leaf node, so we pivot, and recursively descend into
* the lower and upper ranges.
*/
pivot = (b - a) / 2;
EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */
if (!is_simm32(progs[a + pivot]))
return -1;
EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);
if (pivot > 2) { /* jg upper_part */
/* Require near jump. */
jg_bytes = 4;
EMIT2_off32(0x0F, X86_JG + 0x10, 0);
} else {
EMIT2(X86_JG, 0);
}
/* Points just past the jg; its displacement field is the jg_bytes before this. */
jg_reloc = prog;
err = emit_bpf_dispatcher(&prog, a, a + pivot, /* emit lower_part */
progs);
if (err)
return err;
/* From Intel 64 and IA-32 Architectures Optimization
* Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
* Coding Rule 11: All branch targets should be 16-byte
* aligned.
*/
jg_target = PTR_ALIGN(prog, 16);
if (jg_target != prog)
emit_nops(&prog, jg_target - prog);
/* Patch the placeholder displacement now that the upper part's start is known. */
jg_offset = prog - jg_reloc;
emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */
b, progs);
if (err)
return err;
*pprog = prog;
return 0;
}
/*
 * Comparison callback for sort(): orders two s64 values ascending.
 *
 * Returns 1 when *a > *b, -1 when *a < *b and 0 when they are equal.
 */
static int cmp_ips(const void *a, const void *b)
{
	const s64 lhs = *(const s64 *)a;
	const s64 rhs = *(const s64 *)b;

	return (lhs > rhs) - (lhs < rhs);
}
/* Generate the dispatcher code for a set of target addresses.
 *
 * @image:     start of the buffer the code is written to
 * @funcs:     target addresses; sorted in place, ascending
 * @num_funcs: number of entries in @funcs
 *
 * Returns 0 on success, -1 if @num_funcs is not positive, or the error
 * reported by emit_bpf_dispatcher().
 */
int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
{
	u8 *prog = image;

	/* emit_bpf_dispatcher() stops recursing only when its range has
	 * collapsed to a single index (a == b). An empty set would hand it
	 * the range [0, -1], which never collapses, and a negative count
	 * would additionally turn into a huge element count for sort().
	 */
	if (num_funcs <= 0)
		return -1;

	/* The emitter builds a binary search, so it needs ordered input. */
	sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
}
struct x64_jit_data {
struct bpf_binary_header *header;
int *addrs;

View File

@ -269,7 +269,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr(umem);
xsk_umem_release_addr(umem);
return true;
}
@ -306,7 +306,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr_rq(umem);
xsk_umem_release_addr_rq(umem);
return true;
}

View File

@ -555,7 +555,7 @@ ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
rx_buf->handle = handle + umem->headroom;
xsk_umem_discard_addr(umem);
xsk_umem_release_addr(umem);
return true;
}
@ -591,7 +591,7 @@ ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
rx_buf->handle = handle + umem->headroom;
xsk_umem_discard_addr_rq(umem);
xsk_umem_release_addr_rq(umem);
return true;
}

View File

@ -277,7 +277,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr(umem);
xsk_umem_release_addr(umem);
return true;
}
@ -304,7 +304,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
xsk_umem_discard_addr_rq(umem);
xsk_umem_release_addr_rq(umem);
return true;
}

View File

@ -35,7 +35,7 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
*/
dma_info->addr = xdp_umem_get_dma(umem, handle);
xsk_umem_discard_addr_rq(umem);
xsk_umem_release_addr_rq(umem);
dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
DMA_BIDIRECTIONAL);

View File

@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *replace_prog,
enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type);
@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags);
struct bpf_prog *replace_prog, enum bpf_attach_type type,
u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,

View File

@ -471,11 +471,69 @@ struct bpf_trampoline {
void *image;
u64 selector;
};
#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
/* One entry of the progs[] array in struct bpf_dispatcher. */
struct bpf_dispatcher_prog {
/* program held by this entry */
struct bpf_prog *prog;
/* NOTE(review): presumably counts the users sharing this entry - confirm
 * against the dispatcher implementation
 */
refcount_t users;
};
/* State of one BPF dispatcher instance. Instances are set up statically
 * through BPF_DISPATCHER_INIT().
 */
struct bpf_dispatcher {
/* dispatcher mutex */
struct mutex mutex;
/* BPF_DISPATCHER_INIT() points this at the name##func function */
void *func;
/* room for up to BPF_DISPATCHER_MAX programs */
struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX];
/* starts at 0; presumably the number of entries of progs[] in use */
int num_progs;
/* image starts as NULL and image_off as 0.
 * NOTE(review): presumably the memory holding the generated dispatch
 * code and an offset into it - confirm against the dispatcher
 * implementation.
 */
void *image;
u32 image_off;
};
/* Pass-through with the dispatcher function signature: it simply calls
 * @bpf_func with @ctx and @insnsi and returns its result. It is what
 * BPF_PROG_RUN() and the !CONFIG_BPF_JIT variant of BPF_DISPATCHER_FUNC()
 * expand to.
 */
static __always_inline unsigned int bpf_dispatcher_nopfunc(
	const void *ctx,
	const struct bpf_insn *insnsi,
	unsigned int (*bpf_func)(const void *,
				 const struct bpf_insn *))
{
	return (*bpf_func)(ctx, insnsi);
}
#ifdef CONFIG_BPF_JIT
struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
int bpf_trampoline_link_prog(struct bpf_prog *prog);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
void bpf_trampoline_put(struct bpf_trampoline *tr);
void *bpf_jit_alloc_exec_page(void);
/* Static initializer for the struct bpf_dispatcher called @name: the mutex
 * is initialized, .func points at the name##func function, and the
 * dispatcher starts without programs and without an image.
 */
#define BPF_DISPATCHER_INIT(name) { \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.func = &name##func, \
.progs = {}, \
.num_progs = 0, \
.image = NULL, \
.image_off = 0 \
}
/* Define the dispatcher @name: an exported, noinline name##func function
 * whose default body just calls @bpf_func, plus the struct bpf_dispatcher
 * object itself. The expansion already ends in a semicolon.
 */
#define DEFINE_BPF_DISPATCHER(name) \
noinline unsigned int name##func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
unsigned int (*bpf_func)(const void *, \
const struct bpf_insn *)) \
{ \
return bpf_func(ctx, insnsi); \
} \
EXPORT_SYMBOL(name##func); \
struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
/* Declare what DEFINE_BPF_DISPATCHER(@name) defines: the name##func
 * prototype and the extern struct bpf_dispatcher object. The expansion
 * already ends in a semicolon.
 */
#define DECLARE_BPF_DISPATCHER(name) \
unsigned int name##func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
unsigned int (*bpf_func)(const void *, \
const struct bpf_insn *)); \
extern struct bpf_dispatcher name;
/* Name of the function belonging to dispatcher @name. */
#define BPF_DISPATCHER_FUNC(name) name##func
/* Address of the struct bpf_dispatcher object @name. */
#define BPF_DISPATCHER_PTR(name) (&name)
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
#else
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
@ -490,6 +548,13 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
return -ENOTSUPP;
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
/* Variants for the #else branch of CONFIG_BPF_JIT: defining or declaring a
 * dispatcher expands to nothing, the dispatcher function is the plain
 * pass-through bpf_dispatcher_nopfunc(), there is no dispatcher object,
 * and changing the program is a no-op.
 */
#define DEFINE_BPF_DISPATCHER(name)
#define DECLARE_BPF_DISPATCHER(name)
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
#define BPF_DISPATCHER_PTR(name) NULL
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
struct bpf_prog *from,
struct bpf_prog *to) {}
#endif
struct bpf_func_info_aux {
@ -897,14 +962,14 @@ struct sk_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_flush(struct bpf_map *map);
void __dev_map_flush(void);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(struct bpf_map *map);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@ -943,6 +1008,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
struct bpf_prog *bpf_prog_by_id(u32 id);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@ -1004,7 +1071,7 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map
return NULL;
}
static inline void __dev_map_flush(struct bpf_map *map)
static inline void __dev_map_flush(void)
{
}
@ -1033,7 +1100,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
return NULL;
}
static inline void __cpu_map_flush(struct bpf_map *map)
static inline void __cpu_map_flush(void)
{
}
@ -1074,6 +1141,11 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
static inline void bpf_map_put(struct bpf_map *map)
{
}
/* !CONFIG_BPF_SYSCALL stub: always returns ERR_PTR(-ENOTSUPP), so callers
 * must check the result with IS_ERR() rather than against NULL.
 */
static inline struct bpf_prog *bpf_prog_by_id(u32 id)
{
return ERR_PTR(-ENOTSUPP);
}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,

View File

@ -559,23 +559,26 @@ struct sk_filter {
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
#define BPF_PROG_RUN(prog, ctx) ({ \
u32 ret; \
cant_sleep(); \
if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
struct bpf_prog_stats *stats; \
u64 start = sched_clock(); \
ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
stats = this_cpu_ptr(prog->aux->stats); \
u64_stats_update_begin(&stats->syncp); \
stats->cnt++; \
stats->nsecs += sched_clock() - start; \
u64_stats_update_end(&stats->syncp); \
} else { \
ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
} \
#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
u32 ret; \
cant_sleep(); \
if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
struct bpf_prog_stats *stats; \
u64 start = sched_clock(); \
ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
stats = this_cpu_ptr(prog->aux->stats); \
u64_stats_update_begin(&stats->syncp); \
stats->cnt++; \
stats->nsecs += sched_clock() - start; \
u64_stats_update_end(&stats->syncp); \
} else { \
ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
} \
ret; })
#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \
bpf_dispatcher_nopfunc)
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
struct bpf_skb_data_end {
@ -589,7 +592,6 @@ struct bpf_redirect_info {
u32 tgt_index;
void *tgt_value;
struct bpf_map *map;
struct bpf_map *map_to_flush;
u32 kern_flags;
};
@ -699,6 +701,8 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
return res;
}
DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp)
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
@ -708,9 +712,12 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* already takes rcu_read_lock() when fetching the program, so
* it's not necessary here anymore.
*/
return BPF_PROG_RUN(prog, xdp);
return __BPF_PROG_RUN(prog, xdp,
BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp));
}
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
{
return prog->len * sizeof(struct bpf_insn);

View File

@ -72,7 +72,6 @@ struct xdp_umem {
struct xsk_map {
struct bpf_map map;
struct list_head __percpu *flush_list;
spinlock_t lock; /* Synchronize map updates */
struct xdp_sock *xsk_map[];
};
@ -119,8 +118,8 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
/* Used from netdev driver */
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
void xsk_umem_discard_addr(struct xdp_umem *umem);
bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
void xsk_umem_release_addr(struct xdp_umem *umem);
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
void xsk_umem_consume_tx_done(struct xdp_umem *umem);
@ -139,9 +138,8 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry);
int xsk_map_inc(struct xsk_map *map);
void xsk_map_put(struct xsk_map *map);
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs);
void __xsk_map_flush(struct bpf_map *map);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
u32 key)
@ -199,7 +197,7 @@ static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
return xsk_umem_has_addrs(umem, cnt - rq->length);
}
static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
{
struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
@ -210,12 +208,12 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
return addr;
}
static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
{
struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
if (!rq->length)
xsk_umem_discard_addr(umem);
xsk_umem_release_addr(umem);
else
rq->length--;
}
@ -260,7 +258,7 @@ static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
return NULL;
}
static inline void xsk_umem_discard_addr(struct xdp_umem *umem)
static inline void xsk_umem_release_addr(struct xdp_umem *umem)
{
}
@ -334,7 +332,7 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
return NULL;
}
static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
{
}
@ -369,13 +367,12 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
return 0;
}
static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs)
static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -EOPNOTSUPP;
}
static inline void __xsk_map_flush(struct bpf_map *map)
static inline void __xsk_map_flush(void)
{
}

View File

@ -116,6 +116,7 @@
#define AUDIT_FANOTIFY 1331 /* Fanotify access decision */
#define AUDIT_TIME_INJOFFSET 1332 /* Timekeeping offset injected */
#define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */
#define AUDIT_BPF 1334 /* BPF subsystem */
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */

View File

@ -231,6 +231,11 @@ enum bpf_attach_type {
* When children program makes decision (like picking TCP CA or sock bind)
* parent program has a chance to override it.
*
* With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
* programs for a cgroup. Though it's possible to replace an old program at
* any position by also specifying BPF_F_REPLACE flag and position itself in
* replace_bpf_fd attribute. Old program at this position will be released.
*
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
* Ex1:
@ -249,6 +254,7 @@ enum bpf_attach_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_F_ALLOW_MULTI (1U << 1)
#define BPF_F_REPLACE (1U << 2)
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@ -442,6 +448,10 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
__u32 replace_bpf_fd; /* previously attached eBPF
* program to replace if
* BPF_F_REPLACE is used
*/
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */

View File

@ -142,7 +142,8 @@ struct btf_param {
enum {
BTF_VAR_STATIC = 0,
BTF_VAR_GLOBAL_ALLOCATED,
BTF_VAR_GLOBAL_ALLOCATED = 1,
BTF_VAR_GLOBAL_EXTERN = 2,
};
/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe

View File

@ -1604,6 +1604,9 @@ config BPF_SYSCALL
Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors.
config ARCH_WANT_DEFAULT_BPF_JIT
bool
config BPF_JIT_ALWAYS_ON
bool "Permanently enable BPF JIT and remove BPF interpreter"
depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
@ -1611,6 +1614,10 @@ config BPF_JIT_ALWAYS_ON
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter
config BPF_JIT_DEFAULT_ON
def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
depends on HAVE_EBPF_JIT && BPF_JIT
config USERFAULTFD
bool "Enable userfaultfd() system call"
depends on MMU

View File

@ -8,6 +8,7 @@ obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
obj-$(CONFIG_BPF_JIT) += dispatcher.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o

View File

@ -103,8 +103,7 @@ static u32 prog_list_length(struct list_head *head)
* if parent has overridable or multi-prog, allow attaching
*/
static bool hierarchy_allows_attach(struct cgroup *cgrp,
enum bpf_attach_type type,
u32 new_flags)
enum bpf_attach_type type)
{
struct cgroup *p;
@ -283,31 +282,34 @@ static int update_effective_progs(struct cgroup *cgrp,
* propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse
* @prog: A program to attach
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
* @type: Type of attach operation
* @flags: Option flags
*
* Must be called with cgroup_mutex held.
*/
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *replace_prog,
enum bpf_attach_type type, u32 flags)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
struct bpf_prog_list *pl, *replace_pl = NULL;
enum bpf_cgroup_storage_type stype;
struct bpf_prog_list *pl;
bool pl_was_allocated;
int err;
if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
/* invalid combination */
return -EINVAL;
if (!hierarchy_allows_attach(cgrp, type, flags))
if (!hierarchy_allows_attach(cgrp, type))
return -EPERM;
if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
/* Disallow attaching non-overridable on top
* of existing overridable in this cgroup.
* Disallow attaching multi-prog if overridable or none
@ -317,6 +319,21 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG;
if (flags & BPF_F_ALLOW_MULTI) {
list_for_each_entry(pl, progs, node) {
if (pl->prog == prog)
/* disallow attaching the same prog twice */
return -EINVAL;
if (pl->prog == replace_prog)
replace_pl = pl;
}
if ((flags & BPF_F_REPLACE) && !replace_pl)
/* prog to replace not found for cgroup */
return -ENOENT;
} else if (!list_empty(progs)) {
replace_pl = list_first_entry(progs, typeof(*pl), node);
}
for_each_cgroup_storage_type(stype) {
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
if (IS_ERR(storage[stype])) {
@ -327,53 +344,28 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
}
}
if (flags & BPF_F_ALLOW_MULTI) {
list_for_each_entry(pl, progs, node) {
if (pl->prog == prog) {
/* disallow attaching the same prog twice */
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
return -EINVAL;
}
if (replace_pl) {
pl = replace_pl;
old_prog = pl->prog;
for_each_cgroup_storage_type(stype) {
old_storage[stype] = pl->storage[stype];
bpf_cgroup_storage_unlink(old_storage[stype]);
}
} else {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
return -ENOMEM;
}
pl_was_allocated = true;
pl->prog = prog;
for_each_cgroup_storage_type(stype)
pl->storage[stype] = storage[stype];
list_add_tail(&pl->node, progs);
} else {
if (list_empty(progs)) {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
return -ENOMEM;
}
pl_was_allocated = true;
list_add_tail(&pl->node, progs);
} else {
pl = list_first_entry(progs, typeof(*pl), node);
old_prog = pl->prog;
for_each_cgroup_storage_type(stype) {
old_storage[stype] = pl->storage[stype];
bpf_cgroup_storage_unlink(old_storage[stype]);
}
pl_was_allocated = false;
}
pl->prog = prog;
for_each_cgroup_storage_type(stype)
pl->storage[stype] = storage[stype];
}
cgrp->bpf.flags[type] = flags;
pl->prog = prog;
for_each_cgroup_storage_type(stype)
pl->storage[stype] = storage[stype];
cgrp->bpf.flags[type] = saved_flags;
err = update_effective_progs(cgrp, type);
if (err)
@ -401,7 +393,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->storage[stype] = old_storage[stype];
bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
}
if (pl_was_allocated) {
if (!replace_pl) {
list_del(&pl->node);
kfree(pl);
}
@ -539,6 +531,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog)
{
struct bpf_prog *replace_prog = NULL;
struct cgroup *cgrp;
int ret;
@ -546,8 +539,20 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
(attr->attach_flags & BPF_F_REPLACE)) {
replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
if (IS_ERR(replace_prog)) {
cgroup_put(cgrp);
return PTR_ERR(replace_prog);
}
}
ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
attr->attach_flags);
if (replace_prog)
bpf_prog_put(replace_prog);
cgroup_put(cgrp);
return ret;
}

View File

@ -222,8 +222,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
u32 pages, delta;
int ret;
BUG_ON(fp_old == NULL);
size = round_up(size, PAGE_SIZE);
pages = size / PAGE_SIZE;
if (pages <= fp_old->pages)
@ -520,9 +518,9 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
#ifdef CONFIG_BPF_JIT
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_harden __read_mostly;
int bpf_jit_kallsyms __read_mostly;
long bpf_jit_limit __read_mostly;
static __always_inline void

View File

@ -72,17 +72,18 @@ struct bpf_cpu_map {
struct bpf_map map;
/* Below members specific for map type */
struct bpf_cpu_map_entry **cpu_map;
struct list_head __percpu *flush_list;
};
static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx);
static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
int ret, cpu;
u64 cost;
int ret;
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
@ -106,7 +107,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
cost += sizeof(struct list_head) * num_possible_cpus();
/* Notice: returns -EPERM if map size is larger than memlock limit */
ret = bpf_map_charge_init(&cmap->map.memory, cost);
@ -115,23 +115,14 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
goto free_cmap;
}
cmap->flush_list = alloc_percpu(struct list_head);
if (!cmap->flush_list)
goto free_charge;
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(cmap->flush_list, cpu));
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
cmap->map.numa_node);
if (!cmap->cpu_map)
goto free_percpu;
goto free_charge;
return &cmap->map;
free_percpu:
free_percpu(cmap->flush_list);
free_charge:
bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
@ -399,22 +390,14 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
static void __cpu_map_entry_free(struct rcu_head *rcu)
{
struct bpf_cpu_map_entry *rcpu;
int cpu;
/* This cpu_map_entry have been disconnected from map and one
* RCU graze-period have elapsed. Thus, XDP cannot queue any
* RCU grace-period have elapsed. Thus, XDP cannot queue any
* new packets and cannot change/set flush_needed that can
* find this entry.
*/
rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
/* Flush remaining packets in percpu bulkq */
for_each_online_cpu(cpu) {
struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
/* No concurrent bq_enqueue can run at this point */
bq_flush_to_queue(bq, false);
}
free_percpu(rcpu->bulkq);
/* Cannot kthread_stop() here, last put free rcpu resources */
put_cpu_map_entry(rcpu);
@ -436,7 +419,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
* percpu bulkq to queue. Due to caller map_delete_elem() disable
* preemption, cannot call kthread_stop() to make sure queue is empty.
* Instead a work_queue is started for stopping kthread,
* cpu_map_kthread_stop, which waits for an RCU graze period before
* cpu_map_kthread_stop, which waits for an RCU grace period before
* stopping kthread, emptying the queue.
*/
static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
@ -507,7 +490,6 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
static void cpu_map_free(struct bpf_map *map)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
int cpu;
u32 i;
/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
@ -522,18 +504,6 @@ static void cpu_map_free(struct bpf_map *map)
bpf_clear_redirect_map(map);
synchronize_rcu();
/* To ensure all pending flush operations have completed wait for flush
* list be empty on _all_ cpus. Because the above synchronize_rcu()
* ensures the map is disconnected from the program we can assume no new
* items will be added to the list.
*/
for_each_online_cpu(cpu) {
struct list_head *flush_list = per_cpu_ptr(cmap->flush_list, cpu);
while (!list_empty(flush_list))
cond_resched();
}
/* For cpu_map the remote CPUs can still be using the entries
* (struct bpf_cpu_map_entry).
*/
@ -544,10 +514,9 @@ static void cpu_map_free(struct bpf_map *map)
if (!rcpu)
continue;
/* bq flush and cleanup happens after RCU graze-period */
/* bq flush and cleanup happens after RCU grace-period */
__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
}
free_percpu(cmap->flush_list);
bpf_map_area_free(cmap->cpu_map);
kfree(cmap);
}
@ -599,7 +568,7 @@ const struct bpf_map_ops cpu_map_ops = {
.map_check_btf = map_check_no_btf,
};
static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
{
struct bpf_cpu_map_entry *rcpu = bq->obj;
unsigned int processed = 0, drops = 0;
@ -620,10 +589,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
if (likely(in_napi_ctx))
xdp_return_frame_rx_napi(xdpf);
else
xdp_return_frame(xdpf);
xdp_return_frame_rx_napi(xdpf);
}
processed++;
}
@ -642,11 +608,11 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
*/
static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
struct list_head *flush_list = this_cpu_ptr(rcpu->cmap->flush_list);
struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
bq_flush_to_queue(bq, true);
bq_flush_to_queue(bq);
/* Notice, xdp_buff/page MUST be queued here, long enough for
* driver to code invoking us to finished, due to driver
@ -681,16 +647,26 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
return 0;
}
void __cpu_map_flush(struct bpf_map *map)
void __cpu_map_flush(void)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
struct list_head *flush_list = this_cpu_ptr(cmap->flush_list);
struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
struct xdp_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
bq_flush_to_queue(bq, true);
bq_flush_to_queue(bq);
/* If already running, costs spin_lock_irqsave + smp_mb */
wake_up_process(bq->obj->kthread);
}
}
/* Initcall: turn the per-CPU cpu_map_flush_list of every possible CPU into
 * an empty list, so bq_enqueue() and __cpu_map_flush() can use it.
 * Always returns 0.
 */
static int __init cpu_map_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct list_head *flush_list = &per_cpu(cpu_map_flush_list, cpu);

		INIT_LIST_HEAD(flush_list);
	}

	return 0;
}
subsys_initcall(cpu_map_init);

View File

@ -75,7 +75,6 @@ struct bpf_dtab_netdev {
struct bpf_dtab {
struct bpf_map map;
struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
struct list_head __percpu *flush_list;
struct list_head list;
/* these are only used for DEVMAP_HASH type maps */
@ -85,6 +84,7 @@ struct bpf_dtab {
u32 n_buckets;
};
static DEFINE_PER_CPU(struct list_head, dev_map_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
@ -109,8 +109,8 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
int err, cpu;
u64 cost;
u64 cost = 0;
int err;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
@ -125,9 +125,6 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
bpf_map_init_from_attr(&dtab->map, attr);
/* make sure page count doesn't overflow */
cost = (u64) sizeof(struct list_head) * num_possible_cpus();
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
@ -143,17 +140,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (err)
return -EINVAL;
dtab->flush_list = alloc_percpu(struct list_head);
if (!dtab->flush_list)
goto free_charge;
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu));
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
if (!dtab->dev_index_head)
goto free_percpu;
goto free_charge;
spin_lock_init(&dtab->index_lock);
} else {
@ -161,13 +151,11 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
goto free_percpu;
goto free_charge;
}
return 0;
free_percpu:
free_percpu(dtab->flush_list);
free_charge:
bpf_map_charge_finish(&dtab->map.memory);
return -ENOMEM;
@ -201,7 +189,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
static void dev_map_free(struct bpf_map *map)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
int i, cpu;
int i;
/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
* so the programs (can be more than one that used this map) were
@ -221,18 +209,6 @@ static void dev_map_free(struct bpf_map *map)
/* Make sure prior __dev_map_entry_free() have completed. */
rcu_barrier();
/* To ensure all pending flush operations have completed wait for flush
* list to empty on _all_ cpus.
* Because the above synchronize_rcu() ensures the map is disconnected
* from the program we can assume no new items will be added.
*/
for_each_online_cpu(cpu) {
struct list_head *flush_list = per_cpu_ptr(dtab->flush_list, cpu);
while (!list_empty(flush_list))
cond_resched();
}
if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
for (i = 0; i < dtab->n_buckets; i++) {
struct bpf_dtab_netdev *dev;
@ -266,7 +242,6 @@ static void dev_map_free(struct bpf_map *map)
bpf_map_area_free(dtab->netdev_map);
}
free_percpu(dtab->flush_list);
kfree(dtab);
}
@ -345,8 +320,7 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT;
}
static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
bool in_napi_ctx)
static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
{
struct bpf_dtab_netdev *obj = bq->obj;
struct net_device *dev = obj->dev;
@ -384,11 +358,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
/* RX path under NAPI protection, can return frames faster */
if (likely(in_napi_ctx))
xdp_return_frame_rx_napi(xdpf);
else
xdp_return_frame(xdpf);
xdp_return_frame_rx_napi(xdpf);
drops++;
}
goto out;
@ -401,15 +371,14 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
* net device can be torn down. On devmap tear down we ensure the flush list
* is empty before completing to ensure all flush operations have completed.
*/
void __dev_map_flush(struct bpf_map *map)
void __dev_map_flush(void)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct list_head *flush_list = this_cpu_ptr(dtab->flush_list);
struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct xdp_bulk_queue *bq, *tmp;
rcu_read_lock();
list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
bq_xmit_all(bq, XDP_XMIT_FLUSH, true);
bq_xmit_all(bq, XDP_XMIT_FLUSH);
rcu_read_unlock();
}
@ -436,11 +405,11 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct list_head *flush_list = this_cpu_ptr(obj->dtab->flush_list);
struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(bq, 0, true);
bq_xmit_all(bq, 0);
/* Ingress dev_rx will be the same for all xdp_frame's in
* bulk_queue, because bq stored per-CPU and must be flushed
@ -509,27 +478,11 @@ static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
return dev ? &dev->ifindex : NULL;
}
static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
if (dev->dev->netdev_ops->ndo_xdp_xmit) {
struct xdp_bulk_queue *bq;
int cpu;
rcu_read_lock();
for_each_online_cpu(cpu) {
bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(bq, XDP_XMIT_FLUSH, false);
}
rcu_read_unlock();
}
}
static void __dev_map_entry_free(struct rcu_head *rcu)
{
struct bpf_dtab_netdev *dev;
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev);
kfree(dev);
@ -810,10 +763,15 @@ static struct notifier_block dev_map_notifier = {
/* Boot-time initialisation for devmap.
 *
 * Checks at build time that the tracepoint shadow struct matches the real
 * one, registers the netdevice notifier, and initialises the per-CPU
 * dev_map_flush_list head for every possible CPU. Always returns 0; the
 * return value of register_netdevice_notifier() is not inspected.
 */
static int __init dev_map_init(void)
{
int cpu;
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier);
/* Every possible CPU gets an empty list head, not only the online ones */
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(&per_cpu(dev_map_flush_list, cpu));
return 0;
}

158
kernel/bpf/dispatcher.c Normal file
View File

@ -0,0 +1,158 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2019 Intel Corporation. */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
/* The BPF dispatcher is a multiway branch code generator. The
* dispatcher is a mechanism to avoid the performance penalty of an
* indirect call, which is expensive when retpolines are enabled. A
* dispatch client registers a BPF program into the dispatcher, and if
* there is available room in the dispatcher a direct call to the BPF
* program will be generated. All calls to the BPF programs called via
* the dispatcher will then be a direct call, instead of an
* indirect. The dispatcher hijacks a trampoline function via the
* __fentry__ of the trampoline. The trampoline function has the
* following signature:
*
* unsigned int trampoline(const void *ctx, const struct bpf_insn *insnsi,
* unsigned int (*bpf_func)(const void *,
* const struct bpf_insn *));
*/
static struct bpf_dispatcher_prog *bpf_dispatcher_find_prog(
struct bpf_dispatcher *d, struct bpf_prog *prog)
{
int i;
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
if (prog == d->progs[i].prog)
return &d->progs[i];
}
return NULL;
}
/* Return the first slot of @d whose ->prog is NULL, or NULL if there is
 * no such slot.
 */
static struct bpf_dispatcher_prog *bpf_dispatcher_find_free(
	struct bpf_dispatcher *d)
{
	struct bpf_prog *unused = NULL;

	return bpf_dispatcher_find_prog(d, unused);
}
static bool bpf_dispatcher_add_prog(struct bpf_dispatcher *d,
struct bpf_prog *prog)
{
struct bpf_dispatcher_prog *entry;
if (!prog)
return false;
entry = bpf_dispatcher_find_prog(d, prog);
if (entry) {
refcount_inc(&entry->users);
return false;
}
entry = bpf_dispatcher_find_free(d);
if (!entry)
return false;
bpf_prog_inc(prog);
entry->prog = prog;
refcount_set(&entry->users, 1);
d->num_progs++;
return true;
}
/* Drop one user of @prog from dispatcher @d.
 *
 * When the last user goes away the slot is cleared, the reference on @prog
 * is released and d->num_progs is decremented.
 *
 * Returns true only when the slot was actually freed; false for a NULL
 * @prog, for a @prog that is not registered, and while other users remain.
 */
static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d,
				       struct bpf_prog *prog)
{
	struct bpf_dispatcher_prog *slot;

	slot = prog ? bpf_dispatcher_find_prog(d, prog) : NULL;
	if (!slot || !refcount_dec_and_test(&slot->users))
		return false;

	slot->prog = NULL;
	bpf_prog_put(prog);
	d->num_progs--;

	return true;
}
/* Default (weak) implementation of the dispatcher code generator.
 *
 * Being __weak, it is used only when no other definition of this symbol is
 * linked in. It ignores its arguments and always reports -ENOTSUPP.
 */
int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
{
return -ENOTSUPP;
}
static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image)
{
s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
int i;
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
if (d->progs[i].prog)
*ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func;
}
return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs);
}
/* Rebuild the dispatcher code for @d after its program set has changed and
 * re-point d->func at the result.
 *
 * @prev_num_progs is the value of d->num_progs before the change; it tells
 * whether an old image is currently in use. The function returns silently
 * on any failure, leaving d->image_off untouched.
 */
static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
{
void *old, *new;
u32 noff;
int err;
if (!prev_num_progs) {
/* Nothing was installed before: no old target, build at offset 0 */
old = NULL;
noff = 0;
} else {
/* The current image stays in place while the new one is generated
 * at the other offset: XOR with PAGE_SIZE / 2 toggles image_off
 * between two values half a page apart.
 */
old = d->image + d->image_off;
noff = d->image_off ^ (PAGE_SIZE / 2);
}
/* With no programs left there is no new image to generate */
new = d->num_progs ? d->image + noff : NULL;
if (new) {
if (bpf_dispatcher_prepare(d, new))
return;
}
/* NOTE(review): presumably rewrites the jump at d->func from 'old' to
 * 'new' (NULL meaning "no dispatcher") - confirm against
 * bpf_arch_text_poke().
 */
err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new);
if (err || !new)
return;
/* Record the new offset only after the poke succeeded */
d->image_off = noff;
}
/* Replace program @from with program @to in dispatcher @d.
 *
 * Either pointer may be NULL, turning the call into a pure add or a pure
 * remove. Nothing happens when @from == @to. The work is done under
 * d->mutex; the executable page backing the dispatcher is allocated on
 * first use, and if that allocation fails the call returns without
 * changing anything. The dispatcher code is regenerated only when the set
 * of occupied slots actually changed.
 */
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
				struct bpf_prog *to)
{
	bool removed, added;
	int prev_num_progs;

	if (from == to)
		return;

	mutex_lock(&d->mutex);

	if (!d->image) {
		d->image = bpf_jit_alloc_exec_page();
		if (!d->image)
			goto unlock;
	}

	prev_num_progs = d->num_progs;
	/* Both steps always run, removal first, then addition */
	removed = bpf_dispatcher_remove_prog(d, from);
	added = bpf_dispatcher_add_prog(d, to);
	if (removed || added)
		bpf_dispatcher_update(d, prev_num_progs);

unlock:
	mutex_unlock(&d->mutex);
}

View File

@ -23,6 +23,7 @@
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@ -1306,6 +1307,36 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
return 0;
}
/* Operations reported by bpf_audit_prog(). BPF_AUDIT_MAX is not an
 * operation; it sizes bpf_audit_str[] and bounds valid values.
 */
enum bpf_audit {
BPF_AUDIT_LOAD,
BPF_AUDIT_UNLOAD,
BPF_AUDIT_MAX,
};
/* Text emitted in the "op=" field of the audit record, indexed by
 * enum bpf_audit.
 */
static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
[BPF_AUDIT_LOAD] = "LOAD",
[BPF_AUDIT_UNLOAD] = "UNLOAD",
};
/* Emit an AUDIT_BPF record of the form "prog-id=<id> op=<LOAD|UNLOAD>"
 * for @prog.
 *
 * @op must be below BPF_AUDIT_MAX; anything else triggers a one-time
 * warning and is ignored. Nothing is logged when auditing is off or when
 * no audit buffer can be obtained. Only LOAD records are tied to the
 * current audit context; other operations pass a NULL context.
 */
static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
{
	struct audit_context *ctx;
	struct audit_buffer *ab;

	if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX) || audit_enabled == AUDIT_OFF)
		return;

	ctx = (op == BPF_AUDIT_LOAD) ? audit_context() : NULL;

	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
	if (unlikely(!ab))
		return;

	audit_log_format(ab, "prog-id=%u op=%s",
			 prog->aux->id, bpf_audit_str[op]);
	audit_log_end(ab);
}
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@ -1421,6 +1452,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic64_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
__bpf_prog_put_noref(prog, true);
@ -1830,6 +1862,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
*/
bpf_prog_kallsyms_add(prog);
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
bpf_audit_prog(prog, BPF_AUDIT_LOAD);
err = bpf_prog_new_fd(prog);
if (err < 0)
@ -2040,10 +2073,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
}
}
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
#define BPF_F_ATTACH_MASK \
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
static int bpf_prog_attach(const union bpf_attr *attr)
{
@ -2305,6 +2338,23 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
/* Look up a BPF program by its ID and try to take a reference on it.
 *
 * The lookup in prog_idr and the reference attempt both happen under
 * prog_idr_lock. Returns ERR_PTR(-ENOENT) for id 0 or an unknown id;
 * otherwise returns the result of bpf_prog_inc_not_zero().
 */
struct bpf_prog *bpf_prog_by_id(u32 id)
{
	struct bpf_prog *prog;

	if (!id)
		return ERR_PTR(-ENOENT);

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	prog = prog ? bpf_prog_inc_not_zero(prog) : ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);

	return prog;
}
static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
struct bpf_prog *prog;
@ -2317,14 +2367,7 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
spin_lock_bh(&prog_idr_lock);
prog = idr_find(&prog_idr, id);
if (prog)
prog = bpf_prog_inc_not_zero(prog);
else
prog = ERR_PTR(-ENOENT);
spin_unlock_bh(&prog_idr_lock);
prog = bpf_prog_by_id(id);
if (IS_ERR(prog))
return PTR_ERR(prog);

View File

@ -14,6 +14,22 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
/* Allocate one page (PAGE_SIZE bytes) via bpf_jit_alloc_exec() and mark it
 * executable.
 *
 * Returns the page, or NULL if the allocation failed.
 */
void *bpf_jit_alloc_exec_page(void)
{
	void *image = bpf_jit_alloc_exec(PAGE_SIZE);

	if (image) {
		set_vm_flush_reset_perms(image);
		/* The image is deliberately kept writeable; the alternative
		 * would be flipping ro/rw every time a program is attached
		 * or detached.
		 */
		set_memory_x((long)image, 1);
	}

	return image;
}
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
struct bpf_trampoline *tr;
@ -34,7 +50,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
goto out;
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
image = bpf_jit_alloc_exec(PAGE_SIZE);
image = bpf_jit_alloc_exec_page();
if (!image) {
kfree(tr);
tr = NULL;
@ -48,12 +64,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
mutex_init(&tr->mutex);
for (i = 0; i < BPF_TRAMP_MAX; i++)
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
set_vm_flush_reset_perms(image);
/* Keep image as writeable. The alternative is to keep flipping ro/rw
* everytime new program is attached or detached.
*/
set_memory_x((long)image, 1);
tr->image = image;
out:
mutex_unlock(&trampoline_mutex);

View File

@ -72,9 +72,9 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
struct bpf_map_memory mem;
int cpu, err, numa_node;
int err, numa_node;
struct xsk_map *m;
u64 cost, size;
u64 size;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@ -86,9 +86,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
numa_node = bpf_map_attr_numa_node(attr);
size = struct_size(m, xsk_map, attr->max_entries);
cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
err = bpf_map_charge_init(&mem, cost);
err = bpf_map_charge_init(&mem, size);
if (err < 0)
return ERR_PTR(err);
@ -102,16 +101,6 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
bpf_map_charge_move(&m->map.memory, &mem);
spin_lock_init(&m->lock);
m->flush_list = alloc_percpu(struct list_head);
if (!m->flush_list) {
bpf_map_charge_finish(&m->map.memory);
bpf_map_area_free(m);
return ERR_PTR(-ENOMEM);
}
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
return &m->map;
}
@ -121,7 +110,6 @@ static void xsk_map_free(struct bpf_map *map)
bpf_clear_redirect_map(map);
synchronize_net();
free_percpu(m->flush_list);
bpf_map_area_free(m);
}

View File

@ -6288,12 +6288,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags)
struct bpf_prog *replace_prog, enum bpf_attach_type type,
u32 flags)
{
int ret;
mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags);
mutex_unlock(&cgroup_mutex);
return ret;
}

View File

@ -15,7 +15,7 @@
#include <trace/events/bpf_test_run.h>
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time)
u32 *retval, u32 *time, bool xdp)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
enum bpf_cgroup_storage_type stype;
@ -41,7 +41,11 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
bpf_cgroup_storage_set(storage);
*retval = BPF_PROG_RUN(prog, ctx);
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = BPF_PROG_RUN(prog, ctx);
if (signal_pending(current)) {
ret = -EINTR;
@ -247,34 +251,53 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
return 0;
/* make sure the fields we don't use are zeroed */
if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority)))
if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark)))
return -EINVAL;
/* mark is allowed */
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark),
offsetof(struct __sk_buff, priority)))
return -EINVAL;
/* priority is allowed */
if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
sizeof_field(struct __sk_buff, priority),
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
offsetof(struct __sk_buff, cb)))
return -EINVAL;
/* cb is allowed */
if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
sizeof_field(struct __sk_buff, cb),
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
offsetof(struct __sk_buff, tstamp)))
return -EINVAL;
/* tstamp is allowed */
/* wire_len is allowed */
/* gso_segs is allowed */
if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
sizeof_field(struct __sk_buff, tstamp),
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
sizeof(struct __sk_buff)))
return -EINVAL;
skb->mark = __skb->mark;
skb->priority = __skb->priority;
skb->tstamp = __skb->tstamp;
memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
if (__skb->wire_len == 0) {
cb->pkt_len = skb->len;
} else {
if (__skb->wire_len < skb->len ||
__skb->wire_len > GSO_MAX_SIZE)
return -EINVAL;
cb->pkt_len = __skb->wire_len;
}
if (__skb->gso_segs > GSO_MAX_SEGS)
return -EINVAL;
skb_shinfo(skb)->gso_segs = __skb->gso_segs;
return 0;
}
@ -285,9 +308,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (!__skb)
return;
__skb->mark = skb->mark;
__skb->priority = skb->priority;
__skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
__skb->wire_len = cb->pkt_len;
__skb->gso_segs = skb_shinfo(skb)->gso_segs;
}
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
@ -359,7 +385,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = convert___skb_to_skb(skb, ctx);
if (ret)
goto out;
ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
if (ret)
goto out;
if (!is_l2) {
@ -416,8 +442,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp.rxq = &rxqueue->xdp_rxq;
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
bpf_prog_change_xdp(NULL, prog);
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
if (ret)
goto out;
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
@ -425,6 +451,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
out:
bpf_prog_change_xdp(prog, NULL);
kfree(data);
return ret;
}
@ -437,8 +464,7 @@ static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
/* flags is allowed */
if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) +
sizeof_field(struct bpf_flow_keys, flags),
if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags),
sizeof(struct bpf_flow_keys)))
return -EINVAL;

View File

@ -8542,7 +8542,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
int err;
if (non_hw) {
prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
XDP_QUERY_PROG));
if (IS_ERR(prev_prog))
prev_prog = NULL;
}
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@ -8553,7 +8563,14 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
xdp.flags = flags;
xdp.prog = prog;
return bpf_op(dev, &xdp);
err = bpf_op(dev, &xdp);
if (!err && non_hw)
bpf_prog_change_xdp(prev_prog, prog);
if (prev_prog)
bpf_prog_put(prev_prog);
return err;
}
static void dev_xdp_uninstall(struct net_device *dev)

View File

@ -3511,36 +3511,16 @@ xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
}
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
struct bpf_map *map,
struct xdp_buff *xdp,
u32 index)
struct bpf_map *map, struct xdp_buff *xdp)
{
int err;
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH: {
struct bpf_dtab_netdev *dst = fwd;
err = dev_map_enqueue(dst, xdp, dev_rx);
if (unlikely(err))
return err;
break;
}
case BPF_MAP_TYPE_CPUMAP: {
struct bpf_cpu_map_entry *rcpu = fwd;
err = cpu_map_enqueue(rcpu, xdp, dev_rx);
if (unlikely(err))
return err;
break;
}
case BPF_MAP_TYPE_XSKMAP: {
struct xdp_sock *xs = fwd;
err = __xsk_map_redirect(map, xdp, xs);
return err;
}
case BPF_MAP_TYPE_DEVMAP_HASH:
return dev_map_enqueue(fwd, xdp, dev_rx);
case BPF_MAP_TYPE_CPUMAP:
return cpu_map_enqueue(fwd, xdp, dev_rx);
case BPF_MAP_TYPE_XSKMAP:
return __xsk_map_redirect(fwd, xdp);
default:
break;
}
@ -3549,26 +3529,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
void xdp_do_flush_map(void)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = ri->map_to_flush;
ri->map_to_flush = NULL;
if (map) {
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
__dev_map_flush(map);
break;
case BPF_MAP_TYPE_CPUMAP:
__cpu_map_flush(map);
break;
case BPF_MAP_TYPE_XSKMAP:
__xsk_map_flush(map);
break;
default:
break;
}
}
__dev_map_flush();
__cpu_map_flush();
__xsk_map_flush();
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
@ -3617,14 +3580,10 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
xdp_do_flush_map();
err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
if (unlikely(err))
goto err;
ri->map_to_flush = map;
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
return 0;
err:
@ -8941,3 +8900,11 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */
DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
/* Tell the XDP dispatcher (bpf_dispatcher_xdp) that @prev_prog is being
 * replaced by @prog. Simply forwards both pointers to
 * bpf_dispatcher_change_prog().
 */
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
prev_prog, prog);
}

View File

@ -31,6 +31,8 @@
#define TX_BATCH_SIZE 16
static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
@ -39,21 +41,21 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
{
return xskq_has_addrs(umem->fq, cnt);
return xskq_cons_has_entries(umem->fq, cnt);
}
EXPORT_SYMBOL(xsk_umem_has_addrs);
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
return xskq_peek_addr(umem->fq, addr, umem);
return xskq_cons_peek_addr(umem->fq, addr, umem);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);
void xsk_umem_discard_addr(struct xdp_umem *umem)
void xsk_umem_release_addr(struct xdp_umem *umem)
{
xskq_discard_addr(umem->fq);
xskq_cons_release(umem->fq);
}
EXPORT_SYMBOL(xsk_umem_discard_addr);
EXPORT_SYMBOL(xsk_umem_release_addr);
void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
{
@ -124,7 +126,7 @@ static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
void *to_buf = xdp_umem_get_data(umem, addr);
addr = xsk_umem_add_offset_to_addr(addr);
if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) {
void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
u64 page_start = addr & ~(PAGE_SIZE - 1);
u64 first_len = PAGE_SIZE - (addr - page_start);
@ -146,7 +148,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
u32 metalen;
int err;
if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
@ -165,9 +167,9 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
offset += metalen;
addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
err = xskq_produce_batch_desc(xs->rx, addr, len);
err = xskq_prod_reserve_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
xskq_cons_release(xs->umem->fq);
xdp_return_buff(xdp);
return 0;
}
@ -178,7 +180,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);
int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len);
if (err)
xs->rx_dropped++;
@ -214,7 +216,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_flush(struct xdp_sock *xs)
{
xskq_produce_flush_desc(xs->rx);
xskq_prod_submit(xs->rx);
xs->sk.sk_data_ready(&xs->sk);
}
@ -234,7 +236,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
goto out_unlock;
}
if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
err = -ENOSPC;
goto out_drop;
@ -245,12 +247,12 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
memcpy(buffer, xdp->data_meta, len + metalen);
addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
err = xskq_produce_batch_desc(xs->rx, addr, len);
err = xskq_prod_reserve_desc(xs->rx, addr, len);
if (err)
goto out_drop;
xskq_discard_addr(xs->umem->fq);
xskq_produce_flush_desc(xs->rx);
xskq_cons_release(xs->umem->fq);
xskq_prod_submit(xs->rx);
spin_unlock_bh(&xs->rx_lock);
@ -264,11 +266,9 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return err;
}
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs)
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
err = xsk_rcv(xs, xdp);
@ -281,10 +281,9 @@ int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
return 0;
}
void __xsk_map_flush(struct bpf_map *map)
void __xsk_map_flush(void)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@ -295,7 +294,7 @@ void __xsk_map_flush(struct bpf_map *map)
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
xskq_produce_flush_addr_n(umem->cq, nb_entries);
xskq_prod_submit_n(umem->cq, nb_entries);
}
EXPORT_SYMBOL(xsk_umem_complete_tx);
@ -317,13 +316,18 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
if (!xskq_peek_desc(xs->tx, desc, umem))
if (!xskq_cons_peek_desc(xs->tx, desc, umem))
continue;
if (xskq_produce_addr_lazy(umem->cq, desc->addr))
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
if (xskq_prod_reserve_addr(umem->cq, desc->addr))
goto out;
xskq_discard_desc(xs->tx);
xskq_cons_release(xs->tx);
rcu_read_unlock();
return true;
}
@ -358,7 +362,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
unsigned long flags;
spin_lock_irqsave(&xs->tx_completion_lock, flags);
WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
xskq_prod_submit_addr(xs->umem->cq, addr);
spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
@ -378,7 +382,7 @@ static int xsk_generic_xmit(struct sock *sk)
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) {
char *buffer;
u64 addr;
u32 len;
@ -399,7 +403,12 @@ static int xsk_generic_xmit(struct sock *sk)
addr = desc.addr;
buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) {
kfree_skb(skb);
goto out;
}
@ -411,7 +420,7 @@ static int xsk_generic_xmit(struct sock *sk)
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
xskq_discard_desc(xs->tx);
xskq_cons_release(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
/* SKB completed but not sent */
@ -477,9 +486,9 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
__xsk_sendmsg(sk);
}
if (xs->rx && !xskq_empty_desc(xs->rx))
if (xs->rx && !xskq_prod_is_empty(xs->rx))
mask |= EPOLLIN | EPOLLRDNORM;
if (xs->tx && !xskq_full_desc(xs->tx))
if (xs->tx && !xskq_cons_is_full(xs->tx))
mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
@ -1183,7 +1192,7 @@ static struct pernet_operations xsk_net_ops = {
static int __init xsk_init(void)
{
int err;
int err, cpu;
err = proto_register(&xsk_proto, 0 /* no slab */);
if (err)
@ -1201,6 +1210,8 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
return 0;
out_pernet:

View File

@ -18,14 +18,14 @@ void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
q->chunk_mask = chunk_mask;
}
static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
{
return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u64);
}
struct xdp_umem_ring *umem_ring;
struct xdp_rxtx_ring *rxtx_ring;
static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
{
return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
if (umem_queue)
return struct_size(umem_ring, desc, q->nentries);
return struct_size(rxtx_ring, desc, q->nentries);
}
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
@ -43,8 +43,7 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
__GFP_COMP | __GFP_NORETRY;
size = umem_queue ? xskq_umem_get_ring_size(q) :
xskq_rxtx_get_ring_size(q);
size = xskq_get_ring_size(q, umem_queue);
q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
get_order(size));

View File

@ -10,9 +10,6 @@
#include <linux/if_xdp.h>
#include <net/xdp_sock.h>
#define RX_BATCH_SIZE 16
#define LAZY_UPDATE_THRESHOLD 128
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
@ -36,10 +33,8 @@ struct xsk_queue {
u64 size;
u32 ring_mask;
u32 nentries;
u32 prod_head;
u32 prod_tail;
u32 cons_head;
u32 cons_tail;
u32 cached_prod;
u32 cached_cons;
struct xdp_ring *ring;
u64 invalid_descs;
};
@ -86,56 +81,31 @@ struct xsk_queue {
* now and again after circling through the ring.
*/
/* Common functions operating for both RXTX and umem queues */
/* The operations on the rings are the following:
*
* producer consumer
*
* RESERVE entries PEEK in the ring for entries
* WRITE data into the ring READ data from the ring
* SUBMIT entries RELEASE entries
*
* The producer reserves one or more entries in the ring. It can then
* fill in these entries and finally submit them so that they can be
* seen and read by the consumer.
*
* The consumer peeks into the ring to see if the producer has written
* any new entries. If so, the consumer can then read these entries
* and when it is done reading them release them back to the producer
* so that the producer can use these slots to fill in new entries.
*
* The function names below reflect these operations.
*/
/* Return the invalid-descriptor counter of @q, or 0 when @q is NULL. */
static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
{
	if (!q)
		return 0;

	return q->invalid_descs;
}
/* Functions that read and validate content from consumer rings. */
static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
{
u32 entries = q->prod_tail - q->cons_tail;
if (entries == 0) {
/* Refresh the local pointer */
q->prod_tail = READ_ONCE(q->ring->producer);
entries = q->prod_tail - q->cons_tail;
}
return (entries > dcnt) ? dcnt : entries;
}
static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
{
u32 free_entries = q->nentries - (producer - q->cons_tail);
if (free_entries >= dcnt)
return free_entries;
/* Refresh the local tail pointer */
q->cons_tail = READ_ONCE(q->ring->consumer);
return q->nentries - (producer - q->cons_tail);
}
static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
{
u32 entries = q->prod_tail - q->cons_tail;
if (entries >= cnt)
return true;
/* Refresh the local pointer. */
q->prod_tail = READ_ONCE(q->ring->producer);
entries = q->prod_tail - q->cons_tail;
return entries >= cnt;
}
/* UMEM queue */
static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
u64 length)
static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem,
u64 addr,
u64 length)
{
bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
bool next_pg_contig =
@ -145,7 +115,24 @@ static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
return cross_pg && !next_pg_contig;
}
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
/* Validate an address taken from a ring for a umem in unaligned-chunk mode.
 *
 * Both the extracted base address and the address with its offset applied
 * must be below q->size, and the [addr, addr + length) span must not cross
 * into a non-contiguous page. On failure q->invalid_descs is incremented
 * and false is returned.
 */
static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
						u64 addr,
						u64 length,
						struct xdp_umem *umem)
{
	u64 base = xsk_umem_extract_addr(addr);
	u64 full = xsk_umem_add_offset_to_addr(addr);

	/* Range checks come first so the page check only sees in-range values */
	if (base < q->size && full < q->size &&
	    !xskq_cons_crosses_non_contig_pg(umem, full, length))
		return true;

	q->invalid_descs++;
	return false;
}
static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
{
if (addr >= q->size) {
q->invalid_descs++;
@ -155,125 +142,40 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
return true;
}
static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
u64 length,
struct xdp_umem *umem)
static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr,
struct xdp_umem *umem)
{
u64 base_addr = xsk_umem_extract_addr(addr);
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
addr = xsk_umem_add_offset_to_addr(addr);
if (base_addr >= q->size || addr >= q->size ||
xskq_crosses_non_contig_pg(umem, addr, length)) {
q->invalid_descs++;
return false;
}
while (q->cached_cons != q->cached_prod) {
u32 idx = q->cached_cons & q->ring_mask;
return true;
}
static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
*addr = ring->desc[idx] & q->chunk_mask;
if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
if (xskq_is_valid_addr_unaligned(q, *addr,
if (xskq_cons_is_valid_unaligned(q, *addr,
umem->chunk_size_nohr,
umem))
return addr;
return true;
goto out;
}
if (xskq_is_valid_addr(q, *addr))
return addr;
if (xskq_cons_is_valid_addr(q, *addr))
return true;
out:
q->cons_tail++;
q->cached_cons++;
}
return NULL;
return false;
}
static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
/* Order consumer and data */
smp_rmb();
}
return xskq_validate_addr(q, addr, umem);
}
static inline void xskq_discard_addr(struct xsk_queue *q)
{
q->cons_tail++;
}
static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
if (xskq_nb_free(q, q->prod_tail, 1) == 0)
return -ENOSPC;
/* A, matches D */
ring->desc[q->prod_tail++ & q->ring_mask] = addr;
/* Order producer and data */
smp_wmb(); /* B, matches C */
WRITE_ONCE(q->ring->producer, q->prod_tail);
return 0;
}
static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
return -ENOSPC;
/* A, matches D */
ring->desc[q->prod_head++ & q->ring_mask] = addr;
return 0;
}
/* Publish nb_entries already-written entries: advance the local prod_tail
 * and store it to the shared producer index after a write barrier.
 */
static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
u32 nb_entries)
{
/* Order producer and data */
smp_wmb(); /* B, matches C */
q->prod_tail += nb_entries;
WRITE_ONCE(q->ring->producer, q->prod_tail);
}
/* Reserve one slot by advancing the local prod_head; nothing is written to
 * the ring here. Returns -ENOSPC when no slot is free, 0 on success.
 */
static inline int xskq_reserve_addr(struct xsk_queue *q)
{
	if (!xskq_nb_free(q, q->prod_head, 1))
		return -ENOSPC;

	/* A, matches D */
	q->prod_head += 1;
	return 0;
}
/* Rx/Tx queue */
static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
struct xdp_umem *umem)
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
struct xdp_desc *d,
struct xdp_umem *umem)
{
if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem))
return false;
if (d->len > umem->chunk_size_nohr || d->options) {
@ -284,7 +186,7 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
return true;
}
if (!xskq_is_valid_addr(q, d->addr))
if (!xskq_cons_is_valid_addr(q, d->addr))
return false;
if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
@ -296,79 +198,184 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
return true;
}
static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
struct xdp_desc *desc,
struct xdp_umem *umem)
static inline bool xskq_cons_read_desc(struct xsk_queue *q,
struct xdp_desc *desc,
struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
while (q->cached_cons != q->cached_prod) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
u32 idx = q->cached_cons & q->ring_mask;
*desc = READ_ONCE(ring->desc[idx]);
if (xskq_is_valid_desc(q, desc, umem))
return desc;
*desc = ring->desc[idx];
if (xskq_cons_is_valid_desc(q, desc, umem))
return true;
q->cons_tail++;
q->cached_cons++;
}
return NULL;
return false;
}
static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
struct xdp_desc *desc,
struct xdp_umem *umem)
/* Functions for consumers */
static inline void __xskq_cons_release(struct xsk_queue *q)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
/* Order consumer and data */
smp_rmb(); /* C, matches B */
}
return xskq_validate_desc(q, desc, umem);
smp_mb(); /* D, matches A */
WRITE_ONCE(q->ring->consumer, q->cached_cons);
}
static inline void xskq_discard_desc(struct xsk_queue *q)
static inline void __xskq_cons_peek(struct xsk_queue *q)
{
q->cons_tail++;
/* Refresh the local pointer */
q->cached_prod = READ_ONCE(q->ring->producer);
smp_rmb(); /* C, matches B */
}
static inline int xskq_produce_batch_desc(struct xsk_queue *q,
u64 addr, u32 len)
static inline void xskq_cons_get_entries(struct xsk_queue *q)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx;
__xskq_cons_release(q);
__xskq_cons_peek(q);
}
if (xskq_nb_free(q, q->prod_head, 1) == 0)
/* Report whether at least cnt entries are available to the consumer.
 *
 * The cached indices are checked first; only when they fall short is the
 * cached producer refreshed via __xskq_cons_peek() and the check repeated.
 */
static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
{
	if ((u32)(q->cached_prod - q->cached_cons) >= cnt)
		return true;

	__xskq_cons_peek(q);
	return (u32)(q->cached_prod - q->cached_cons) >= cnt;
}
/* Peek at the next address, fetching new entries first when the cached
 * producer and consumer indices are equal. Returns the result of
 * xskq_cons_read_addr().
 */
static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
				       struct xdp_umem *umem)
{
	bool cache_drained = (q->cached_prod == q->cached_cons);

	if (cache_drained)
		xskq_cons_get_entries(q);

	return xskq_cons_read_addr(q, addr, umem);
}
/* Peek at the next descriptor, fetching new entries first when the cached
 * producer and consumer indices are equal. Returns the result of
 * xskq_cons_read_desc().
 */
static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
				       struct xdp_desc *desc,
				       struct xdp_umem *umem)
{
	bool cache_drained = (q->cached_prod == q->cached_cons);

	if (cache_drained)
		xskq_cons_get_entries(q);

	return xskq_cons_read_desc(q, desc, umem);
}
/* Release one consumed entry. */
static inline void xskq_cons_release(struct xsk_queue *q)
{
	/* Only the cached consumer index moves here, for performance. The
	 * shared ring index is brought up to date the next time new entries
	 * are fetched in xskq_cons_get_entries().
	 */
	q->cached_cons += 1;
}
static inline bool xskq_cons_is_full(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
q->nentries;
}
/* Functions for producers */
static inline bool xskq_prod_is_full(struct xsk_queue *q)
{
u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
if (free_entries)
return false;
/* Refresh the local tail pointer */
q->cached_cons = READ_ONCE(q->ring->consumer);
free_entries = q->nentries - (q->cached_prod - q->cached_cons);
return !free_entries;
}
static inline int xskq_prod_reserve(struct xsk_queue *q)
{
if (xskq_prod_is_full(q))
return -ENOSPC;
/* A, matches D */
idx = (q->prod_head++) & q->ring_mask;
q->cached_prod++;
return 0;
}
/* Store addr in the next producer slot and advance the cached producer
 * index. Returns -ENOSPC when xskq_prod_is_full() says the ring is full,
 * 0 otherwise. The shared producer index is not written here.
 */
static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
{
	struct xdp_umem_ring *umem_ring = (struct xdp_umem_ring *)q->ring;

	if (xskq_prod_is_full(q))
		return -ENOSPC;

	/* A, matches D */
	umem_ring->desc[q->cached_prod & q->ring_mask] = addr;
	q->cached_prod++;
	return 0;
}
/* Fill the next Rx/Tx descriptor slot with addr and len and advance the
 * cached producer index. Returns -ENOSPC when the ring is full, 0 otherwise.
 * Only the addr and len fields of the slot are written.
 */
static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
					 u64 addr, u32 len)
{
	struct xdp_rxtx_ring *rxtx_ring = (struct xdp_rxtx_ring *)q->ring;
	struct xdp_desc *slot;
	u32 idx;

	if (xskq_prod_is_full(q))
		return -ENOSPC;

	/* A, matches D */
	idx = q->cached_prod++ & q->ring_mask;
	slot = &rxtx_ring->desc[idx];
	slot->addr = addr;
	slot->len = len;
	return 0;
}
static inline void xskq_produce_flush_desc(struct xsk_queue *q)
static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
{
/* Order producer and data */
smp_wmb(); /* B, matches C */
q->prod_tail = q->prod_head;
WRITE_ONCE(q->ring->producer, q->prod_tail);
WRITE_ONCE(q->ring->producer, idx);
}
static inline bool xskq_full_desc(struct xsk_queue *q)
static inline void xskq_prod_submit(struct xsk_queue *q)
{
return xskq_nb_avail(q, q->nentries) == q->nentries;
__xskq_prod_submit(q, q->cached_prod);
}
static inline bool xskq_empty_desc(struct xsk_queue *q)
static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
{
return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
u32 idx = q->ring->producer;
ring->desc[idx++ & q->ring_mask] = addr;
__xskq_prod_submit(q, idx);
}
/* Submit nb_entries entries: the new producer value is the current shared
 * producer index plus nb_entries, handed to __xskq_prod_submit().
 */
static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
	u32 new_prod = q->ring->producer + nb_entries;

	__xskq_prod_submit(q, new_prod);
}
static inline bool xskq_prod_is_empty(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
}
/* For both producers and consumers */
/* Return the queue's invalid-descriptor counter, or 0 for a NULL queue. */
static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
{
	if (!q)
		return 0;

	return q->invalid_descs;
}
void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);

View File

@ -38,6 +38,8 @@ tprogs-y += tc_l2_redirect
tprogs-y += lwt_len_hist
tprogs-y += xdp_tx_iptunnel
tprogs-y += test_map_in_map
tprogs-y += per_socket_stats_example
tprogs-y += xdp_redirect
tprogs-y += xdp_redirect_map
tprogs-y += xdp_redirect_cpu
tprogs-y += xdp_monitor
@ -196,7 +198,7 @@ endif
TPROGCFLAGS_bpf_load.o += -Wno-unused-variable
TPROGS_LDLIBS += $(LIBBPF) -lelf
TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
TPROGLDLIBS_tracex4 += -lrt
TPROGLDLIBS_trace_output += -lrt
TPROGLDLIBS_map_perf_test += -lrt
@ -234,6 +236,7 @@ BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
readelf -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
BPF_EXTRA_CFLAGS += -fno-stack-protector
ifneq ($(BTF_LLVM_PROBE),)
BPF_EXTRA_CFLAGS += -g
else

View File

@ -98,7 +98,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
/* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@ -109,6 +109,9 @@ int main(int argc, char **argv)
}
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
if (optind == argc) {
usage(basename(argv[0]));
return 1;

View File

@ -120,7 +120,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
/* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@ -132,6 +132,9 @@ int main(int argc, char **argv)
opt_flags[opt] = 0;
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);

View File

@ -27,11 +27,13 @@
#include "libbpf.h"
#include <bpf/bpf.h>
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
{
int err;
err = bpf_set_link_xdp_fd(idx, prog_fd, 0);
err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@ -49,7 +51,7 @@ static int do_detach(int idx, const char *name)
{
int err;
err = bpf_set_link_xdp_fd(idx, -1, 0);
err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
@ -83,11 +85,17 @@ int main(int argc, char **argv)
int attach = 1;
int ret = 0;
while ((opt = getopt(argc, argv, ":dD")) != -1) {
while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
switch (opt) {
case 'd':
attach = 0;
break;
case 'S':
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
case 'D':
prog_name = "xdp_fwd_direct";
break;
@ -97,6 +105,9 @@ int main(int argc, char **argv)
}
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
if (optind == argc) {
usage(basename(argv[0]));
return 1;

View File

@ -16,6 +16,10 @@ static const char *__doc__ =
#include <getopt.h>
#include <net/if.h>
#include <time.h>
#include <linux/limits.h>
#define __must_check
#include <linux/err.h>
#include <arpa/inet.h>
#include <linux/if_link.h>
@ -46,6 +50,10 @@ static int cpus_count_map_fd;
static int cpus_iterator_map_fd;
static int exception_cnt_map_fd;
#define NUM_TP 5
struct bpf_link *tp_links[NUM_TP] = { 0 };
static int tp_cnt = 0;
/* Exit return codes */
#define EXIT_OK 0
#define EXIT_FAIL 1
@ -88,6 +96,10 @@ static void int_exit(int sig)
printf("program on interface changed, not removing\n");
}
}
/* Detach tracepoints */
while (tp_cnt)
bpf_link__destroy(tp_links[--tp_cnt]);
exit(EXIT_OK);
}
@ -588,23 +600,61 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
free_stats_record(prev);
}
/* Attach the program found in section "tracepoint/<tp_category>/<tp_name>"
 * of @obj to the tracepoint of the same category and name.
 *
 * Returns the resulting link. Any failure terminates the process:
 * EXIT_FAIL when the section name cannot be formatted (error or truncation),
 * EXIT_FAIL_BPF when the program is missing or the attach fails.
 */
static struct bpf_link *attach_tp(struct bpf_object *obj,
				  const char *tp_category,
				  const char *tp_name)
{
	struct bpf_program *prog;
	struct bpf_link *link;
	char sec_name[PATH_MAX];
	int len;

	len = snprintf(sec_name, sizeof(sec_name), "tracepoint/%s/%s",
		       tp_category, tp_name);
	/* snprintf() returns the length it wanted to write; a value that does
	 * not fit means sec_name was truncated and would name the wrong
	 * section.
	 */
	if (len < 0 || (size_t)len >= sizeof(sec_name)) {
		fprintf(stderr, "ERR: tracepoint section name too long: %s/%s\n",
			tp_category, tp_name);
		exit(EXIT_FAIL);
	}

	prog = bpf_object__find_program_by_title(obj, sec_name);
	if (!prog) {
		fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
		exit(EXIT_FAIL_BPF);
	}

	link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
	if (IS_ERR(link)) {
		fprintf(stderr, "ERR: attaching tracepoint: %s/%s\n",
			tp_category, tp_name);
		exit(EXIT_FAIL_BPF);
	}

	return link;
}
/* Attach every "xdp" tracepoint program used by this sample and record the
 * links in tp_links[] (tp_cnt counts them). The table below must not grow
 * beyond NUM_TP entries.
 */
static void init_tracepoints(struct bpf_object *obj)
{
	static const char * const xdp_tp_names[] = {
		"xdp_redirect_err",
		"xdp_redirect_map_err",
		"xdp_exception",
		"xdp_cpumap_enqueue",
		"xdp_cpumap_kthread",
	};
	unsigned int n;

	for (n = 0; n < sizeof(xdp_tp_names) / sizeof(xdp_tp_names[0]); n++)
		tp_links[tp_cnt++] = attach_tp(obj, "xdp", xdp_tp_names[n]);
}
static int init_map_fds(struct bpf_object *obj)
{
cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
/* Maps updated by tracepoints */
redirect_err_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
exception_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "exception_cnt");
cpumap_enqueue_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
cpumap_kthread_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
/* Maps used by XDP */
rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
cpus_available_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpus_available");
cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
cpus_iterator_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
exception_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "exception_cnt");
if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
@ -662,6 +712,7 @@ int main(int argc, char **argv)
strerror(errno));
return EXIT_FAIL;
}
init_tracepoints(obj);
if (init_map_fds(obj) < 0) {
fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
return EXIT_FAIL;
@ -728,6 +779,10 @@ int main(int argc, char **argv)
return EXIT_FAIL_OPTION;
}
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");

View File

@ -116,7 +116,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
/* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@ -127,6 +127,9 @@ int main(int argc, char **argv)
}
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
if (optind == argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;

View File

@ -117,7 +117,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
/* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@ -128,6 +128,9 @@ int main(int argc, char **argv)
}
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
if (optind == argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;

View File

@ -662,6 +662,9 @@ int main(int ac, char **argv)
}
}
if (!(flags & XDP_FLAGS_SKB_MODE))
flags |= XDP_FLAGS_DRV_MODE;
if (optind == ac) {
usage(basename(argv[0]));
return 1;

View File

@ -551,6 +551,10 @@ int main(int argc, char **argv)
return EXIT_FAIL_OPTION;
}
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");

View File

@ -52,13 +52,13 @@ static int do_detach(int idx, const char *name)
__u32 curr_prog_id = 0;
int err = 0;
err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0);
err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
if (err) {
printf("bpf_get_link_xdp_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
err = bpf_set_link_xdp_fd(idx, -1, 0);
err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
@ -115,7 +115,7 @@ int main(int argc, char **argv)
.prog_type = BPF_PROG_TYPE_XDP,
};
struct perf_buffer_opts pb_opts = {};
const char *optstr = "F";
const char *optstr = "FS";
int prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
@ -127,12 +127,18 @@ int main(int argc, char **argv)
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
case 'S':
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
default:
usage(basename(argv[0]));
return 1;
}
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
if (optind == argc) {
usage(basename(argv[0]));
return 1;

View File

@ -231,7 +231,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
xdp_flags |= XDP_FLAGS_DRV_MODE;
/* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@ -243,6 +243,9 @@ int main(int argc, char **argv)
opt_flags[opt] = 0;
}
if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
xdp_flags |= XDP_FLAGS_DRV_MODE;
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);

View File

@ -10,6 +10,9 @@
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <arpa/inet.h>
#include <locale.h>
#include <net/ethernet.h>
#include <net/if.h>
@ -45,12 +48,14 @@
#endif
#define NUM_FRAMES (4 * 1024)
#define BATCH_SIZE 64
#define MIN_PKT_SIZE 64
#define DEBUG_HEXDUMP 0
typedef __u64 u64;
typedef __u32 u32;
typedef __u16 u16;
typedef __u8 u8;
static unsigned long prev_time;
@ -65,6 +70,13 @@ static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
static unsigned long opt_duration;
static unsigned long start_time;
static bool benchmark_done;
static u32 opt_batch_size = 64;
static int opt_pkt_count;
static u16 opt_pkt_size = MIN_PKT_SIZE;
static u32 opt_pkt_fill_pattern = 0x12345678;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
@ -167,10 +179,21 @@ static void dump_stats(void)
}
}
/* Return the benchmark_done flag, first setting it when a duration limit is
 * configured (opt_duration > 0) and the time since start_time has reached it.
 */
static bool is_benchmark_done(void)
{
	unsigned long elapsed;

	if (opt_duration == 0)
		return benchmark_done;

	elapsed = get_nsecs() - start_time;
	if (elapsed >= opt_duration)
		benchmark_done = true;

	return benchmark_done;
}
static void *poller(void *arg)
{
(void)arg;
for (;;) {
while (!is_benchmark_done()) {
sleep(opt_interval);
dump_stats();
}
@ -195,6 +218,11 @@ static void remove_xdp_program(void)
}
/* Request termination by setting benchmark_done; the benchmark loops and
 * is_benchmark_done() read this flag. No cleanup is done here.
 * NOTE(review): presumably installed as a signal handler (registration is
 * not visible here) — confirm, and confirm the flag's type is safe for that.
 */
static void int_exit(int sig)
{
benchmark_done = true;
}
static void xdpsock_cleanup(void)
{
struct xsk_umem *umem = xsks[0]->umem->umem;
int i;
@ -204,8 +232,6 @@ static void int_exit(int sig)
xsk_socket__delete(xsks[i]->xsk);
(void)xsk_umem__delete(umem);
remove_xdp_program();
exit(EXIT_SUCCESS);
}
static void __exit_with_error(int error, const char *file, const char *func,
@ -220,13 +246,6 @@ static void __exit_with_error(int error, const char *file, const char *func,
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
__LINE__)
static const char pkt_data[] =
"\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
"\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
"\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
"\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
static void swap_mac_addresses(void *data)
{
struct ether_header *eth = (struct ether_header *)data;
@ -274,11 +293,243 @@ static void hex_dump(void *pkt, size_t length, u64 addr)
printf("\n");
}
static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
/* Fill size bytes at dest with the 32-bit pattern val converted by htonl().
 * Whole 32-bit words are stored first; any remaining 1-3 bytes take the
 * leading bytes of the converted pattern. Returns dest.
 */
static void *memset32_htonl(void *dest, u32 val, u32 size)
{
	u32 *words = (u32 *)dest;
	char *bytes = (char *)dest;
	const char *pattern = (const char *)&val;
	int i = 0;

	val = htonl(val);

	/* full words */
	while (i < (size & (~0x3))) {
		words[i >> 2] = val;
		i += 4;
	}

	/* trailing bytes */
	while (i < size) {
		bytes[i] = pattern[i & 3];
		i++;
	}

	return dest;
}
/*
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
/*
 * Fold a 32-bit partial sum into 16 bits, adding the carries back in.
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static inline unsigned short from32to16(unsigned int x)
{
	/* add up 16-bit and 16-bit for 16+c bit */
	x = (x & 0xffff) + (x >> 16);
	/* add up carry.. */
	x = (x & 0xffff) + (x >> 16);
	return x;
}

/*
 * Compute the 16-bit one's-complement sum of len bytes at buff, with the
 * bytes interpreted as host-order 16-bit words. Returns 0 for len <= 0.
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 *
 * Byte order is taken from the compiler's __BYTE_ORDER__ rather than from
 * __LITTLE_ENDIAN: in user space the latter comes from <endian.h>, which
 * defines it on every host regardless of the actual byte order.
 */
static unsigned int do_csum(const unsigned char *buff, int len)
{
	unsigned int result = 0;
	int odd;

	if (len <= 0)
		goto out;
	odd = 1 & (unsigned long)buff;
	if (odd) {
		/* Consume the leading byte so the rest is 2-byte aligned; the
		 * final sum is byte-swapped below to compensate.
		 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
		result += (*buff << 8);
#else
		result = *buff;
#endif
		len--;
		buff++;
	}
	if (len >= 2) {
		if (2 & (unsigned long)buff) {
			result += *(unsigned short *)buff;
			len -= 2;
			buff += 2;
		}
		if (len >= 4) {
			const unsigned char *end = buff +
						   ((unsigned int)len & ~3);
			unsigned int carry = 0;

			do {
				unsigned int w = *(unsigned int *)buff;

				buff += 4;
				result += carry;
				result += w;
				carry = (w > result);
			} while (buff < end);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		if (len & 2) {
			result += *(unsigned short *)buff;
			buff += 2;
		}
	}
	if (len & 1) {
		/* Trailing byte is the first byte of a zero-padded word. */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
		result += *buff;
#else
		result += (*buff << 8);
#endif
	}
	result = from32to16(result);
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
	return result;
}
__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	/* ihl counts 32-bit words; do_csum() wants a byte length. */
	unsigned int hdr_bytes = ihl * 4;

	return (__force __sum16)~do_csum(iph, hdr_bytes);
}
/*
* Fold a partial checksum
* This function code has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
static inline __sum16 csum_fold(__wsum csum)
{
	u32 folded = (__force u32)csum;
	int pass;

	/* Two passes: the first can leave a carry, the second absorbs it. */
	for (pass = 0; pass < 2; pass++)
		folded = (folded & 0xffff) + (folded >> 16);

	return (__force __sum16)~folded;
}
/*
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
static inline u32 from64to32(u64 x)
{
	int pass;

	/* First pass adds the two 32-bit halves (32+carry bits); the second
	 * pass folds that carry back in.
	 */
	for (pass = 0; pass < 2; pass++)
		x = (x & 0xffffffff) + (x >> 32);

	return (u32)x;
}
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
__u32 len, __u8 proto, __wsum sum);
/*
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
/*
 * Add the TCP/UDP pseudo-header fields (source address, destination address,
 * protocol and length) to the partial checksum sum and return the 32-bit
 * unfolded result.
 *
 * Big-endian hosts are detected through either __BIG_ENDIAN__ or the
 * compiler's __BYTE_ORDER__, since __BIG_ENDIAN__ alone is not provided by
 * every big-endian toolchain.
 */
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
			  __u32 len, __u8 proto, __wsum sum)
{
	unsigned long long s = (__force u32)sum;

	s += (__force u32)saddr;
	s += (__force u32)daddr;
#if defined(__BIG_ENDIAN__) || \
	(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
	s += proto + len;
#else
	s += (proto + len) << 8;
#endif
	return (__force __wsum)from64to32(s);
}
/*
* This function has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
static inline __sum16
csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
		  __u8 proto, __wsum sum)
{
	/* Pseudo-header sum first, then fold it down to 16 bits. */
	__wsum unfolded = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);

	return csum_fold(unfolded);
}
/* Sum the UDP header and payload at udp_pkt as 16-bit words over len bytes,
 * then combine with the pseudo-header via csum_tcpudp_magic().
 * Note: for an odd len the last iteration reads a full 16-bit word.
 */
static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
			   u8 proto, u16 *udp_pkt)
{
	u32 partial = 0;
	u32 off;

	/* udp hdr and data */
	for (off = 0; off < len; off += 2)
		partial += udp_pkt[off / 2];

	return csum_tcpudp_magic(saddr, daddr, len, proto, partial);
}
#define ETH_FCS_SIZE 4
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
sizeof(struct udphdr))
#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
/* Build the template Tx frame in pkt_data: Ethernet header, IPv4 header,
 * UDP header, then a payload filled with opt_pkt_fill_pattern. All lengths
 * come from the PKT_SIZE/IP_PKT_SIZE/UDP_PKT_SIZE macros, which derive from
 * opt_pkt_size. Statement order matters: each checksum is computed only
 * after every byte it covers has been written.
 */
static void gen_eth_hdr_data(void)
{
/* Headers are laid out back to back at the start of pkt_data. */
struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
sizeof(struct ethhdr) +
sizeof(struct iphdr));
struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
sizeof(struct ethhdr));
struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
/* ethernet header */
memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
eth_hdr->h_proto = htons(ETH_P_IP);
/* IP header */
ip_hdr->version = IPVERSION;
ip_hdr->ihl = 0x5; /* 20 byte header */
ip_hdr->tos = 0x0;
ip_hdr->tot_len = htons(IP_PKT_SIZE);
ip_hdr->id = 0;
ip_hdr->frag_off = 0;
ip_hdr->ttl = IPDEFTTL;
ip_hdr->protocol = IPPROTO_UDP;
ip_hdr->saddr = htonl(0x0a0a0a10);
ip_hdr->daddr = htonl(0x0a0a0a20);
/* IP header checksum */
/* The check field must be zero while the header is being summed. */
ip_hdr->check = 0;
ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
/* UDP header */
udp_hdr->source = htons(0x1000);
udp_hdr->dest = htons(0x1000);
udp_hdr->len = htons(UDP_PKT_SIZE);
/* UDP data */
memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
UDP_PKT_DATA_SIZE);
/* UDP header checksum */
/* Computed last: it covers the UDP header and the payload written above. */
udp_hdr->check = 0;
udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
IPPROTO_UDP, (u16 *)udp_hdr);
}
static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
{
memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
sizeof(pkt_data) - 1);
return sizeof(pkt_data) - 1;
PKT_SIZE);
}
static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
@ -375,6 +626,11 @@ static struct option long_options[] = {
{"unaligned", no_argument, 0, 'u'},
{"shared-umem", no_argument, 0, 'M'},
{"force", no_argument, 0, 'F'},
{"duration", required_argument, 0, 'd'},
{"batch-size", required_argument, 0, 'b'},
{"tx-pkt-count", required_argument, 0, 'C'},
{"tx-pkt-size", required_argument, 0, 's'},
{"tx-pkt-pattern", required_argument, 0, 'P'},
{0, 0, 0, 0}
};
@ -399,8 +655,21 @@ static void usage(const char *prog)
" -u, --unaligned Enable unaligned chunk placement\n"
" -M, --shared-umem Enable XDP_SHARED_UMEM\n"
" -F, --force Force loading the XDP prog\n"
" -d, --duration=n Duration in secs to run command.\n"
" Default: forever.\n"
" -b, --batch-size=n Batch size for sending or receiving\n"
" packets. Default: %d\n"
" -C, --tx-pkt-count=n Number of packets to send.\n"
" Default: Continuous packets.\n"
" -s, --tx-pkt-size=n Transmit packet size.\n"
" (Default: %d bytes)\n"
" Min size: %d, Max size %d.\n"
" -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
"\n";
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
exit(EXIT_FAILURE);
}
@ -411,7 +680,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:muM",
c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:",
long_options, &option_index);
if (c == -1)
break;
@ -440,7 +709,7 @@ static void parse_command_line(int argc, char **argv)
opt_xdp_bind_flags |= XDP_COPY;
break;
case 'N':
opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
/* default, set below */
break;
case 'n':
opt_interval = atoi(optarg);
@ -469,11 +738,37 @@ static void parse_command_line(int argc, char **argv)
case 'M':
opt_num_xsks = MAX_SOCKS;
break;
case 'd':
opt_duration = atoi(optarg);
opt_duration *= 1000000000;
break;
case 'b':
opt_batch_size = atoi(optarg);
break;
case 'C':
opt_pkt_count = atoi(optarg);
break;
case 's':
opt_pkt_size = atoi(optarg);
if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
opt_pkt_size < MIN_PKT_SIZE) {
fprintf(stderr,
"ERROR: Invalid frame size %d\n",
opt_pkt_size);
usage(basename(argv[0]));
}
break;
case 'P':
opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
break;
default:
usage(basename(argv[0]));
}
}
if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
opt_ifindex = if_nametoindex(opt_if);
if (!opt_ifindex) {
fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
@ -513,7 +808,7 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
xsk->outstanding_tx;
/* re-add completed Tx buffers */
@ -542,7 +837,8 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
}
}
static inline void complete_tx_only(struct xsk_socket_info *xsk)
static inline void complete_tx_only(struct xsk_socket_info *xsk,
int batch_size)
{
unsigned int rcvd;
u32 idx;
@ -553,7 +849,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
if (rcvd > 0) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
@ -567,7 +863,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
u32 idx_rx = 0, idx_fq = 0;
int ret;
rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
ret = poll(fds, num_socks, opt_timeout);
@ -619,36 +915,68 @@ static void rx_drop_all(void)
for (i = 0; i < num_socks; i++)
rx_drop(xsks[i], fds);
if (benchmark_done)
break;
}
}
static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size)
{
u32 idx;
unsigned int i;
if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
unsigned int i;
for (i = 0; i < BATCH_SIZE; i++) {
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
(frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
sizeof(pkt_data) - 1;
}
xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
xsk->outstanding_tx += BATCH_SIZE;
frame_nb += BATCH_SIZE;
frame_nb %= NUM_FRAMES;
while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
batch_size) {
complete_tx_only(xsk, batch_size);
}
complete_tx_only(xsk);
for (i = 0; i < batch_size; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
idx + i);
tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
tx_desc->len = PKT_SIZE;
}
xsk_ring_prod__submit(&xsk->tx, batch_size);
xsk->outstanding_tx += batch_size;
frame_nb += batch_size;
frame_nb %= NUM_FRAMES;
complete_tx_only(xsk, batch_size);
}
/* Size of the next Tx batch given pkt_cnt packets already sent.
 *
 * Without a packet limit (opt_pkt_count == 0), or while a full batch still
 * fits under the limit, this is opt_batch_size; otherwise it is the number
 * of packets remaining up to opt_pkt_count.
 */
static inline int get_batch_size(int pkt_cnt)
{
	if (opt_pkt_count && pkt_cnt + opt_batch_size > opt_pkt_count)
		return opt_pkt_count - pkt_cnt;

	return opt_batch_size;
}
/* Keep reaping Tx completions until no socket has outstanding_tx left.
 *
 * pending is only ever set within a pass, never cleared by a later socket,
 * so a socket that still has outstanding Tx keeps the loop running even when
 * a socket after it has been fully drained.
 */
static void complete_tx_only_all(void)
{
	bool pending;
	int i;

	do {
		pending = false;
		for (i = 0; i < num_socks; i++) {
			if (!xsks[i]->outstanding_tx)
				continue;

			complete_tx_only(xsks[i], opt_batch_size);
			if (xsks[i]->outstanding_tx)
				pending = true;
		}
	} while (pending);
}
static void tx_only_all(void)
{
struct pollfd fds[MAX_SOCKS] = {};
u32 frame_nb[MAX_SOCKS] = {};
int pkt_cnt = 0;
int i, ret;
for (i = 0; i < num_socks; i++) {
@ -656,7 +984,9 @@ static void tx_only_all(void)
fds[0].events = POLLOUT;
}
for (;;) {
while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
int batch_size = get_batch_size(pkt_cnt);
if (opt_poll) {
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
@ -667,8 +997,16 @@ static void tx_only_all(void)
}
for (i = 0; i < num_socks; i++)
tx_only(xsks[i], frame_nb[i]);
tx_only(xsks[i], frame_nb[i], batch_size);
pkt_cnt += batch_size;
if (benchmark_done)
break;
}
if (opt_pkt_count)
complete_tx_only_all();
}
static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
@ -679,7 +1017,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
complete_tx_l2fwd(xsk, fds);
rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
ret = poll(fds, num_socks, opt_timeout);
@ -736,6 +1074,9 @@ static void l2fwd_all(void)
for (i = 0; i < num_socks; i++)
l2fwd(xsks[i], fds);
if (benchmark_done)
break;
}
}
@ -831,9 +1172,12 @@ int main(int argc, char **argv)
for (i = 0; i < opt_num_xsks; i++)
xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
if (opt_bench == BENCH_TXONLY)
if (opt_bench == BENCH_TXONLY) {
gen_eth_hdr_data();
for (i = 0; i < NUM_FRAMES; i++)
gen_eth_frame(umem, i * opt_xsk_frame_size);
}
if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
enter_xsks_into_map(obj);
@ -849,6 +1193,7 @@ int main(int argc, char **argv)
exit_with_error(ret);
prev_time = get_nsecs();
start_time = prev_time;
if (opt_bench == BENCH_RXDROP)
rx_drop_all();
@ -857,5 +1202,11 @@ int main(int argc, char **argv)
else
l2fwd_all();
benchmark_done = true;
pthread_join(pt, NULL);
xdpsock_cleanup();
return 0;
}

View File

@ -0,0 +1,305 @@
================
bpftool-gen
================
-------------------------------------------------------------------------------
tool for BPF code-generation
-------------------------------------------------------------------------------
:Manual section: 8
SYNOPSIS
========
**bpftool** [*OPTIONS*] **gen** *COMMAND*
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
*COMMAND* := { **skeleton** | **help** }
GEN COMMANDS
=============
| **bpftool** **gen skeleton** *FILE*
| **bpftool** **gen help**
DESCRIPTION
===========
**bpftool gen skeleton** *FILE*
Generate BPF skeleton C header file for a given *FILE*.
BPF skeleton is an alternative interface to existing libbpf
APIs for working with BPF objects. Skeleton code is intended
to significantly shorten and simplify code to load and work
with BPF programs from userspace side. Generated code is
tailored to specific input BPF object *FILE*, reflecting its
structure by listing out available maps, program, variables,
etc. Skeleton eliminates the need to lookup mentioned
components by name. Instead, if skeleton instantiation
succeeds, they are populated in skeleton structure as valid
libbpf types (e.g., struct bpf_map pointer) and can be
passed to existing generic libbpf APIs.
In addition to simple and reliable access to maps and
programs, skeleton provides a storage for BPF links (struct
bpf_link) for each BPF program within BPF object. When
requested, supported BPF programs will be automatically
attached and resulting BPF links stored for further use by
user in pre-allocated fields in skeleton struct. For BPF
programs that can't be automatically attached by libbpf,
user can attach them manually, but store resulting BPF link
in per-program link field. All such set up links will be
automatically destroyed on BPF skeleton destruction. This
eliminates the need for users to manage links manually and
rely on libbpf support to detach programs and free up
resources.
Another facility provided by BPF skeleton is an interface to
global variables of all supported kinds: mutable, read-only,
as well as extern ones. This interface allows to pre-setup
initial values of variables before BPF object is loaded and
verified by kernel. For non-read-only variables, the same
interface can be used to fetch values of global variables on
userspace side, even if they are modified by BPF code.
During skeleton generation, contents of source BPF object
*FILE* is embedded within generated code and is thus not
necessary to keep around. This ensures skeleton and BPF
object file are matching 1-to-1 and always stay in sync.
Generated code is dual-licensed under LGPL-2.1 and
BSD-2-Clause licenses.
It is a design goal and guarantee that skeleton interfaces
are interoperable with generic libbpf APIs. User should
always be able to use skeleton API to create and load BPF
object, and later use libbpf APIs to keep working with
specific maps, programs, etc.
As part of skeleton, few custom functions are generated.
Each of them is prefixed with object name, derived from
object file name. I.e., if BPF object file name is
**example.o**, BPF object name will be **example**. The
following custom functions are provided in such case:
- **example__open** and **example__open_opts**.
These functions are used to instantiate the skeleton. They
correspond to libbpf's **bpf_object__open()** API. The
**_opts** variant accepts extra **bpf_object_open_opts**
options.
- **example__load**.
This function creates maps, loads and verifies BPF
programs, initializes global data maps. It corresponds to
libbpf's **bpf_object__load** API.
- **example__open_and_load** combines **example__open** and
**example__load** invocations in one commonly used
operation.
- **example__attach** and **example__detach**
This pair of functions allow to attach and detach,
correspondingly, already loaded BPF object. Only BPF
programs of types supported by libbpf for auto-attachment
will be auto-attached and their corresponding BPF links
instantiated. For other BPF programs, user can manually
create a BPF link and assign it to corresponding fields in
skeleton struct. **example__detach** will detach both
links created automatically, as well as those populated by
user manually.
- **example__destroy**
Detach and unload BPF programs, free up all the resources
used by skeleton and BPF object.
If BPF object has global variables, corresponding structs
with memory layout corresponding to global data section
layout will be created. Currently supported ones are: *.data*,
*.bss*, *.rodata*, and *.kconfig* structs/data sections.
These data sections/structs can be used to set up initial
values of variables, if set before **example__load**.
Afterwards, if target kernel supports memory-mapped BPF
arrays, same structs can be used to fetch and update
(non-read-only) data from userspace, with same simplicity
as for BPF side.
**bpftool gen help**
Print short help message.
OPTIONS
=======
-h, --help
Print short generic help message (similar to **bpftool help**).
-V, --version
Print version number (similar to **bpftool version**).
-j, --json
Generate JSON output. For commands that cannot produce JSON,
this option has no effect.
-p, --pretty
Generate human-readable JSON output. Implies **-j**.
-d, --debug
Print all logs available from libbpf, including debug-level
information.
EXAMPLES
========
**$ cat example.c**
::
#include <stdbool.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
const volatile int param1 = 42;
bool global_flag = true;
struct { int x; } data = {};
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 128);
__type(key, int);
__type(value, long);
} my_map SEC(".maps");
SEC("raw_tp/sys_enter")
int handle_sys_enter(struct pt_regs *ctx)
{
static long my_static_var;
if (global_flag)
my_static_var++;
else
data.x += param1;
return 0;
}
SEC("raw_tp/sys_exit")
int handle_sys_exit(struct pt_regs *ctx)
{
int zero = 0;
bpf_map_lookup_elem(&my_map, &zero);
return 0;
}
This is example BPF application with two BPF programs and a mix of BPF maps
and global variables.
**$ bpftool gen skeleton example.o**
::
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* THIS FILE IS AUTOGENERATED! */
#ifndef __EXAMPLE_SKEL_H__
#define __EXAMPLE_SKEL_H__
#include <stdlib.h>
#include <libbpf.h>
struct example {
struct bpf_object_skeleton *skeleton;
struct bpf_object *obj;
struct {
struct bpf_map *rodata;
struct bpf_map *data;
struct bpf_map *bss;
struct bpf_map *my_map;
} maps;
struct {
struct bpf_program *handle_sys_enter;
struct bpf_program *handle_sys_exit;
} progs;
struct {
struct bpf_link *handle_sys_enter;
struct bpf_link *handle_sys_exit;
} links;
struct example__bss {
struct {
int x;
} data;
} *bss;
struct example__data {
_Bool global_flag;
long int handle_sys_enter_my_static_var;
} *data;
struct example__rodata {
int param1;
} *rodata;
};
static void example__destroy(struct example *obj);
static inline struct example *example__open_opts(
const struct bpf_object_open_opts *opts);
static inline struct example *example__open();
static inline int example__load(struct example *obj);
static inline struct example *example__open_and_load();
static inline int example__attach(struct example *obj);
static inline void example__detach(struct example *obj);
#endif /* __EXAMPLE_SKEL_H__ */
**$ cat example_user.c**
::
#include "example.skel.h"
int main()
{
struct example *skel;
int err = 0;
skel = example__open();
if (!skel)
goto cleanup;
skel->rodata->param1 = 128;
err = example__load(skel);
if (err)
goto cleanup;
err = example__attach(skel);
if (err)
goto cleanup;
/* all libbpf APIs are usable */
printf("my_map name: %s\n", bpf_map__name(skel->maps.my_map));
printf("sys_enter prog FD: %d\n",
bpf_program__fd(skel->progs.handle_sys_enter));
/* detach and re-attach sys_exit program */
bpf_link__destroy(skel->links.handle_sys_exit);
skel->links.handle_sys_exit =
bpf_program__attach(skel->progs.handle_sys_exit);
printf("my_static_var: %ld\n",
skel->bss->handle_sys_enter_my_static_var);
cleanup:
example__destroy(skel);
return err;
}
**# ./example_user**
::
my_map name: my_map
sys_enter prog FD: 8
my_static_var: 7
This is a stripped-out version of skeleton generated for above example code.
SEE ALSO
========
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-map**\ (8),
**bpftool-prog**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)

View File

@ -39,9 +39,9 @@ MAP COMMANDS
| **bpftool** **map freeze** *MAP*
| **bpftool** **map help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* }
| *DATA* := { [**hex**] *BYTES* }
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* }
| *VALUE* := { *DATA* | *MAP* | *PROG* }
| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
| *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash**
@ -55,8 +55,9 @@ DESCRIPTION
===========
**bpftool map { show | list }** [*MAP*]
Show information about loaded maps. If *MAP* is specified
show information only about given map, otherwise list all
maps currently loaded on the system.
show information only about given maps, otherwise list all
maps currently loaded on the system. In case of **name**,
*MAP* may match several maps which will all be shown.
Output will start with map ID followed by map type and
zero or more named attributes (depending on kernel version).
@ -66,7 +67,8 @@ DESCRIPTION
as *FILE*.
**bpftool map dump** *MAP*
Dump all entries in a given *MAP*.
Dump all entries in a given *MAP*. In case of **name**,
*MAP* may match several maps which will all be dumped.
**bpftool map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*]
Update map entry for a given *KEY*.

View File

@ -33,7 +33,7 @@ PROG COMMANDS
| **bpftool** **prog help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* }
| *TYPE* := {
| **socket** | **kprobe** | **kretprobe** | **classifier** | **action** |
| **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** |
@ -53,8 +53,10 @@ DESCRIPTION
===========
**bpftool prog { show | list }** [*PROG*]
Show information about loaded programs. If *PROG* is
specified show information only about given program, otherwise
list all programs currently loaded on the system.
specified show information only about given programs,
otherwise list all programs currently loaded on the system.
In case of **tag** or **name**, *PROG* may match several
programs which will all be shown.
Output will start with program ID followed by program type and
zero or more named attributes (depending on kernel version).
@ -68,11 +70,15 @@ DESCRIPTION
performed via the **kernel.bpf_stats_enabled** sysctl knob.
**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
Dump eBPF instructions of the program from the kernel. By
Dump eBPF instructions of the programs from the kernel. By
default, eBPF will be disassembled and printed to standard
output in human-readable format. In this case, **opcodes**
controls if raw opcodes should be printed as well.
In case of **tag** or **name**, *PROG* may match several
programs which will all be dumped. However, if **file** or
**visual** is specified, *PROG* must match a single program.
If **file** is specified, the binary image will instead be
written to *FILE*.
@ -80,15 +86,17 @@ DESCRIPTION
built instead, and eBPF instructions will be presented with
CFG in DOT format, on standard output.
If the prog has line_info available, the source line will
If the programs have line_info available, the source line will
be displayed by default. If **linum** is specified,
the filename, line number and line column will also be
displayed on top of the source line.
**bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }]
Dump jited image (host machine code) of the program.
If *FILE* is specified image will be written to a file,
otherwise it will be disassembled and printed to stdout.
*PROG* must match a single program when **file** is specified.
**opcodes** controls if raw opcodes will be printed.

View File

@ -81,4 +81,5 @@ SEE ALSO
**bpftool-feature**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-btf**\ (8),
**bpftool-gen**\ (8)

View File

@ -59,6 +59,21 @@ _bpftool_get_map_ids_for_type()
command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
}
# Adds the "name" values found in the JSON output of "bpftool -jp map" that
# match the word being completed to the list of suggestions.
_bpftool_get_map_names()
{
    local names
    names="$( bpftool -jp map 2>&1 | \
        command sed -n 's/.*"name": \(.*\),$/\1/p' )"
    COMPREPLY+=( $( compgen -W "$names" -- "$cur" ) )
}
# Takes a map type as $1 and adds the names of matching maps to the list of
# suggestions. Matching is done by keeping only the lines of the JSON map
# listing that are within two lines of an occurrence of the type string.
_bpftool_get_map_names_for_type()
{
    local map_type="$1"
    local names
    names="$( bpftool -jp map 2>&1 | \
        command grep -C2 "$map_type" | \
        command sed -n 's/.*"name": \(.*\),$/\1/p' )"
    COMPREPLY+=( $( compgen -W "$names" -- "$cur" ) )
}
_bpftool_get_prog_ids()
{
COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
@ -71,6 +86,12 @@ _bpftool_get_prog_tags()
command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
}
# Adds the (unquoted) "name" values found in the JSON output of
# "bpftool -jp prog" that match the word being completed to the list of
# suggestions.
_bpftool_get_prog_names()
{
    local names
    names="$( bpftool -jp prog 2>&1 | \
        command sed -n 's/.*"name": "\(.*\)",$/\1/p' )"
    COMPREPLY+=( $( compgen -W "$names" -- "$cur" ) )
}
_bpftool_get_btf_ids()
{
COMPREPLY+=( $( compgen -W "$( bpftool -jp btf 2>&1 | \
@ -180,6 +201,52 @@ _bpftool_map_update_get_id()
esac
}
# Completes a "name" argument for map update-like commands. $1 is the
# subcommand in use. The name is either that of the map being updated or,
# once the "value" keyword has been typed, that of a map/program to store
# into it.
_bpftool_map_update_get_name()
{
    local command="$1"

    # Is it the map to update, or a map to insert into the map to update?
    # Search for "value" keyword.
    local idx value=0
    for (( idx=7; idx < ${#words[@]}-1; idx++ )); do
        [[ ${words[idx]} == "value" ]] || continue
        value=1
        break
    done

    # No "value" keyword yet: the name designates the map to update.
    if (( value == 0 )); then
        case "$command" in
            push)
                _bpftool_get_map_names_for_type stack
                ;;
            enqueue)
                _bpftool_get_map_names_for_type queue
                ;;
            *)
                _bpftool_get_map_names
                ;;
        esac
        return 0
    fi

    # Name to complete is for a value. It can be either prog name or map name.
    # This depends on the type of the map to update; other map types get no
    # suggestion.
    case $(_bpftool_map_guess_map_type) in
        array_of_maps|hash_of_maps)
            _bpftool_get_map_names
            ;;
        prog_array)
            _bpftool_get_prog_names
            ;;
    esac
    return 0
}
_bpftool()
{
local cur prev words objword
@ -251,7 +318,8 @@ _bpftool()
# Completion depends on object and command in use
case $object in
prog)
# Complete id, only for subcommands that use prog (but no map) ids
# Complete id and name, only for subcommands that use prog (but no
# map) ids/names.
case $command in
show|list|dump|pin)
case $prev in
@ -259,12 +327,16 @@ _bpftool()
_bpftool_get_prog_ids
return 0
;;
name)
_bpftool_get_prog_names
return 0
;;
esac
;;
esac
local PROG_TYPE='id pinned tag'
local MAP_TYPE='id pinned'
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
@ -315,6 +387,9 @@ _bpftool()
id)
_bpftool_get_prog_ids
;;
name)
_bpftool_get_map_names
;;
pinned)
_filedir
;;
@ -335,6 +410,9 @@ _bpftool()
id)
_bpftool_get_map_ids
;;
name)
_bpftool_get_map_names
;;
pinned)
_filedir
;;
@ -399,6 +477,10 @@ _bpftool()
_bpftool_get_map_ids
return 0
;;
name)
_bpftool_get_map_names
return 0
;;
pinned|pinmaps)
_filedir
return 0
@ -447,7 +529,7 @@ _bpftool()
esac
;;
map)
local MAP_TYPE='id pinned'
local MAP_TYPE='id pinned name'
case $command in
show|list|dump|peek|pop|dequeue|freeze)
case $prev in
@ -473,6 +555,24 @@ _bpftool()
esac
return 0
;;
name)
case "$command" in
peek)
_bpftool_get_map_names_for_type stack
_bpftool_get_map_names_for_type queue
;;
pop)
_bpftool_get_map_names_for_type stack
;;
dequeue)
_bpftool_get_map_names_for_type queue
;;
*)
_bpftool_get_map_names
;;
esac
return 0
;;
*)
return 0
;;
@ -520,6 +620,10 @@ _bpftool()
_bpftool_get_map_ids
return 0
;;
name)
_bpftool_get_map_names
return 0
;;
key)
COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
@ -545,6 +649,10 @@ _bpftool()
_bpftool_map_update_get_id $command
return 0
;;
name)
_bpftool_map_update_get_name $command
return 0
;;
key)
COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
@ -553,13 +661,13 @@ _bpftool()
# map, depending on the type of the map to update.
case "$(_bpftool_map_guess_map_type)" in
array_of_maps|hash_of_maps)
local MAP_TYPE='id pinned'
local MAP_TYPE='id pinned name'
COMPREPLY+=( $( compgen -W "$MAP_TYPE" \
-- "$cur" ) )
return 0
;;
prog_array)
local PROG_TYPE='id pinned tag'
local PROG_TYPE='id pinned tag name'
COMPREPLY+=( $( compgen -W "$PROG_TYPE" \
-- "$cur" ) )
return 0
@ -621,6 +729,10 @@ _bpftool()
_bpftool_get_map_ids_for_type perf_event_array
return 0
;;
name)
_bpftool_get_map_names_for_type perf_event_array
return 0
;;
cpu)
return 0
;;
@ -644,8 +756,8 @@ _bpftool()
esac
;;
btf)
local PROG_TYPE='id pinned tag'
local MAP_TYPE='id pinned'
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
case $command in
dump)
case $prev in
@ -676,6 +788,17 @@ _bpftool()
esac
return 0
;;
name)
case $pprev in
prog)
_bpftool_get_prog_names
;;
map)
_bpftool_get_map_names
;;
esac
return 0
;;
format)
COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
;;
@ -716,6 +839,17 @@ _bpftool()
;;
esac
;;
gen)
case $command in
skeleton)
_filedir
;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
;;
esac
;;
cgroup)
case $command in
show|list|tree)
@ -735,7 +869,7 @@ _bpftool()
connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
getsockopt setsockopt'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag'
local PROG_TYPE='id pinned tag name'
case $prev in
$command)
_filedir
@ -760,7 +894,7 @@ _bpftool()
elif [[ "$command" == "attach" ]]; then
# We have an attach type on the command line,
# but it is not the previous word, or
# "id|pinned|tag" (we already checked for
# "id|pinned|tag|name" (we already checked for
# that). This should only leave the case when
# we need attach flags for "attach" command.
_bpftool_one_of_list "$ATTACH_FLAGS"
@ -786,7 +920,7 @@ _bpftool()
esac
;;
net)
local PROG_TYPE='id pinned tag'
local PROG_TYPE='id pinned tag name'
local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload'
case $command in
show|list)

View File

@ -117,6 +117,25 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
return prog_cnt;
}
/*
 * Check whether any BPF program is attached to the cgroup behind @cgroup_fd.
 *
 * Every attach type below __MAX_BPF_ATTACH_TYPE is queried in turn and the
 * scan stops at the first type that reports at least one program.
 *
 * Returns 1 if a program is attached, 0 if none is, and -1 if a query failed
 * with an errno other than EINVAL (EINVAL failures are skipped over).
 */
static int cgroup_has_attached_progs(int cgroup_fd)
{
	enum bpf_attach_type type;

	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
		int count = count_attached_bpf_progs(cgroup_fd, type);

		if (count > 0)
			return 1;
		if (count < 0 && errno != EINVAL)
			return -1;
	}

	return 0;
}
static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
int level)
{
@ -161,6 +180,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
static int do_show(int argc, char **argv)
{
enum bpf_attach_type type;
int has_attached_progs;
const char *path;
int cgroup_fd;
int ret = -1;
@ -192,6 +212,16 @@ static int do_show(int argc, char **argv)
goto exit;
}
has_attached_progs = cgroup_has_attached_progs(cgroup_fd);
if (has_attached_progs < 0) {
p_err("can't query bpf programs attached to %s: %s",
path, strerror(errno));
goto exit_cgroup;
} else if (!has_attached_progs) {
ret = 0;
goto exit_cgroup;
}
if (json_output)
jsonw_start_array(json_wtr);
else
@ -212,6 +242,7 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
exit_cgroup:
close(cgroup_fd);
exit:
return ret;
@ -228,7 +259,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftw)
{
enum bpf_attach_type type;
bool skip = true;
int has_attached_progs;
int cgroup_fd;
if (typeflag != FTW_D)
@ -240,22 +271,13 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
return SHOW_TREE_FN_ERR;
}
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
int count = count_attached_bpf_progs(cgroup_fd, type);
if (count < 0 && errno != EINVAL) {
p_err("can't query bpf programs attached to %s: %s",
fpath, strerror(errno));
close(cgroup_fd);
return SHOW_TREE_FN_ERR;
}
if (count > 0) {
skip = false;
break;
}
}
if (skip) {
has_attached_progs = cgroup_has_attached_progs(cgroup_fd);
if (has_attached_progs < 0) {
p_err("can't query bpf programs attached to %s: %s",
fpath, strerror(errno));
close(cgroup_fd);
return SHOW_TREE_FN_ERR;
} else if (!has_attached_progs) {
close(cgroup_fd);
return 0;
}

609
tools/bpf/bpftool/gen.c Normal file
View File

@ -0,0 +1,609 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2019 Facebook */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/err.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <bpf.h>
#include <libbpf.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include "btf.h"
#include "libbpf_internal.h"
#include "json_writer.h"
#include "main.h"
#define MAX_OBJ_NAME_LEN 64
/*
 * Rewrite @name in place so that it only contains characters valid in a C
 * identifier: every byte that is neither alphanumeric nor '_' becomes '_'.
 */
static void sanitize_identifier(char *name)
{
	int i;

	for (i = 0; name[i]; i++) {
		/* <ctype.h> classifiers require a value representable as
		 * unsigned char (or EOF); a negative plain char is undefined.
		 */
		if (!isalnum((unsigned char)name[i]) && name[i] != '_')
			name[i] = '_';
	}
}
/*
 * Return true when @str ends with @suffix (an empty @suffix always matches),
 * false otherwise.
 */
static bool str_has_suffix(const char *str, const char *suffix)
{
	size_t str_len = strlen(str);
	size_t suffix_len = strlen(suffix);

	if (suffix_len > str_len)
		return false;

	/* compare the tail of @str against the whole of @suffix */
	return memcmp(str + (str_len - suffix_len), suffix, suffix_len) == 0;
}
/*
 * Derive an object name from the path @file and store it in @name, which
 * must have room for MAX_OBJ_NAME_LEN bytes: take the last path component
 * (truncated to fit), drop a trailing ".o" and sanitize what is left into a
 * C identifier.
 */
static void get_obj_name(char *name, const char *file)
{
	/* Using basename() GNU version which doesn't modify arg. */
	const char *base = basename(file);
	size_t len;

	strncpy(name, base, MAX_OBJ_NAME_LEN - 1);
	name[MAX_OBJ_NAME_LEN - 1] = '\0';

	len = strlen(name);
	if (str_has_suffix(name, ".o"))
		name[len - 2] = '\0';

	sanitize_identifier(name);
}
/*
 * Build the include-guard macro name for @obj_name, i.e. the upper-cased
 * form of "__<obj_name>_SKEL_H__", and store it in @guard.
 *
 * @guard must be able to hold strlen(obj_name) + 12 bytes (the 11 fixed
 * characters of the pattern plus the terminating NUL); no bound is checked
 * here.
 */
static void get_header_guard(char *guard, const char *obj_name)
{
	int i;

	sprintf(guard, "__%s_SKEL_H__", obj_name);
	for (i = 0; guard[i]; i++) {
		/* toupper() is only defined for unsigned char values or EOF */
		guard[i] = toupper((unsigned char)guard[i]);
	}
}
/*
 * Return the identifier used for @map in the generated skeleton.
 *
 * Non-internal maps keep their own name. Internal maps are recognized by the
 * suffix of their name and mapped to a fixed identifier ("data", "rodata",
 * "bss" or "kconfig"); NULL is returned for an internal map with any other
 * suffix.
 */
static const char *get_map_ident(const struct bpf_map *map)
{
	static const struct {
		const char *suffix;
		const char *ident;
	} internal_maps[] = {
		{ ".data",	"data" },
		{ ".rodata",	"rodata" },
		{ ".bss",	"bss" },
		{ ".kconfig",	"kconfig" },
	};
	const char *name = bpf_map__name(map);
	size_t k;

	if (!bpf_map__is_internal(map))
		return name;

	for (k = 0; k < sizeof(internal_maps) / sizeof(internal_maps[0]); k++) {
		if (str_has_suffix(name, internal_maps[k].suffix))
			return internal_maps[k].ident;
	}

	return NULL;
}
/*
 * Output callback handed to btf_dump__new(): formats @fmt/@args straight to
 * standard output. The context pointer @ct is not used.
 */
static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args)
{
	(void)ct;
	vfprintf(stdout, fmt, args);
}
/*
 * Print the skeleton struct definition for one BTF data section.
 *
 * @obj:      BPF object being processed (not used by this function)
 * @btf:      BTF information the section and its variables belong to
 * @d:        BTF dumper used to emit each variable's type declaration
 * @sec:      the DATASEC type to describe
 * @obj_name: object name, used as prefix of the generated struct name
 *
 * Only ".data", ".bss", ".rodata" and ".kconfig" sections produce output;
 * any other section is silently skipped. Variables are emitted in section
 * order, with explicit "char __padN[]" members wherever natural alignment
 * alone would not place a variable at its recorded offset.
 *
 * Returns 0 on success (or for a skipped section), -EINVAL on inconsistent
 * offsets or unknown alignment, or the error from btf_dump__emit_type_decl().
 */
static int codegen_datasec_def(struct bpf_object *obj,
			       struct btf *btf,
			       struct btf_dump *d,
			       const struct btf_type *sec,
			       const char *obj_name)
{
	const char *sec_name = btf__name_by_offset(btf, sec->name_off);
	const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec);
	int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec);
	const char *sec_ident;
	char var_ident[256];

	if (strcmp(sec_name, ".data") == 0)
		sec_ident = "data";
	else if (strcmp(sec_name, ".bss") == 0)
		sec_ident = "bss";
	else if (strcmp(sec_name, ".rodata") == 0)
		sec_ident = "rodata";
	else if (strcmp(sec_name, ".kconfig") == 0)
		sec_ident = "kconfig";
	else
		return 0;

	printf(" struct %s__%s {\n", obj_name, sec_ident);
	for (i = 0; i < vlen; i++, sec_var++) {
		const struct btf_type *var = btf__type_by_id(btf, sec_var->type);
		const char *var_name = btf__name_by_offset(btf, var->name_off);
		DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
			.field_name = var_ident,
			.indent_level = 2,
		);
		int need_off = sec_var->offset, align_off, align;
		__u32 var_type_id = var->type;
		const struct btf_type *t;

		/* strip modifier types to reach the underlying type to emit */
		t = btf__type_by_id(btf, var_type_id);
		while (btf_is_mod(t)) {
			var_type_id = t->type;
			t = btf__type_by_id(btf, var_type_id);
		}

		/* p_err() messages carry no trailing newline, like every
		 * other p_err() call in this file
		 */
		if (off > need_off) {
			p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.",
			      sec_name, i, need_off, off);
			return -EINVAL;
		}

		align = btf__align_of(btf, var->type);
		if (align <= 0) {
			p_err("Failed to determine alignment of variable '%s': %d",
			      var_name, align);
			return -EINVAL;
		}

		/* if rounding the current offset up to the variable's
		 * alignment does not land on the recorded offset, bridge the
		 * whole gap with an explicit padding member
		 */
		align_off = (off + align - 1) / align * align;
		if (align_off != need_off) {
			printf("\t\tchar __pad%d[%d];\n",
			       pad_cnt, need_off - off);
			pad_cnt++;
		}

		/* sanitize variable name, e.g., for static vars inside
		 * a function, its name is '<function name>.<variable name>',
		 * which we'll turn into a '<function name>_<variable name>'
		 */
		var_ident[0] = '\0';
		strncat(var_ident, var_name, sizeof(var_ident) - 1);
		sanitize_identifier(var_ident);

		printf("\t\t");
		err = btf_dump__emit_type_decl(d, var_type_id, &opts);
		if (err)
			return err;
		printf(";\n");

		off = sec_var->offset + sec_var->size;
	}
	printf(" } *%s;\n", sec_ident);
	return 0;
}
/*
 * Walk all BTF types of @obj (IDs 1..number of types) and print a skeleton
 * struct definition for every DATASEC type via codegen_datasec_def().
 *
 * Returns 0 on success, the error encoded in the pointer returned by
 * btf_dump__new() if the dumper cannot be created, or the first error
 * reported by codegen_datasec_def(), at which point the walk stops.
 */
static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
{
	struct btf *btf = bpf_object__btf(obj);
	int nr_types = btf__get_nr_types(btf);
	struct btf_dump *dumper;
	int id, err = 0;

	dumper = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf);
	if (IS_ERR(dumper))
		return PTR_ERR(dumper);

	for (id = 1; id <= nr_types && !err; id++) {
		const struct btf_type *t = btf__type_by_id(btf, id);

		if (btf_is_datasec(t))
			err = codegen_datasec_def(obj, btf, dumper, t, obj_name);
	}

	btf_dump__free(dumper);
	return err;
}
/*
 * Print a code template to standard output, printf()-style.
 *
 * @template is first normalized into a scratch copy:
 *   - its first line may only contain tabs; their count is the "baseline"
 *     indentation, and the line itself is dropped;
 *   - that many leading tabs are removed from every following line;
 *   - trailing whitespace is trimmed from every line.
 * The normalized text is then used as the format string for the variadic
 * arguments.
 *
 * Returns the value of vprintf() on success, -ENOMEM if the scratch buffer
 * cannot be allocated, or -EINVAL if the template is malformed (the scratch
 * buffer is released on every path).
 */
static int codegen(const char *template, ...)
{
	const char *src, *end;
	int skip_tabs = 0, n, ret;
	char *s, *dst;
	va_list args;
	char c;

	n = strlen(template);
	s = malloc(n + 1);
	if (!s)
		return -ENOMEM;
	src = template;
	dst = s;

	/* find out "baseline" indentation to skip; src is only advanced past
	 * characters that were actually consumed, so it never moves beyond
	 * the terminating NUL of a template without a newline
	 */
	while ((c = *src)) {
		src++;
		if (c == '\t') {
			skip_tabs++;
		} else if (c == '\n') {
			break;
		} else {
			p_err("unrecognized character at pos %td in template '%s'",
			      src - template - 1, template);
			ret = -EINVAL;
			goto out;
		}
	}

	while (*src) {
		/* skip baseline indentation tabs */
		for (n = skip_tabs; n > 0; n--, src++) {
			if (*src != '\t') {
				p_err("not enough tabs at pos %td in template '%s'",
				      src - template - 1, template);
				ret = -EINVAL;
				goto out;
			}
		}
		/* trim trailing whitespace */
		end = strchrnul(src, '\n');
		for (n = end - src; n > 0 && isspace((unsigned char)src[n - 1]); n--)
			;
		memcpy(dst, src, n);
		dst += n;
		if (*end)
			*dst++ = '\n';
		src = *end ? end + 1 : end;
	}
	*dst++ = '\0';

	/* print out using adjusted template */
	va_start(args, template);
	ret = vprintf(s, args);
	va_end(args);

out:
	free(s);
	return ret;
}
/*
 * Handler for "bpftool gen skeleton FILE".
 *
 * Maps FILE into memory, opens it as a BPF object with
 * bpf_object__open_mem() and prints a C header ("skeleton") for it on
 * standard output: a struct with one pointer per map, program and link plus
 * the data section structs, the open/load/attach/detach/destroy helpers, and
 * a <name>__create_skeleton() function that embeds the raw bytes of FILE as
 * a string literal.
 *
 * Exactly one argument is accepted. Returns 0 on success, -1 on usage or
 * I/O errors, or the error returned by codegen_datasecs().
 */
static int do_skeleton(int argc, char **argv)
{
	char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
	size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz;
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
	char obj_name[MAX_OBJ_NAME_LEN], *obj_data;
	struct bpf_object *obj = NULL;
	const char *file, *ident;
	struct bpf_program *prog;
	int fd, len, err = -1;
	struct bpf_map *map;
	struct btf *btf;
	struct stat st;

	if (!REQ_ARGS(1)) {
		usage();
		return -1;
	}
	file = GET_ARG();

	if (argc) {
		p_err("extra unknown arguments");
		return -1;
	}

	if (stat(file, &st)) {
		p_err("failed to stat() %s: %s", file, strerror(errno));
		return -1;
	}
	file_sz = st.st_size;
	mmap_sz = roundup(file_sz, sysconf(_SC_PAGE_SIZE));
	fd = open(file, O_RDONLY);
	if (fd < 0) {
		p_err("failed to open() %s: %s", file, strerror(errno));
		return -1;
	}
	obj_data = mmap(NULL, mmap_sz, PROT_READ, MAP_PRIVATE, fd, 0);
	if (obj_data == MAP_FAILED) {
		obj_data = NULL;
		p_err("failed to mmap() %s: %s", file, strerror(errno));
		goto out;
	}
	get_obj_name(obj_name, file);
	opts.object_name = obj_name;
	obj = bpf_object__open_mem(obj_data, file_sz, &opts);
	if (IS_ERR(obj)) {
		/* report the encoded error before the pointer is reset, so
		 * the message carries the real code; obj is then cleared so
		 * that the cleanup path does not see an error pointer
		 */
		p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
		obj = NULL;
		goto out;
	}

	/* count only maps that will get a field in the skeleton */
	bpf_object__for_each_map(map, obj) {
		ident = get_map_ident(map);
		if (!ident) {
			p_err("ignoring unrecognized internal map '%s'...",
			      bpf_map__name(map));
			continue;
		}
		map_cnt++;
	}
	bpf_object__for_each_program(prog, obj) {
		prog_cnt++;
	}

	get_header_guard(header_guard, obj_name);
	codegen("\
\n\
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\
\n\
/* THIS FILE IS AUTOGENERATED! */ \n\
#ifndef %2$s \n\
#define %2$s \n\
\n\
#include <stdlib.h> \n\
#include <libbpf.h> \n\
\n\
struct %1$s { \n\
struct bpf_object_skeleton *skeleton; \n\
struct bpf_object *obj; \n\
",
		obj_name, header_guard
	);

	if (map_cnt) {
		printf("\tstruct {\n");
		bpf_object__for_each_map(map, obj) {
			ident = get_map_ident(map);
			if (!ident)
				continue;
			printf("\t\tstruct bpf_map *%s;\n", ident);
		}
		printf("\t} maps;\n");
	}

	/* each program gets both a bpf_program and a bpf_link slot */
	if (prog_cnt) {
		printf("\tstruct {\n");
		bpf_object__for_each_program(prog, obj) {
			printf("\t\tstruct bpf_program *%s;\n",
			       bpf_program__name(prog));
		}
		printf("\t} progs;\n");
		printf("\tstruct {\n");
		bpf_object__for_each_program(prog, obj) {
			printf("\t\tstruct bpf_link *%s;\n",
			       bpf_program__name(prog));
		}
		printf("\t} links;\n");
	}

	/* data section structs can only be generated when BTF is present */
	btf = bpf_object__btf(obj);
	if (btf) {
		err = codegen_datasecs(obj, obj_name);
		if (err)
			goto out;
	}

	codegen("\
\n\
}; \n\
\n\
static void \n\
%1$s__destroy(struct %1$s *obj) \n\
{ \n\
if (!obj) \n\
return; \n\
if (obj->skeleton) \n\
bpf_object__destroy_skeleton(obj->skeleton);\n\
free(obj); \n\
} \n\
\n\
static inline int \n\
%1$s__create_skeleton(struct %1$s *obj); \n\
\n\
static inline struct %1$s * \n\
%1$s__open_opts(const struct bpf_object_open_opts *opts) \n\
{ \n\
struct %1$s *obj; \n\
\n\
obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\
if (!obj) \n\
return NULL; \n\
if (%1$s__create_skeleton(obj)) \n\
goto err; \n\
if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\
goto err; \n\
\n\
return obj; \n\
err: \n\
%1$s__destroy(obj); \n\
return NULL; \n\
} \n\
\n\
static inline struct %1$s * \n\
%1$s__open(void) \n\
{ \n\
return %1$s__open_opts(NULL); \n\
} \n\
\n\
static inline int \n\
%1$s__load(struct %1$s *obj) \n\
{ \n\
return bpf_object__load_skeleton(obj->skeleton); \n\
} \n\
\n\
static inline struct %1$s * \n\
%1$s__open_and_load(void) \n\
{ \n\
struct %1$s *obj; \n\
\n\
obj = %1$s__open(); \n\
if (!obj) \n\
return NULL; \n\
if (%1$s__load(obj)) { \n\
%1$s__destroy(obj); \n\
return NULL; \n\
} \n\
return obj; \n\
} \n\
\n\
static inline int \n\
%1$s__attach(struct %1$s *obj) \n\
{ \n\
return bpf_object__attach_skeleton(obj->skeleton); \n\
} \n\
\n\
static inline void \n\
%1$s__detach(struct %1$s *obj) \n\
{ \n\
return bpf_object__detach_skeleton(obj->skeleton); \n\
} \n\
",
		obj_name
	);

	codegen("\
\n\
\n\
static inline int \n\
%1$s__create_skeleton(struct %1$s *obj) \n\
{ \n\
struct bpf_object_skeleton *s; \n\
\n\
s = (typeof(s))calloc(1, sizeof(*s)); \n\
if (!s) \n\
return -1; \n\
obj->skeleton = s; \n\
\n\
s->sz = sizeof(*s); \n\
s->name = \"%1$s\"; \n\
s->obj = &obj->obj; \n\
",
		obj_name
	);
	if (map_cnt) {
		codegen("\
\n\
\n\
/* maps */ \n\
s->map_cnt = %zu; \n\
s->map_skel_sz = sizeof(*s->maps); \n\
s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\
if (!s->maps) \n\
goto err; \n\
",
			map_cnt
		);
		i = 0;
		bpf_object__for_each_map(map, obj) {
			ident = get_map_ident(map);

			if (!ident)
				continue;

			codegen("\
\n\
\n\
s->maps[%zu].name = \"%s\"; \n\
s->maps[%zu].map = &obj->maps.%s; \n\
",
				i, bpf_map__name(map), i, ident);
			/* memory-mapped internal maps */
			if (bpf_map__is_internal(map) &&
			    (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
				printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
				       i, ident);
			}
			i++;
		}
	}
	if (prog_cnt) {
		codegen("\
\n\
\n\
/* programs */ \n\
s->prog_cnt = %zu; \n\
s->prog_skel_sz = sizeof(*s->progs); \n\
s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\
if (!s->progs) \n\
goto err; \n\
",
			prog_cnt
		);
		i = 0;
		bpf_object__for_each_program(prog, obj) {
			codegen("\
\n\
\n\
s->progs[%1$zu].name = \"%2$s\"; \n\
s->progs[%1$zu].prog = &obj->progs.%2$s;\n\
s->progs[%1$zu].link = &obj->links.%2$s;\n\
",
				i, bpf_program__name(prog));
			i++;
		}
	}
	/* file_sz is a size_t, so it is formatted with %zu */
	codegen("\
\n\
\n\
s->data_sz = %zu; \n\
s->data = (void *)\"\\ \n\
",
		file_sz);

	/* embed contents of BPF object file: a zero byte is written as the
	 * 2-character escape \0, any other byte as the 4-character escape
	 * \xNN; the literal is continued on a new line once a line would
	 * exceed 78 characters
	 */
	for (i = 0, len = 0; i < file_sz; i++) {
		int w = obj_data[i] ? 4 : 2;

		len += w;
		if (len > 78) {
			printf("\\\n");
			len = w;
		}
		if (!obj_data[i])
			printf("\\0");
		else
			printf("\\x%02x", (unsigned char)obj_data[i]);
	}

	codegen("\
\n\
\"; \n\
\n\
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
return -1; \n\
} \n\
\n\
#endif /* %s */ \n\
",
		header_guard);
	err = 0;
out:
	bpf_object__close(obj);
	if (obj_data)
		munmap(obj_data, mmap_sz);
	close(fd);
	return err;
}
/*
 * Help handler for the "gen" subcommand.
 *
 * In JSON mode a single JSON null is written to the global writer; otherwise
 * the usage text is printed to stderr.  Both arguments are unused and the
 * function always returns 0.
 */
static int do_help(int argc, char **argv)
{
	if (!json_output) {
		fprintf(stderr,
			"Usage: %1$s gen skeleton FILE\n"
			" %1$s gen help\n"
			"\n"
			" " HELP_SPEC_OPTIONS "\n"
			"",
			bin_name);
	} else {
		jsonw_null(json_wtr);
	}

	return 0;
}
/*
 * Dispatch table for "gen": maps each subcommand keyword to its handler.
 * The all-zero entry terminates the table.
 */
static const struct cmd cmds[] = {
{ "skeleton", do_skeleton },
{ "help", do_help },
{ 0 }
};
/*
 * Entry point for the "gen" object: hands the remaining arguments and the
 * local command table to cmd_select() and returns its result.
 * do_help is passed as the last argument -- presumably the help/fallback
 * handler; confirm against cmd_select().
 */
int do_gen(int argc, char **argv)
{
return cmd_select(cmds, argc, argv, do_help);
}

View File

@ -58,7 +58,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
" OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n"
" OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@ -227,6 +227,7 @@ static const struct cmd cmds[] = {
{ "net", do_net },
{ "feature", do_feature },
{ "btf", do_btf },
{ "gen", do_gen },
{ "version", do_version },
{ 0 }
};

View File

@ -42,12 +42,12 @@
#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
#define HELP_SPEC_PROGRAM \
"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
#define HELP_SPEC_OPTIONS \
"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \
"\t {-m|--mapcompat} | {-n|--nomount} }"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE }"
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
@ -155,6 +155,7 @@ int do_net(int argc, char **arg);
int do_tracelog(int argc, char **arg);
int do_feature(int argc, char **argv);
int do_btf(int argc, char **argv);
int do_gen(int argc, char **argv);
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);

View File

@ -91,10 +91,66 @@ static void *alloc_value(struct bpf_map_info *info)
return malloc(info->value_size);
}
int map_parse_fd(int *argc, char ***argv)
/*
 * Collect a file descriptor for every loaded map whose name matches @name.
 *
 * Walks all map ids with bpf_map_get_next_id(), opens each map, reads its
 * info and compares the name (up to BPF_OBJ_NAME_LEN bytes).
 *
 * @name: name to look for
 * @fds:  in/out array of descriptors; it must already have room for one
 *        entry, because it is only grown (realloc) from the second match on
 *
 * Returns the number of matching maps (possibly 0) with their descriptors
 * stored in *fds, or -1 on error after closing every descriptor collected so
 * far.  *fds itself is never freed here.
 */
static int map_fd_by_name(char *name, int **fds)
{
/* NOTE(review): fd is declared both here and two lines below -- this looks
 * like a leftover removed line from the diff; confirm against the real file.
 */
int fd;
unsigned int id = 0;
int fd, nb_fds = 0;
void *tmp;
int err;
while (true) {
struct bpf_map_info info = {};
__u32 len = sizeof(info);
err = bpf_map_get_next_id(id, &id);
if (err) {
/* ENOENT ends the walk: report how many matches were found */
if (errno != ENOENT) {
p_err("%s", strerror(errno));
goto err_close_fds;
}
return nb_fds;
}
fd = bpf_map_get_fd_by_id(id);
if (fd < 0) {
p_err("can't get map by id (%u): %s",
id, strerror(errno));
goto err_close_fds;
}
err = bpf_obj_get_info_by_fd(fd, &info, &len);
if (err) {
p_err("can't get map info (%u): %s",
id, strerror(errno));
goto err_close_fd;
}
/* not the map we are looking for: drop it and move on */
if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
close(fd);
continue;
}
/* the first slot is provided by the caller; grow by one for each
 * additional match
 */
if (nb_fds > 0) {
tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
if (!tmp) {
p_err("failed to realloc");
goto err_close_fd;
}
*fds = tmp;
}
(*fds)[nb_fds++] = fd;
}
err_close_fd:
/* close the descriptor that was not yet stored in *fds */
close(fd);
err_close_fds:
while (--nb_fds >= 0)
close((*fds)[nb_fds]);
return -1;
}
static int map_parse_fds(int *argc, char ***argv, int **fds)
{
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@ -108,10 +164,25 @@ int map_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
fd = bpf_map_get_fd_by_id(id);
if (fd < 0)
(*fds)[0] = bpf_map_get_fd_by_id(id);
if ((*fds)[0] < 0) {
p_err("get map by id (%u): %s", id, strerror(errno));
return fd;
return -1;
}
return 1;
} else if (is_prefix(**argv, "name")) {
char *name;
NEXT_ARGP();
name = **argv;
if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
p_err("can't parse name");
return -1;
}
NEXT_ARGP();
return map_fd_by_name(name, fds);
} else if (is_prefix(**argv, "pinned")) {
char *path;
@ -120,13 +191,43 @@ int map_parse_fd(int *argc, char ***argv)
path = **argv;
NEXT_ARGP();
return open_obj_pinned_any(path, BPF_OBJ_MAP);
(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
if ((*fds)[0] < 0)
return -1;
return 1;
}
p_err("expected 'id' or 'pinned', got: '%s'?", **argv);
p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
return -1;
}
/*
 * Parse a map handle from the command line and return exactly one fd.
 *
 * Delegates to map_parse_fds().  If the handle resolves to a single map its
 * descriptor is returned.  If several maps match, an error is reported, all
 * of them are closed and -1 is returned.  Any other outcome (no match, parse
 * error, allocation failure) also yields -1.
 */
int map_parse_fd(int *argc, char ***argv)
{
	int *candidates;
	int result = -1;
	int count, idx;

	candidates = malloc(sizeof(int));
	if (!candidates) {
		p_err("mem alloc failed");
		return -1;
	}

	count = map_parse_fds(argc, argv, &candidates);
	if (count == 1) {
		result = candidates[0];
	} else if (count > 1) {
		p_err("several maps match this handle");
		/* ambiguous handle: release every match, last one first */
		for (idx = count - 1; idx >= 0; idx--)
			close(candidates[idx]);
	}

	free(candidates);
	return result;
}
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
{
int err;
@ -479,6 +580,21 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
return -1;
}
/*
 * Emit the common header fields of a map (id, type, name, flags) into the
 * JSON object currently open on @wtr.
 *
 * The type is written as a string when it has an entry in map_type_name and
 * as a number otherwise; the name is omitted when it is empty.
 */
static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr)
{
	jsonw_uint_field(wtr, "id", info->id);

	if (info->type >= ARRAY_SIZE(map_type_name))
		jsonw_uint_field(wtr, "type", info->type);
	else
		jsonw_string_field(wtr, "type", map_type_name[info->type]);

	if (info->name[0] != '\0')
		jsonw_string_field(wtr, "name", info->name);

	jsonw_name(wtr, "flags");
	jsonw_printf(wtr, "%d", info->map_flags);
}
static int show_map_close_json(int fd, struct bpf_map_info *info)
{
char *memlock, *frozen_str;
@ -489,18 +605,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info->id);
if (info->type < ARRAY_SIZE(map_type_name))
jsonw_string_field(json_wtr, "type",
map_type_name[info->type]);
else
jsonw_uint_field(json_wtr, "type", info->type);
if (*info->name)
jsonw_string_field(json_wtr, "name", info->name);
jsonw_name(json_wtr, "flags");
jsonw_printf(json_wtr, "%d", info->map_flags);
show_map_header_json(info, json_wtr);
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
@ -561,14 +666,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
return 0;
}
static int show_map_close_plain(int fd, struct bpf_map_info *info)
static void show_map_header_plain(struct bpf_map_info *info)
{
char *memlock, *frozen_str;
int frozen = 0;
memlock = get_fdinfo(fd, "memlock");
frozen_str = get_fdinfo(fd, "frozen");
printf("%u: ", info->id);
if (info->type < ARRAY_SIZE(map_type_name))
printf("%s ", map_type_name[info->type]);
@ -581,6 +680,17 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
printf("flags 0x%x", info->map_flags);
print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
printf("\n");
}
static int show_map_close_plain(int fd, struct bpf_map_info *info)
{
char *memlock, *frozen_str;
int frozen = 0;
memlock = get_fdinfo(fd, "memlock");
frozen_str = get_fdinfo(fd, "frozen");
show_map_header_plain(info);
printf("\tkey %uB value %uB max_entries %u",
info->key_size, info->value_size, info->max_entries);
@ -642,6 +752,50 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
return 0;
}
/*
 * Show every map matching the handle given on the command line.
 *
 * The handle is resolved with map_parse_fds(); each resulting map is printed
 * with the JSON or plain printer.  In JSON mode the maps are wrapped in a
 * root array when more than one matched.
 *
 * Returns 0 on success, -1 if the handle could not be resolved or memory
 * allocation failed, or the bpf_obj_get_info_by_fd() error for the map that
 * could not be queried.
 */
static int do_show_subset(int argc, char **argv)
{
	struct bpf_map_info info = {};
	__u32 len = sizeof(info);
	bool root_array;
	int err = -1;
	int nb_fds, i;
	int *fds;

	fds = malloc(sizeof(int));
	if (!fds) {
		p_err("mem alloc failed");
		return -1;
	}

	nb_fds = map_parse_fds(&argc, &argv, &fds);
	if (nb_fds < 1)
		goto exit_free;

	root_array = json_output && nb_fds > 1;
	if (root_array)
		jsonw_start_array(json_wtr);	/* root array */

	i = 0;
	while (i < nb_fds) {
		err = bpf_obj_get_info_by_fd(fds[i], &info, &len);
		if (err) {
			p_err("can't get map info: %s",
			      strerror(errno));
			/* give up: release this fd and all remaining ones */
			while (i < nb_fds)
				close(fds[i++]);
			break;
		}

		if (json_output)
			show_map_close_json(fds[i], &info);
		else
			show_map_close_plain(fds[i], &info);

		close(fds[i]);
		i++;
	}

	if (root_array)
		jsonw_end_array(json_wtr);	/* root array */

exit_free:
	free(fds);
	return err;
}
static int do_show(int argc, char **argv)
{
struct bpf_map_info info = {};
@ -653,16 +807,8 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
if (argc == 2) {
fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
if (fd < 0)
return -1;
if (json_output)
return show_map_close_json(fd, &info);
else
return show_map_close_plain(fd, &info);
}
if (argc == 2)
return do_show_subset(argc, argv);
if (argc)
return BAD_ARG();
@ -765,26 +911,49 @@ static int dump_map_elem(int fd, void *key, void *value,
return 0;
}
static int do_dump(int argc, char **argv)
/*
 * Tell whether every map in @fds carries BTF information.
 *
 * @fds:    array of open map file descriptors
 * @nb_fds: number of entries in @fds
 *
 * Returns 1 if BTF could be fetched for all maps, 0 as soon as one map has
 * none, and -1 on error (already reported through p_err()).
 *
 * The descriptors stay owned by the caller and are never closed here:
 * do_dump() closes all of them on its exit path, so closing some of them in
 * this function would make the caller close them a second time.
 */
static int maps_have_btf(int *fds, int nb_fds)
{
	struct bpf_map_info info = {};
	__u32 len = sizeof(info);
	struct btf *btf = NULL;
	int err, i;

	for (i = 0; i < nb_fds; i++) {
		err = bpf_obj_get_info_by_fd(fds[i], &info, &len);
		if (err) {
			p_err("can't get map info: %s", strerror(errno));
			return -1;
		}

		err = btf__get_from_id(info.btf_id, &btf);
		if (err) {
			p_err("failed to get btf");
			return -1;
		}

		if (!btf)
			return 0;

		/* only the presence of BTF matters; do not leak the object */
		btf__free(btf);
		btf = NULL;
	}

	return 1;
}
static int
map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
bool enable_btf, bool show_header)
{
void *key, *value, *prev_key;
unsigned int num_elems = 0;
__u32 len = sizeof(info);
json_writer_t *btf_wtr;
struct btf *btf = NULL;
int err;
int fd;
if (argc != 2)
usage();
fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
if (fd < 0)
return -1;
key = malloc(info.key_size);
value = alloc_value(&info);
key = malloc(info->key_size);
value = alloc_value(info);
if (!key || !value) {
p_err("mem alloc failed");
err = -1;
@ -793,30 +962,32 @@ static int do_dump(int argc, char **argv)
prev_key = NULL;
err = btf__get_from_id(info.btf_id, &btf);
if (err) {
p_err("failed to get btf");
goto exit_free;
if (enable_btf) {
err = btf__get_from_id(info->btf_id, &btf);
if (err || !btf) {
/* enable_btf is true only if we've already checked
* that all maps have BTF information.
*/
p_err("failed to get btf");
goto exit_free;
}
}
if (json_output)
jsonw_start_array(json_wtr);
else
if (btf) {
btf_wtr = get_btf_writer();
if (!btf_wtr) {
p_info("failed to create json writer for btf. falling back to plain output");
btf__free(btf);
btf = NULL;
} else {
jsonw_start_array(btf_wtr);
}
if (wtr) {
if (show_header) {
jsonw_start_object(wtr); /* map object */
show_map_header_json(info, wtr);
jsonw_name(wtr, "elements");
}
jsonw_start_array(wtr); /* elements */
} else if (show_header) {
show_map_header_plain(info);
}
if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
info.value_size != 8)
if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
info->value_size != 8)
p_info("Warning: cannot read values from %s map with value_size != 8",
map_type_name[info.type]);
map_type_name[info->type]);
while (true) {
err = bpf_map_get_next_key(fd, prev_key, key);
if (err) {
@ -824,15 +995,14 @@ static int do_dump(int argc, char **argv)
err = 0;
break;
}
num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr);
num_elems += dump_map_elem(fd, key, value, info, btf, wtr);
prev_key = key;
}
if (json_output)
jsonw_end_array(json_wtr);
else if (btf) {
jsonw_end_array(btf_wtr);
jsonw_destroy(&btf_wtr);
if (wtr) {
jsonw_end_array(wtr); /* elements */
if (show_header)
jsonw_end_object(wtr); /* map object */
} else {
printf("Found %u element%s\n", num_elems,
num_elems != 1 ? "s" : "");
@ -847,6 +1017,72 @@ static int do_dump(int argc, char **argv)
return err;
}
/*
 * "map dump" handler: dump the contents of every map matching the handle
 * given on the command line.
 *
 * Output goes through the global JSON writer in JSON mode, through a BTF
 * writer when all maps have BTF information and such a writer can be
 * created, and as plain text otherwise.  When several maps match, each dump
 * is preceded by the map header and, with a JSON/BTF writer, all dumps are
 * wrapped in a root array.
 *
 * Returns 0 on success and a non-zero value on failure.  Every descriptor
 * obtained from map_parse_fds() is closed before returning.
 */
static int do_dump(int argc, char **argv)
{
	json_writer_t *wtr = NULL, *btf_wtr = NULL;
	struct bpf_map_info info = {};
	int nb_fds, i = 0, btf = 0;
	__u32 len = sizeof(info);
	int *fds = NULL;
	int err = -1;

	if (argc != 2)
		usage();

	fds = malloc(sizeof(int));
	if (!fds) {
		p_err("mem alloc failed");
		return -1;
	}
	nb_fds = map_parse_fds(&argc, &argv, &fds);
	if (nb_fds < 1)
		goto exit_free;

	if (json_output) {
		wtr = json_wtr;
	} else {
		btf = maps_have_btf(fds, nb_fds);
		if (btf < 0)
			goto exit_close;
		if (btf) {
			btf_wtr = get_btf_writer();
			if (btf_wtr) {
				wtr = btf_wtr;
			} else {
				p_info("failed to create json writer for btf. falling back to plain output");
				btf = 0;
			}
		}
	}

	if (wtr && nb_fds > 1)
		jsonw_start_array(wtr);	/* root array */
	for (i = 0; i < nb_fds; i++) {
		if (bpf_obj_get_info_by_fd(fds[i], &info, &len)) {
			p_err("can't get map info: %s", strerror(errno));
			/* err may still be 0 from the previous map's dump;
			 * make sure this failure is reported to the caller
			 */
			err = -1;
			break;
		}
		err = map_dump(fds[i], &info, wtr, btf, nb_fds > 1);
		/* blank line between maps in plain output */
		if (!wtr && i != nb_fds - 1)
			printf("\n");

		if (err)
			break;
		close(fds[i]);
	}
	if (wtr && nb_fds > 1)
		jsonw_end_array(wtr);	/* root array */

	if (btf)
		jsonw_destroy(&btf_wtr);
exit_close:
	/* close whatever the loop above did not get to close */
	for (; i < nb_fds; i++)
		close(fds[i]);
exit_free:
	free(fds);
	return err;
}
static int alloc_key_value(struct bpf_map_info *info, void **key, void **value)
{
*key = NULL;

View File

@ -18,6 +18,7 @@
#include <bpf.h>
#include <nlattr.h>
#include "libbpf_internal.h"
#include "main.h"
#include "netlink_dumper.h"

View File

@ -25,6 +25,11 @@
#include "main.h"
#include "xlated_dumper.h"
/* Which instruction image of a program "prog dump" should print: selected by
 * the "jited" or "xlated" keyword on the command line.
 */
enum dump_mode {
DUMP_JITED,
DUMP_XLATED,
};
static const char * const attach_type_strings[] = {
[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
@ -77,11 +82,12 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
strftime(buf, size, "%FT%T%z", &load_tm);
}
static int prog_fd_by_tag(unsigned char *tag)
static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
{
unsigned int id = 0;
int fd, nb_fds = 0;
void *tmp;
int err;
int fd;
while (true) {
struct bpf_prog_info info = {};
@ -89,36 +95,54 @@ static int prog_fd_by_tag(unsigned char *tag)
err = bpf_prog_get_next_id(id, &id);
if (err) {
p_err("%s", strerror(errno));
return -1;
if (errno != ENOENT) {
p_err("%s", strerror(errno));
goto err_close_fds;
}
return nb_fds;
}
fd = bpf_prog_get_fd_by_id(id);
if (fd < 0) {
p_err("can't get prog by id (%u): %s",
id, strerror(errno));
return -1;
goto err_close_fds;
}
err = bpf_obj_get_info_by_fd(fd, &info, &len);
if (err) {
p_err("can't get prog info (%u): %s",
id, strerror(errno));
close(fd);
return -1;
goto err_close_fd;
}
if (!memcmp(tag, info.tag, BPF_TAG_SIZE))
return fd;
if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
(!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
close(fd);
continue;
}
close(fd);
if (nb_fds > 0) {
tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
if (!tmp) {
p_err("failed to realloc");
goto err_close_fd;
}
*fds = tmp;
}
(*fds)[nb_fds++] = fd;
}
err_close_fd:
close(fd);
err_close_fds:
while (--nb_fds >= 0)
close((*fds)[nb_fds]);
return -1;
}
int prog_parse_fd(int *argc, char ***argv)
static int prog_parse_fds(int *argc, char ***argv, int **fds)
{
int fd;
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@ -132,10 +156,12 @@ int prog_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
fd = bpf_prog_get_fd_by_id(id);
if (fd < 0)
(*fds)[0] = bpf_prog_get_fd_by_id(id);
if ((*fds)[0] < 0) {
p_err("get by id (%u): %s", id, strerror(errno));
return fd;
return -1;
}
return 1;
} else if (is_prefix(**argv, "tag")) {
unsigned char tag[BPF_TAG_SIZE];
@ -149,7 +175,20 @@ int prog_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
return prog_fd_by_tag(tag);
return prog_fd_by_nametag(tag, fds, true);
} else if (is_prefix(**argv, "name")) {
char *name;
NEXT_ARGP();
name = **argv;
if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
p_err("can't parse name");
return -1;
}
NEXT_ARGP();
return prog_fd_by_nametag(name, fds, false);
} else if (is_prefix(**argv, "pinned")) {
char *path;
@ -158,13 +197,43 @@ int prog_parse_fd(int *argc, char ***argv)
path = **argv;
NEXT_ARGP();
return open_obj_pinned_any(path, BPF_OBJ_PROG);
(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
if ((*fds)[0] < 0)
return -1;
return 1;
}
p_err("expected 'id', 'tag' or 'pinned', got: '%s'?", **argv);
p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
return -1;
}
/*
 * Parse a program handle from the command line and return exactly one fd.
 *
 * Delegates to prog_parse_fds().  If the handle resolves to a single program
 * its descriptor is returned.  If several programs match, an error is
 * reported, all of them are closed and -1 is returned.  Any other outcome
 * (no match, parse error, allocation failure) also yields -1.
 */
int prog_parse_fd(int *argc, char ***argv)
{
	int *candidates;
	int result = -1;
	int count, idx;

	candidates = malloc(sizeof(int));
	if (!candidates) {
		p_err("mem alloc failed");
		return -1;
	}

	count = prog_parse_fds(argc, argv, &candidates);
	if (count == 1) {
		result = candidates[0];
	} else if (count > 1) {
		p_err("several programs match this handle");
		/* ambiguous handle: release every match, last one first */
		for (idx = count - 1; idx >= 0; idx--)
			close(candidates[idx]);
	}

	free(candidates);
	return result;
}
static void show_prog_maps(int fd, u32 num_maps)
{
struct bpf_prog_info info = {};
@ -194,11 +263,8 @@ static void show_prog_maps(int fd, u32 num_maps)
}
}
static void print_prog_json(struct bpf_prog_info *info, int fd)
static void print_prog_header_json(struct bpf_prog_info *info)
{
char *memlock;
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
jsonw_string_field(json_wtr, "type",
@ -219,7 +285,14 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns);
jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt);
}
}
static void print_prog_json(struct bpf_prog_info *info, int fd)
{
char *memlock;
jsonw_start_object(json_wtr);
print_prog_header_json(info);
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
if (info->load_time) {
@ -268,10 +341,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_end_object(json_wtr);
}
static void print_prog_plain(struct bpf_prog_info *info, int fd)
static void print_prog_header_plain(struct bpf_prog_info *info)
{
char *memlock;
printf("%u: ", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
printf("%s ", prog_type_name[info->type]);
@ -289,6 +360,13 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
printf(" run_time_ns %lld run_cnt %lld",
info->run_time_ns, info->run_cnt);
printf("\n");
}
static void print_prog_plain(struct bpf_prog_info *info, int fd)
{
char *memlock;
print_prog_header_plain(info);
if (info->load_time) {
char buf[32];
@ -349,6 +427,40 @@ static int show_prog(int fd)
return 0;
}
/*
 * Show every program matching the handle given on the command line.
 *
 * The handle is resolved with prog_parse_fds() and each program is printed
 * with show_prog().  In JSON mode the programs are wrapped in a root array
 * when more than one matched.
 *
 * Returns 0 on success, -1 if the handle could not be resolved or memory
 * allocation failed, or the first non-zero result of show_prog().
 */
static int do_show_subset(int argc, char **argv)
{
	bool root_array;
	int err = -1;
	int nb_fds, i;
	int *fds;

	fds = malloc(sizeof(int));
	if (!fds) {
		p_err("mem alloc failed");
		return -1;
	}

	nb_fds = prog_parse_fds(&argc, &argv, &fds);
	if (nb_fds < 1)
		goto exit_free;

	root_array = json_output && nb_fds > 1;
	if (root_array)
		jsonw_start_array(json_wtr);	/* root array */

	for (i = 0; i < nb_fds; i++) {
		err = show_prog(fds[i]);
		close(fds[i]);
		if (err) {
			/* stop at the first failure, releasing the rest */
			while (++i < nb_fds)
				close(fds[i]);
			break;
		}
	}

	if (root_array)
		jsonw_end_array(json_wtr);	/* root array */

exit_free:
	free(fds);
	return err;
}
static int do_show(int argc, char **argv)
{
__u32 id = 0;
@ -358,15 +470,8 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
if (argc == 2) {
fd = prog_parse_fd(&argc, &argv);
if (fd < 0)
return -1;
err = show_prog(fd);
close(fd);
return err;
}
if (argc == 2)
return do_show_subset(argc, argv);
if (argc)
return BAD_ARG();
@ -408,101 +513,32 @@ static int do_show(int argc, char **argv)
return err;
}
static int do_dump(int argc, char **argv)
static int
prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
char *filepath, bool opcodes, bool visual, bool linum)
{
struct bpf_prog_info_linear *info_linear;
struct bpf_prog_linfo *prog_linfo = NULL;
enum {DUMP_JITED, DUMP_XLATED} mode;
const char *disasm_opt = NULL;
struct bpf_prog_info *info;
struct dump_data dd = {};
void *func_info = NULL;
struct btf *btf = NULL;
char *filepath = NULL;
bool opcodes = false;
bool visual = false;
char func_sig[1024];
unsigned char *buf;
bool linum = false;
__u32 member_len;
__u64 arrays;
ssize_t n;
int fd;
if (is_prefix(*argv, "jited")) {
if (disasm_init())
return -1;
mode = DUMP_JITED;
} else if (is_prefix(*argv, "xlated")) {
mode = DUMP_XLATED;
} else {
p_err("expected 'xlated' or 'jited', got: %s", *argv);
return -1;
}
NEXT_ARG();
if (argc < 2)
usage();
fd = prog_parse_fd(&argc, &argv);
if (fd < 0)
return -1;
if (is_prefix(*argv, "file")) {
NEXT_ARG();
if (!argc) {
p_err("expected file path");
return -1;
}
filepath = *argv;
NEXT_ARG();
} else if (is_prefix(*argv, "opcodes")) {
opcodes = true;
NEXT_ARG();
} else if (is_prefix(*argv, "visual")) {
visual = true;
NEXT_ARG();
} else if (is_prefix(*argv, "linum")) {
linum = true;
NEXT_ARG();
}
if (argc) {
usage();
return -1;
}
if (mode == DUMP_JITED)
arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
else
arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS;
arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
info_linear = bpf_program__get_prog_info_linear(fd, arrays);
close(fd);
if (IS_ERR_OR_NULL(info_linear)) {
p_err("can't get prog info: %s", strerror(errno));
return -1;
}
info = &info_linear->info;
if (mode == DUMP_JITED) {
if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
p_info("no instructions returned");
goto err_free;
return -1;
}
buf = (unsigned char *)(info->jited_prog_insns);
member_len = info->jited_prog_len;
} else { /* DUMP_XLATED */
if (info->xlated_prog_len == 0) {
p_err("error retrieving insn dump: kernel.kptr_restrict set?");
goto err_free;
return -1;
}
buf = (unsigned char *)info->xlated_prog_insns;
member_len = info->xlated_prog_len;
@ -510,7 +546,7 @@ static int do_dump(int argc, char **argv)
if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
p_err("failed to get btf");
goto err_free;
return -1;
}
func_info = (void *)info->func_info;
@ -526,7 +562,7 @@ static int do_dump(int argc, char **argv)
if (fd < 0) {
p_err("can't open file %s: %s", filepath,
strerror(errno));
goto err_free;
return -1;
}
n = write(fd, buf, member_len);
@ -534,7 +570,7 @@ static int do_dump(int argc, char **argv)
if (n != member_len) {
p_err("error writing output file: %s",
n < 0 ? strerror(errno) : "short write");
goto err_free;
return -1;
}
if (json_output)
@ -548,7 +584,7 @@ static int do_dump(int argc, char **argv)
info->netns_ino,
&disasm_opt);
if (!name)
goto err_free;
return -1;
}
if (info->nr_jited_func_lens && info->jited_func_lens) {
@ -643,12 +679,130 @@ static int do_dump(int argc, char **argv)
kernel_syms_destroy(&dd);
}
free(info_linear);
return 0;
}
err_free:
free(info_linear);
return -1;
/*
 * "prog dump" handler: dump the jited or translated instructions of every
 * program matching the handle given on the command line.
 *
 * Expected arguments: "jited" or "xlated", a program handle, then at most
 * one of "file PATH", "opcodes", "visual" or "linum".  "file" and "visual"
 * are rejected when the handle matches several programs.
 *
 * When several programs match, each dump is preceded by the program header;
 * in JSON mode each program becomes an object inside a root array.
 *
 * Returns 0 on success and a non-zero value on failure.  Every descriptor
 * obtained from prog_parse_fds() is closed before returning.
 */
static int do_dump(int argc, char **argv)
{
	struct bpf_prog_info_linear *info_linear;
	char *filepath = NULL;
	bool opcodes = false;
	bool visual = false;
	enum dump_mode mode;
	bool linum = false;
	int *fds = NULL;
	int nb_fds, i = 0;
	int err = -1;
	__u64 arrays;

	if (is_prefix(*argv, "jited")) {
		if (disasm_init())
			return -1;
		mode = DUMP_JITED;
	} else if (is_prefix(*argv, "xlated")) {
		mode = DUMP_XLATED;
	} else {
		p_err("expected 'xlated' or 'jited', got: %s", *argv);
		return -1;
	}
	NEXT_ARG();

	if (argc < 2)
		usage();

	fds = malloc(sizeof(int));
	if (!fds) {
		p_err("mem alloc failed");
		return -1;
	}
	nb_fds = prog_parse_fds(&argc, &argv, &fds);
	if (nb_fds < 1)
		goto exit_free;

	if (is_prefix(*argv, "file")) {
		NEXT_ARG();
		if (!argc) {
			p_err("expected file path");
			goto exit_close;
		}
		/* a single output file cannot hold several programs */
		if (nb_fds > 1) {
			p_err("several programs matched");
			goto exit_close;
		}

		filepath = *argv;
		NEXT_ARG();
	} else if (is_prefix(*argv, "opcodes")) {
		opcodes = true;
		NEXT_ARG();
	} else if (is_prefix(*argv, "visual")) {
		if (nb_fds > 1) {
			p_err("several programs matched");
			goto exit_close;
		}

		visual = true;
		NEXT_ARG();
	} else if (is_prefix(*argv, "linum")) {
		linum = true;
		NEXT_ARG();
	}

	if (argc) {
		usage();
		goto exit_close;
	}

	/* select which variable-length arrays to fetch with the prog info */
	if (mode == DUMP_JITED)
		arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
	else
		arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS;

	arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
	arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
	arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
	arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
	arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;

	if (json_output && nb_fds > 1)
		jsonw_start_array(json_wtr);	/* root array */
	for (i = 0; i < nb_fds; i++) {
		info_linear = bpf_program__get_prog_info_linear(fds[i], arrays);
		if (IS_ERR_OR_NULL(info_linear)) {
			p_err("can't get prog info: %s", strerror(errno));
			/* err may still be 0 from the previous program's
			 * dump; make sure this failure is reported
			 */
			err = -1;
			break;
		}

		if (json_output && nb_fds > 1) {
			jsonw_start_object(json_wtr);	/* prog object */
			print_prog_header_json(&info_linear->info);
			jsonw_name(json_wtr, "insns");
		} else if (nb_fds > 1) {
			print_prog_header_plain(&info_linear->info);
		}

		err = prog_dump(&info_linear->info, mode, filepath, opcodes,
				visual, linum);

		if (json_output && nb_fds > 1)
			jsonw_end_object(json_wtr);	/* prog object */
		else if (i != nb_fds - 1 && nb_fds > 1)
			printf("\n");

		free(info_linear);
		if (err)
			break;
		close(fds[i]);
	}
	if (json_output && nb_fds > 1)
		jsonw_end_array(json_wtr);	/* root array */

exit_close:
	/* close whatever the loop above did not get to close */
	for (; i < nb_fds; i++)
		close(fds[i]);
exit_free:
	free(fds);
	return err;
}
static int do_pin(int argc, char **argv)

View File

@ -2,6 +2,8 @@
#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
#elif defined(__s390__)
#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
#elif defined(__riscv)
#include "../../arch/riscv/include/uapi/asm/bpf_perf_event.h"
#else
#include <uapi/asm-generic/bpf_perf_event.h>
#endif

View File

@ -231,6 +231,11 @@ enum bpf_attach_type {
* When children program makes decision (like picking TCP CA or sock bind)
* parent program has a chance to override it.
*
* With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
* programs for a cgroup. It is also possible to replace an old program at
* any position by additionally specifying the BPF_F_REPLACE flag and passing
* the fd of the program to replace in the replace_bpf_fd attribute. The old
* program at this position will be released.
*
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
* Ex1:
@ -249,6 +254,7 @@ enum bpf_attach_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_F_ALLOW_MULTI (1U << 1)
#define BPF_F_REPLACE (1U << 2)
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@ -442,6 +448,10 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
__u32 replace_bpf_fd; /* previously attached eBPF
* program to replace if
* BPF_F_REPLACE is used
*/
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */

View File

@ -22,9 +22,9 @@ struct btf_header {
};
/* Max # of type identifier */
#define BTF_MAX_TYPE 0x0000ffff
#define BTF_MAX_TYPE 0x000fffff
/* Max offset into the string section */
#define BTF_MAX_NAME_OFFSET 0x0000ffff
#define BTF_MAX_NAME_OFFSET 0x00ffffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
@ -142,7 +142,8 @@ struct btf_param {
enum {
BTF_VAR_STATIC = 0,
BTF_VAR_GLOBAL_ALLOCATED,
BTF_VAR_GLOBAL_ALLOCATED = 1,
BTF_VAR_GLOBAL_EXTERN = 2,
};
/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe

View File

@ -56,8 +56,8 @@ ifndef VERBOSE
endif
FEATURE_USER = .libbpf
FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
FEATURE_DISPLAY = libelf bpf
FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
FEATURE_DISPLAY = libelf zlib bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
@ -147,6 +147,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags)
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
@ -159,7 +160,7 @@ all: fixdep
all_cmd: $(CMD_TARGETS) check
$(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h
$(BPF_IN_SHARED): force elfdep zdep bpfdep bpf_helper_defs.h
@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@ -177,7 +178,7 @@ $(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
$(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h
$(BPF_IN_STATIC): force elfdep zdep bpfdep bpf_helper_defs.h
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
bpf_helper_defs.h: $(srctree)/tools/include/uapi/linux/bpf.h
@ -189,7 +190,7 @@ $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
$(QUIET_LINK)$(CC) $(LDFLAGS) \
--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
@ -213,6 +214,7 @@ check_abi: $(OUTPUT)libbpf.so
"versioned in $(VERSION_SCRIPT)." >&2; \
readelf -s --wide $(BPF_IN_SHARED) | \
cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf -s --wide $(OUTPUT)libbpf.so | \
@ -249,6 +251,7 @@ install_headers: bpf_helper_defs.h
$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
$(call do_install,btf.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
$(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \
@ -277,12 +280,15 @@ clean:
PHONY += force elfdep bpfdep cscope tags
PHONY += force elfdep zdep bpfdep cscope tags
force:
elfdep:
@if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi
zdep:
@if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
bpfdep:
@if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi

View File

@ -466,14 +466,29 @@ int bpf_obj_get(const char *pathname)
/*
 * Attach @prog_fd to @target_fd for attach type @type.
 *
 * Thin wrapper around bpf_prog_attach_xattr(): @flags is placed in the
 * .flags member of a bpf_prog_attach_opts and every other option is left to
 * whatever DECLARE_LIBBPF_OPTS() initializes it to.
 * Returns whatever bpf_prog_attach_xattr() returns.
 */
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
unsigned int flags)
{
DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
.flags = flags,
);
return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
}
/*
 * Attach @prog_fd to @target_fd for attach type @type, with extra options.
 *
 * @opts supplies the attach flags and the fd of a program to replace; both
 * are read through OPTS_GET() with a default of 0.
 *
 * Returns -EINVAL when OPTS_VALID() rejects @opts, otherwise the result of
 * the BPF_PROG_ATTACH sys_bpf() call.
 */
int bpf_prog_attach_xattr(int prog_fd, int target_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts)
{
union bpf_attr attr;
if (!OPTS_VALID(opts, bpf_prog_attach_opts))
return -EINVAL;
/* start from an all-zero attr so unused fields are not garbage */
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
/* NOTE(review): the next line reads `flags`, which is neither a parameter
 * nor a local of this function, and its result is overwritten by the line
 * after it -- looks like a removed line left over from the diff; confirm.
 */
attr.attach_flags = flags;
attr.attach_flags = OPTS_GET(opts, flags, 0);
attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}

View File

@ -28,14 +28,12 @@
#include <stddef.h>
#include <stdint.h>
#include "libbpf_common.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
struct bpf_create_map_attr {
const char *name;
enum bpf_map_type map_type;
@ -128,8 +126,19 @@ LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
LIBBPF_API int bpf_map_freeze(int fd);
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
LIBBPF_API int bpf_obj_get(const char *pathname);
/* Options for bpf_prog_attach_xattr(). */
struct bpf_prog_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
/* passed to the kernel as attach_flags */
unsigned int flags;
/* passed to the kernel as replace_bpf_fd */
int replace_prog_fd;
};
/* Names the last member of the struct -- presumably consumed by the
 * OPTS_VALID() check; keep in sync when appending fields.
 */
#define bpf_prog_attach_opts__last_field replace_prog_fd
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
enum bpf_attach_type type, unsigned int flags);
LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
enum bpf_attach_type type,
const struct bpf_prog_attach_opts *opts);
LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);

View File

@ -25,6 +25,9 @@
#ifndef __always_inline
#define __always_inline __attribute__((always_inline))
#endif
#ifndef __weak
#define __weak __attribute__((weak))
#endif
/*
* Helper structure used by eBPF C program
@ -44,4 +47,12 @@ enum libbpf_pin_type {
LIBBPF_PIN_BY_NAME,
};
enum libbpf_tristate {
TRI_NO = 0,
TRI_YES = 1,
TRI_MODULE = 2,
};
#define __kconfig __attribute__((section(".kconfig")))
#endif

View File

@ -278,6 +278,45 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
return nelems * size;
}
/*
 * Return the alignment, in bytes, of the BTF type identified by @id.
 *
 * - INT and ENUM: the smaller of the pointer size and the type's size.
 * - PTR: the pointer size.
 * - TYPEDEF, VOLATILE, CONST, RESTRICT and ARRAY: the alignment of the
 *   referenced (or element) type.
 * - STRUCT and UNION: the largest member alignment, at least 1; a
 *   non-positive member alignment is returned to the caller unchanged.
 *
 * Any other kind is reported with pr_warn() and yields 0.
 */
int btf__align_of(const struct btf *btf, __u32 id)
{
	const struct btf_type *t = btf__type_by_id(btf, id);
	__u16 kind = btf_kind(t);

	if (kind == BTF_KIND_INT || kind == BTF_KIND_ENUM)
		return min(sizeof(void *), t->size);

	if (kind == BTF_KIND_PTR)
		return sizeof(void *);

	if (kind == BTF_KIND_TYPEDEF || kind == BTF_KIND_VOLATILE ||
	    kind == BTF_KIND_CONST || kind == BTF_KIND_RESTRICT)
		return btf__align_of(btf, t->type);

	if (kind == BTF_KIND_ARRAY)
		return btf__align_of(btf, btf_array(t)->type);

	if (kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) {
		const struct btf_member *members = btf_members(t);
		__u16 nr_members = btf_vlen(t);
		int widest = 1;
		int idx;

		for (idx = 0; idx < nr_members; idx++) {
			int cur = btf__align_of(btf, members[idx].type);

			/* propagate "unknown alignment" from a member */
			if (cur <= 0)
				return cur;
			if (cur > widest)
				widest = cur;
		}
		return widest;
	}

	pr_warn("unsupported BTF_KIND:%u\n", kind);
	return 0;
}
int btf__resolve_type(const struct btf *btf, __u32 type_id)
{
const struct btf_type *t;
@ -539,6 +578,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
return -ENOENT;
}
/* .extern datasec size and var offsets were set correctly during
* extern collection step, so just skip straight to sorting variables
*/
if (t->size)
goto sort_vars;
ret = bpf_object__section_size(obj, name, &size);
if (ret || !size || (t->size && t->size != size)) {
pr_debug("Invalid size for section %s: %u bytes\n", name, size);
@ -575,7 +620,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
vsi->offset = off;
}
qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
sort_vars:
qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
return 0;
}

View File

@ -8,14 +8,12 @@
#include <linux/btf.h>
#include <linux/types.h>
#include "libbpf_common.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
#define BTF_ELF_SEC ".BTF"
#define BTF_EXT_ELF_SEC ".BTF.ext"
#define MAPS_ELF_SEC ".maps"
@ -79,6 +77,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
LIBBPF_API int btf__fd(const struct btf *btf);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
@ -127,6 +126,28 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d);
LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
/* Optional arguments accepted by btf_dump__emit_type_decl(). */
struct btf_dump_emit_type_decl_opts {
/* size of this struct, for forward/backward compatibility */
size_t sz;
/* optional field name for type declaration, e.g.:
* - struct my_struct <FNAME>
* - void (*<FNAME>)(int)
* - char (*<FNAME>)[123]
*/
const char *field_name;
/* extra indentation level (in number of tabs) to emit for multi-line
* type declarations (e.g., anonymous struct); applies for lines
* starting from the second one (first line is assumed to have
* necessary indentation already)
*/
int indent_level;
};
#define btf_dump_emit_type_decl_opts__last_field indent_level
LIBBPF_API int
btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
const struct btf_dump_emit_type_decl_opts *opts);
/*
* A set of helpers for easier BTF types handling
*/

View File

@ -116,6 +116,8 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
va_end(args);
}
static int btf_dump_mark_referenced(struct btf_dump *d);
struct btf_dump *btf_dump__new(const struct btf *btf,
const struct btf_ext *btf_ext,
const struct btf_dump_opts *opts,
@ -137,18 +139,39 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
if (IS_ERR(d->type_names)) {
err = PTR_ERR(d->type_names);
d->type_names = NULL;
btf_dump__free(d);
return ERR_PTR(err);
}
d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
if (IS_ERR(d->ident_names)) {
err = PTR_ERR(d->ident_names);
d->ident_names = NULL;
btf_dump__free(d);
return ERR_PTR(err);
goto err;
}
d->type_states = calloc(1 + btf__get_nr_types(d->btf),
sizeof(d->type_states[0]));
if (!d->type_states) {
err = -ENOMEM;
goto err;
}
d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
sizeof(d->cached_names[0]));
if (!d->cached_names) {
err = -ENOMEM;
goto err;
}
/* VOID is special */
d->type_states[0].order_state = ORDERED;
d->type_states[0].emit_state = EMITTED;
/* eagerly determine referenced types for anon enums */
err = btf_dump_mark_referenced(d);
if (err)
goto err;
return d;
err:
btf_dump__free(d);
return ERR_PTR(err);
}
void btf_dump__free(struct btf_dump *d)
@ -175,7 +198,6 @@ void btf_dump__free(struct btf_dump *d)
free(d);
}
static int btf_dump_mark_referenced(struct btf_dump *d);
static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
@ -202,27 +224,6 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
if (id > btf__get_nr_types(d->btf))
return -EINVAL;
/* type states are lazily allocated, as they might not be needed */
if (!d->type_states) {
d->type_states = calloc(1 + btf__get_nr_types(d->btf),
sizeof(d->type_states[0]));
if (!d->type_states)
return -ENOMEM;
d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
sizeof(d->cached_names[0]));
if (!d->cached_names)
return -ENOMEM;
/* VOID is special */
d->type_states[0].order_state = ORDERED;
d->type_states[0].emit_state = EMITTED;
/* eagerly determine referenced types for anon enums */
err = btf_dump_mark_referenced(d);
if (err)
return err;
}
d->emit_queue_cnt = 0;
err = btf_dump_order_type(d, id, false);
if (err < 0)
@ -752,41 +753,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
}
}
static int btf_align_of(const struct btf *btf, __u32 id)
{
const struct btf_type *t = btf__type_by_id(btf, id);
__u16 kind = btf_kind(t);
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
return min(sizeof(void *), t->size);
case BTF_KIND_PTR:
return sizeof(void *);
case BTF_KIND_TYPEDEF:
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
case BTF_KIND_RESTRICT:
return btf_align_of(btf, t->type);
case BTF_KIND_ARRAY:
return btf_align_of(btf, btf_array(t)->type);
case BTF_KIND_STRUCT:
case BTF_KIND_UNION: {
const struct btf_member *m = btf_members(t);
__u16 vlen = btf_vlen(t);
int i, align = 1;
for (i = 0; i < vlen; i++, m++)
align = max(align, btf_align_of(btf, m->type));
return align;
}
default:
pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
return 1;
}
}
static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
const struct btf_type *t)
{
@ -794,18 +760,18 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
int align, i, bit_sz;
__u16 vlen;
align = btf_align_of(btf, id);
align = btf__align_of(btf, id);
/* size of a non-packed struct has to be a multiple of its alignment*/
if (t->size % align)
if (align && t->size % align)
return true;
m = btf_members(t);
vlen = btf_vlen(t);
/* all non-bitfield fields have to be naturally aligned */
for (i = 0; i < vlen; i++, m++) {
align = btf_align_of(btf, m->type);
align = btf__align_of(btf, m->type);
bit_sz = btf_member_bitfield_size(t, i);
if (bit_sz == 0 && m->offset % (8 * align) != 0)
if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
return true;
}
@ -889,7 +855,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
fname = btf_name_of(d, m->name_off);
m_sz = btf_member_bitfield_size(t, i);
m_off = btf_member_bit_offset(t, i);
align = packed ? 1 : btf_align_of(d->btf, m->type);
align = packed ? 1 : btf__align_of(d->btf, m->type);
btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
btf_dump_printf(d, "\n%s", pfx(lvl + 1));
@ -907,7 +873,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
/* pad at the end, if necessary */
if (is_struct) {
align = packed ? 1 : btf_align_of(d->btf, id);
align = packed ? 1 : btf__align_of(d->btf, id);
btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
lvl + 1);
}
@ -1051,6 +1017,21 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
* of a stack frame. Some care is required to "pop" stack frames after
* processing type declaration chain.
*/
/*
 * Public entry point for emitting a declaration of type @id.
 *
 * Rejects @opts that fail OPTS_VALID() with -EINVAL. Otherwise it forwards
 * to the internal btf_dump_emit_type_decl() with the field name (NULL when
 * not supplied) and the indentation level (0 when not supplied) taken from
 * @opts, and returns 0.
 */
int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
			     const struct btf_dump_emit_type_decl_opts *opts)
{
	if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
		return -EINVAL;

	btf_dump_emit_type_decl(d, id,
				OPTS_GET(opts, field_name, NULL),
				OPTS_GET(opts, indent_level, 0));
	return 0;
}
static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
const char *fname, int lvl)
{

File diff suppressed because it is too large Load Diff

View File

@ -17,14 +17,12 @@
#include <sys/types.h> // for size_t
#include <linux/bpf.h>
#include "libbpf_common.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
enum libbpf_errno {
__LIBBPF_ERRNO__START = 4000,
@ -67,28 +65,6 @@ struct bpf_object_open_attr {
enum bpf_prog_type prog_type;
};
/* Helper macro to declare and initialize libbpf options struct
*
* This dance with uninitialized declaration, followed by memset to zero,
* followed by assignment using compound literal syntax is done to preserve
* ability to use a nice struct field initialization syntax and **hopefully**
* have all the padding bytes initialized to zero. It's not guaranteed though,
* when copying literal, that compiler won't copy garbage in literal's padding
* bytes, but that's the best way I've found and it seems to work in practice.
*
* Macro declares opts struct of given type and name, zero-initializes,
* including any extra padding, it with memset() and then assigns initial
* values provided by users in struct initializer-syntax as varargs.
*/
#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
struct TYPE NAME = ({ \
memset(&NAME, 0, sizeof(struct TYPE)); \
(struct TYPE) { \
.sz = sizeof(struct TYPE), \
__VA_ARGS__ \
}; \
})
struct bpf_object_open_opts {
/* size of this struct, for forward/backward compatibility */
size_t sz;
@ -109,15 +85,19 @@ struct bpf_object_open_opts {
*/
const char *pin_root_path;
__u32 attach_prog_fd;
/* Additional kernel config content that augments and overrides
* system Kconfig for CONFIG_xxx externs.
*/
const char *kconfig;
};
#define bpf_object_open_opts__last_field attach_prog_fd
#define bpf_object_open_opts__last_field kconfig
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
LIBBPF_API struct bpf_object *
bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts);
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts);
LIBBPF_API struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
struct bpf_object_open_opts *opts);
const struct bpf_object_open_opts *opts);
/* deprecated bpf_object__open variants */
LIBBPF_API struct bpf_object *
@ -126,11 +106,6 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
LIBBPF_API struct bpf_object *
bpf_object__open_xattr(struct bpf_object_open_attr *attr);
int bpf_object__section_size(const struct bpf_object *obj, const char *name,
__u32 *size);
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
enum libbpf_pin_type {
LIBBPF_PIN_NONE,
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
@ -161,6 +136,7 @@ struct bpf_object_load_attr {
LIBBPF_API int bpf_object__load(struct bpf_object *obj);
LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
@ -171,6 +147,9 @@ LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
LIBBPF_API struct bpf_program *
bpf_object__find_program_by_title(const struct bpf_object *obj,
const char *title);
LIBBPF_API struct bpf_program *
bpf_object__find_program_by_name(const struct bpf_object *obj,
const char *name);
LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
@ -214,6 +193,7 @@ LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
bool needs_copy);
@ -235,8 +215,11 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
struct bpf_link;
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
bpf_program__attach(struct bpf_program *prog);
LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
LIBBPF_API struct bpf_link *
@ -512,18 +495,6 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
bpf_perf_event_print_t fn, void *private_data);
struct nlattr;
typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
int libbpf_netlink_open(unsigned int *nl_pid);
int libbpf_nl_get_link(int sock, unsigned int nl_pid,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
struct bpf_prog_linfo;
struct bpf_prog_info;
@ -630,6 +601,50 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
*/
LIBBPF_API int libbpf_num_possible_cpus(void);
struct bpf_map_skeleton {
const char *name;
struct bpf_map **map;
void **mmaped;
};
struct bpf_prog_skeleton {
const char *name;
struct bpf_program **prog;
struct bpf_link **link;
};
/*
 * Top-level skeleton descriptor taken by the bpf_object__*_skeleton()
 * functions; it references arrays of struct bpf_map_skeleton and
 * struct bpf_prog_skeleton entries.
 */
struct bpf_object_skeleton {
size_t sz; /* size of this struct, for forward/backward compatibility */
const char *name;
void *data;
size_t data_sz;
struct bpf_object **obj;
int map_cnt;
int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */
struct bpf_map_skeleton *maps;
int prog_cnt;
int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */
struct bpf_prog_skeleton *progs;
};
LIBBPF_API int
bpf_object__open_skeleton(struct bpf_object_skeleton *s,
const struct bpf_object_open_opts *opts);
LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s);
LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s);
enum libbpf_tristate {
TRI_NO = 0,
TRI_YES = 1,
TRI_MODULE = 2,
};
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -208,3 +208,19 @@ LIBBPF_0.0.6 {
btf__find_by_name_kind;
libbpf_find_vmlinux_btf_id;
} LIBBPF_0.0.5;
LIBBPF_0.0.7 {
global:
btf_dump__emit_type_decl;
bpf_link__disconnect;
bpf_object__find_program_by_name;
bpf_object__attach_skeleton;
bpf_object__destroy_skeleton;
bpf_object__detach_skeleton;
bpf_object__load_skeleton;
bpf_object__open_skeleton;
bpf_prog_attach_xattr;
bpf_program__attach;
bpf_program__name;
btf__align_of;
} LIBBPF_0.0.6;

View File

@ -8,5 +8,5 @@ Name: libbpf
Description: BPF library
Version: @VERSION@
Libs: -L${libdir} -lbpf
Requires.private: libelf
Requires.private: libelf zlib
Cflags: -I${includedir}

View File

@ -0,0 +1,40 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/*
* Common user-facing libbpf helpers.
*
* Copyright (c) 2019 Facebook
*/
#ifndef __LIBBPF_LIBBPF_COMMON_H
#define __LIBBPF_LIBBPF_COMMON_H
#include <string.h>
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
/* Helper macro to declare and initialize libbpf options struct
*
* This dance with uninitialized declaration, followed by memset to zero,
* followed by assignment using compound literal syntax is done to preserve
* ability to use a nice struct field initialization syntax and **hopefully**
* have all the padding bytes initialized to zero. It's not guaranteed though,
* when copying literal, that compiler won't copy garbage in literal's padding
* bytes, but that's the best way I've found and it seems to work in practice.
*
* Macro declares opts struct of given type and name, zero-initializes it
* (including any extra padding) with memset() and then assigns initial
* values provided by users in struct initializer-syntax as varargs.
*/
#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
struct TYPE NAME = ({ \
memset(&NAME, 0, sizeof(struct TYPE)); \
(struct TYPE) { \
.sz = sizeof(struct TYPE), \
__VA_ARGS__ \
}; \
})
#endif /* __LIBBPF_LIBBPF_COMMON_H */

View File

@ -76,7 +76,7 @@ static inline bool libbpf_validate_opts(const char *opts,
for (i = opts_sz; i < user_sz; i++) {
if (opts[i]) {
pr_warn("%s has non-zero extra bytes",
pr_warn("%s has non-zero extra bytes\n",
type_name);
return false;
}
@ -95,9 +95,28 @@ static inline bool libbpf_validate_opts(const char *opts,
#define OPTS_GET(opts, field, fallback_value) \
(OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len);
int bpf_object__section_size(const struct bpf_object *obj, const char *name,
__u32 *size);
int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
__u32 *off);
struct nlattr;
typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
int libbpf_netlink_open(unsigned int *nl_pid);
int libbpf_nl_get_link(int sock, unsigned int nl_pid,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
struct btf_ext_info {
/*
* info points to the individual info section (e.g. func_info and

View File

@ -21,7 +21,6 @@ test_lirc_mode2_user
get_cgroup_id_user
test_skb_cgroup_id_user
test_socket_cookie
test_cgroup_attach
test_cgroup_storage
test_select_reuseport
test_flow_dissector
@ -38,5 +37,7 @@ test_hashmap
test_btf_dump
xdping
test_cpp
*.skel.h
/no_alu32
/bpf_gcc
/tools

View File

@ -3,10 +3,12 @@ include ../../../../scripts/Kbuild.include
include ../../../scripts/Makefile.arch
CURDIR := $(abspath .)
LIBDIR := $(abspath ../../../lib)
TOOLSDIR := $(abspath ../../..)
LIBDIR := $(TOOLSDIR)/lib
BPFDIR := $(LIBDIR)/bpf
TOOLSDIR := $(abspath ../../../include)
APIDIR := $(TOOLSDIR)/uapi
TOOLSINCDIR := $(TOOLSDIR)/include
BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
APIDIR := $(TOOLSINCDIR)/uapi
GENDIR := $(abspath ../../../../include/generated)
GENHDR := $(GENDIR)/autoconf.h
@ -19,18 +21,18 @@ LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \
-I$(GENDIR) -I$(TOOLSDIR) -I$(CURDIR) \
-I$(GENDIR) -I$(TOOLSINCDIR) -I$(CURDIR) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lrt -lpthread
LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Order corresponds to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage test_select_reuseport \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
test_cgroup_attach test_progs-no_alu32
test_progs-no_alu32
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
@ -75,6 +77,24 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
TEST_CUSTOM_PROGS = urandom_read
# Emit succinct information message describing current building step
# $1 - generic step name (e.g., CC, LINK, etc);
# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
# $3 - target (assumed to be file); only file name will be emitted;
# $4 - optional extra arg, emitted as-is, if provided.
ifeq ($(V),1)
msg =
else
msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4))
endif
# override lib.mk's default rules
OVERRIDE_TARGETS := 1
override define CLEAN
$(call msg, CLEAN)
$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
include ../lib.mk
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
@ -87,10 +107,16 @@ $(notdir $(TEST_GEN_PROGS) \
$(TEST_GEN_PROGS_EXTENDED) \
$(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
$(OUTPUT)/%:%.c
$(call msg, BINARY,,$@)
$(LINK.c) $^ $(LDLIBS) -o $@
$(OUTPUT)/urandom_read: urandom_read.c
$(call msg, BINARY,,$@)
$(CC) -o $@ $< -Wl,--build-id
$(OUTPUT)/test_stub.o: test_stub.c
$(call msg, CC,,$@)
$(CC) -c $(CFLAGS) -o $@ $<
BPFOBJ := $(OUTPUT)/libbpf.a
@ -110,13 +136,18 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
.PHONY: force
# force a rebuild of BPFOBJ when its dependencies are updated
force:
DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): force
$(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install
$(BPFOBJ): force
$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
@ -159,27 +190,33 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
# $3 - CFLAGS
# $4 - LDFLAGS
define CLANG_BPF_BUILD_RULE
$(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg, CLANG-BPF,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg, GCC-BPF,$(TRUNNER_BINARY),$2)
$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
@ -195,8 +232,11 @@ TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
$$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
$$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)))
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST), \
$$(TRUNNER_BPF_SRCS)))
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
@ -226,12 +266,19 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \
$(TRUNNER_BPF_LDFLAGS))
$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
$(TRUNNER_OUTPUT)/%.o \
| $(BPFTOOL) $(TRUNNER_OUTPUT)
$$(call msg, GEN-SKEL,$(TRUNNER_BINARY),$$@)
$$(BPFTOOL) gen skeleton $$< > $$@
endif
# ensure we set up tests.h header generation rule just once
ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
$(TRUNNER_TESTS_DIR)-tests-hdr := y
$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
$$(call msg, TEST-HDR,$(TRUNNER_BINARY),$$@)
$$(shell ( cd $(TRUNNER_TESTS_DIR); \
echo '/* Generated header, do not edit */'; \
ls *.c 2> /dev/null | \
@ -245,7 +292,9 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_TESTS_DIR)/%.c \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg, TEST-OBJ,$(TRUNNER_BINARY),$$@)
cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
@ -253,17 +302,20 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg, EXTRA-OBJ,$(TRUNNER_BINARY),$$@)
$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
# only copy extra resources if in flavored build
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
ifneq ($2,)
# only copy extra resources if in flavored build
$$(call msg, EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
cp -a $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
| $(TRUNNER_BINARY)-extras
$$(call msg, BINARY,,$$@)
$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
endef
@ -315,12 +367,15 @@ verifier/tests.h: verifier/*.c
echo '#endif' \
) > verifier/tests.h)
$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg, BINARY,,$@)
$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(BPFOBJ)
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg, CXX,,$@)
$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc
feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc \
tools *.skel.h

View File

@ -1,26 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#define EMBED_FILE(NAME, PATH) \
asm ( \
" .pushsection \".rodata\", \"a\", @progbits \n" \
" .global "#NAME"_data \n" \
#NAME"_data: \n" \
" .incbin \"" PATH "\" \n" \
#NAME"_data_end: \n" \
" .global "#NAME"_size \n" \
" .type "#NAME"_size, @object \n" \
" .size "#NAME"_size, 4 \n" \
" .align 4, \n" \
#NAME"_size: \n" \
" .int "#NAME"_data_end - "#NAME"_data \n" \
" .popsection \n" \
); \
extern char NAME##_data[]; \
extern int NAME##_size;
#include "test_attach_probe.skel.h"
ssize_t get_base_addr() {
size_t start;
size_t start, offset;
char buf[256];
FILE *f;
@ -28,10 +11,11 @@ ssize_t get_base_addr() {
if (!f)
return -errno;
while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) {
while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
&start, buf, &offset) == 3) {
if (strcmp(buf, "r-xp") == 0) {
fclose(f);
return start;
return start - offset;
}
}
@ -39,30 +23,12 @@ ssize_t get_base_addr() {
return -EINVAL;
}
EMBED_FILE(probe, "test_attach_probe.o");
void test_attach_probe(void)
{
const char *kprobe_name = "kprobe/sys_nanosleep";
const char *kretprobe_name = "kretprobe/sys_nanosleep";
const char *uprobe_name = "uprobe/trigger_func";
const char *uretprobe_name = "uretprobe/trigger_func";
const int kprobe_idx = 0, kretprobe_idx = 1;
const int uprobe_idx = 2, uretprobe_idx = 3;
const char *obj_name = "attach_probe";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
.object_name = obj_name,
.relaxed_maps = true,
);
struct bpf_program *kprobe_prog, *kretprobe_prog;
struct bpf_program *uprobe_prog, *uretprobe_prog;
struct bpf_object *obj;
int err, duration = 0, res;
struct bpf_link *kprobe_link = NULL;
struct bpf_link *kretprobe_link = NULL;
struct bpf_link *uprobe_link = NULL;
struct bpf_link *uretprobe_link = NULL;
int results_map_fd;
int duration = 0;
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
size_t uprobe_offset;
ssize_t base_addr;
@ -72,123 +38,68 @@ void test_attach_probe(void)
return;
uprobe_offset = (size_t)&get_base_addr - base_addr;
/* open object */
obj = bpf_object__open_mem(probe_data, probe_size, &open_opts);
if (CHECK(IS_ERR(obj), "obj_open_mem", "err %ld\n", PTR_ERR(obj)))
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
"wrong obj name '%s', expected '%s'\n",
bpf_object__name(obj), obj_name))
if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
goto cleanup;
kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
if (CHECK(!kprobe_prog, "find_probe",
"prog '%s' not found\n", kprobe_name))
goto cleanup;
kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
if (CHECK(!kretprobe_prog, "find_probe",
"prog '%s' not found\n", kretprobe_name))
goto cleanup;
uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name);
if (CHECK(!uprobe_prog, "find_probe",
"prog '%s' not found\n", uprobe_name))
goto cleanup;
uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name);
if (CHECK(!uretprobe_prog, "find_probe",
"prog '%s' not found\n", uretprobe_name))
goto cleanup;
/* create maps && load programs */
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
/* load maps */
results_map_fd = bpf_find_map(__func__, obj, "results_map");
if (CHECK(results_map_fd < 0, "find_results_map",
"err %d\n", results_map_fd))
goto cleanup;
kprobe_link = bpf_program__attach_kprobe(kprobe_prog,
kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
false /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
"err %ld\n", PTR_ERR(kprobe_link))) {
kprobe_link = NULL;
"err %ld\n", PTR_ERR(kprobe_link)))
goto cleanup;
}
kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog,
skel->links.handle_kprobe = kprobe_link;
kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
true /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
"err %ld\n", PTR_ERR(kretprobe_link))) {
kretprobe_link = NULL;
"err %ld\n", PTR_ERR(kretprobe_link)))
goto cleanup;
}
uprobe_link = bpf_program__attach_uprobe(uprobe_prog,
skel->links.handle_kretprobe = kretprobe_link;
uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
false /* retprobe */,
0 /* self pid */,
"/proc/self/exe",
uprobe_offset);
if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
"err %ld\n", PTR_ERR(uprobe_link))) {
uprobe_link = NULL;
"err %ld\n", PTR_ERR(uprobe_link)))
goto cleanup;
}
uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog,
skel->links.handle_uprobe = uprobe_link;
uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
true /* retprobe */,
-1 /* any pid */,
"/proc/self/exe",
uprobe_offset);
if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
"err %ld\n", PTR_ERR(uretprobe_link))) {
uretprobe_link = NULL;
"err %ld\n", PTR_ERR(uretprobe_link)))
goto cleanup;
}
skel->links.handle_uretprobe = uretprobe_link;
/* trigger & validate kprobe && kretprobe */
usleep(1);
err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
if (CHECK(err, "get_kprobe_res",
"failed to get kprobe res: %d\n", err))
if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
"wrong kprobe res: %d\n", skel->bss->kprobe_res))
goto cleanup;
if (CHECK(res != kprobe_idx + 1, "check_kprobe_res",
"wrong kprobe res: %d\n", res))
goto cleanup;
err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res);
if (CHECK(err, "get_kretprobe_res",
"failed to get kretprobe res: %d\n", err))
goto cleanup;
if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res",
"wrong kretprobe res: %d\n", res))
if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
"wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
goto cleanup;
/* trigger & validate uprobe & uretprobe */
get_base_addr();
err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
if (CHECK(err, "get_uprobe_res",
"failed to get uprobe res: %d\n", err))
if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
"wrong uprobe res: %d\n", skel->bss->uprobe_res))
goto cleanup;
if (CHECK(res != uprobe_idx + 1, "check_uprobe_res",
"wrong uprobe res: %d\n", res))
goto cleanup;
err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res);
if (CHECK(err, "get_uretprobe_res",
"failed to get uretprobe res: %d\n", err))
goto cleanup;
if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res",
"wrong uretprobe res: %d\n", res))
if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
"wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
goto cleanup;
cleanup:
bpf_link__destroy(kprobe_link);
bpf_link__destroy(kretprobe_link);
bpf_link__destroy(uprobe_link);
bpf_link__destroy(uretprobe_link);
bpf_object__close(obj);
test_attach_probe__destroy(skel);
}

View File

@ -0,0 +1,111 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
/*
 * Load a minimal BPF_PROG_TYPE_CGROUP_SKB program that sets r0 to 1 and
 * exits.
 *
 * Returns the result of bpf_load_program(); callers treat a negative value
 * as failure and print bpf_log_buf, which receives the verifier log.
 */
static int prog_load(void)
{
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1),	/* r0 = 1 */
		BPF_EXIT_INSN(),
	};
	int ret;

	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, insns,
			       sizeof(insns) / sizeof(struct bpf_insn),
			       "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
	return ret;
}
/*
 * Check that programs attached to a cgroup are detached automatically once
 * the cgroup goes away, without an explicit detach.
 *
 * Steps: load two programs, attach both to /cg_autodetach with
 * BPF_F_ALLOW_MULTI, record their ids, close every fd this test holds,
 * remove the cgroup, then poll bpf_prog_get_fd_by_id() until each id can no
 * longer be opened.
 */
void test_cgroup_attach_autodetach(void)
{
	/*
	 * Every slot must start at -1: "{-1}" would only set slot 0 and leave
	 * the rest at 0, making the error path close fd 0 (stdin).
	 */
	int allow_prog[2] = {-1, -1};
	__u32 prog_ids[2] = {0};
	/* prog_cnt is the capacity handed to bpf_prog_query(); keep it tied
	 * to the real size of prog_ids so the query cannot overrun it.
	 */
	__u32 duration = 0, prog_cnt = ARRAY_SIZE(prog_ids), attach_flags;
	void *ptr = NULL;
	int cg = -1, i;
	int attempts;

	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
		allow_prog[i] = prog_load();
		if (CHECK(allow_prog[i] < 0, "prog_load",
			  "verifier output:\n%s\n-------\n", bpf_log_buf))
			goto err;
	}

	if (CHECK_FAIL(setup_cgroup_environment()))
		goto err;

	/* create a cgroup, attach two programs and remember their ids */
	cg = create_and_get_cgroup("/cg_autodetach");
	if (CHECK_FAIL(cg < 0))
		goto err;

	if (CHECK_FAIL(join_cgroup("/cg_autodetach")))
		goto err;

	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
		if (CHECK(bpf_prog_attach(allow_prog[i], cg,
					  BPF_CGROUP_INET_EGRESS,
					  BPF_F_ALLOW_MULTI),
			  "prog_attach", "prog[%d], errno=%d\n", i, errno))
			goto err;

	/* make sure that programs are attached and run some traffic */
	if (CHECK(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
				 prog_ids, &prog_cnt),
		  "prog_query", "errno=%d\n", errno))
		goto err;
	if (CHECK_FAIL(system(PING_CMD)))
		goto err;

	/* allocate some memory (4 MiB) to pin the original cgroup */
	ptr = malloc(4 * (1 << 20));
	if (CHECK_FAIL(!ptr))
		goto err;

	/* close programs and cgroup fd */
	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
		close(allow_prog[i]);
		allow_prog[i] = -1;
	}

	close(cg);
	cg = -1;

	/* leave the cgroup and remove it. don't detach programs */
	cleanup_cgroup_environment();

	/* wait for the asynchronous auto-detachment.
	 * wait for no more than 5 sec and give up.
	 */
	for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
		for (attempts = 5; attempts >= 0; attempts--) {
			int fd = bpf_prog_get_fd_by_id(prog_ids[i]);

			/* the id can no longer be opened: program is gone */
			if (fd < 0)
				break;

			/* don't leave the fd open */
			close(fd);

			if (CHECK_FAIL(!attempts))
				goto err;

			sleep(1);
		}
	}

err:
	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
		if (allow_prog[i] >= 0)
			close(allow_prog[i]);
	if (cg >= 0)
		close(cg);
	free(ptr);
	cleanup_cgroup_environment();
}

View File

@ -0,0 +1,285 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int map_fd = -1;
/*
 * Load a BPF_PROG_TYPE_CGROUP_SKB program that:
 *   - atomically adds @val to element 0 of the shared array map (map_fd,
 *     created on first use and kept open across calls),
 *   - atomically adds @val to its cgroup local storage,
 *   - increments the first 32-bit word of its per-cpu cgroup local storage,
 *   - returns @verdict.
 *
 * Returns the result of bpf_load_program(), or -1 if any map could not be
 * created.  The two storage map fds are private to this call and are closed
 * before returning on every path.
 */
static int prog_load_cnt(int verdict, int val)
{
	int cgroup_storage_fd, percpu_cgroup_storage_fd;
	int ret;

	if (map_fd < 0)
		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
	if (map_fd < 0) {
		printf("failed to create map '%s'\n", strerror(errno));
		return -1;
	}

	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
	if (cgroup_storage_fd < 0) {
		printf("failed to create map '%s'\n", strerror(errno));
		return -1;
	}

	percpu_cgroup_storage_fd = bpf_create_map(
		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
	if (percpu_cgroup_storage_fd < 0) {
		/* report first: close() below may overwrite errno */
		printf("failed to create map '%s'\n", strerror(errno));
		close(cgroup_storage_fd);
		return -1;
	}

	struct bpf_insn prog[] = {
		BPF_MOV32_IMM(BPF_REG_0, 0),
		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
		/* skip the next two insns when the lookup returned 0 */
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = val */
		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
		BPF_MOV64_IMM(BPF_REG_2, 0),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = val */
		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),

		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
		BPF_MOV64_IMM(BPF_REG_2, 0),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),

		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);

	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
			       prog, insns_cnt, "GPL", 0,
			       bpf_log_buf, BPF_LOG_BUF_SIZE);

	/* both storage maps were only needed to build the program */
	close(cgroup_storage_fd);
	close(percpu_cgroup_storage_fd);
	return ret;
}
/*
 * Exercise BPF_F_ALLOW_MULTI / BPF_F_ALLOW_OVERRIDE / BPF_F_REPLACE attach
 * semantics over a five-level cgroup chain /cg1/.../cg5.
 *
 * Program i is loaded with prog_load_cnt(1, 1 << i), so each program that
 * runs adds its own bit to element 0 of the shared map.  After each ping
 * the map value therefore tells which programs were effective.
 */
void test_cgroup_attach_multi(void)
{
	__u32 prog_ids[4], prog_cnt = 0, attach_flags = 0, saved_prog_id;
	/*
	 * All fds start at -1.  With 0 (or "{-1}", which only sets slot 0)
	 * an early failure would make the cleanup path close fd 0 (stdin).
	 */
	int cg1 = -1, cg2 = -1, cg3 = -1, cg4 = -1, cg5 = -1, key = 0;
	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
	int allow_prog[7] = {-1, -1, -1, -1, -1, -1, -1};
	/* zeroed so a failed lookup is not followed by an uninitialised read */
	unsigned long long value = 0;
	__u32 duration = 0;
	int i = 0;

	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
		allow_prog[i] = prog_load_cnt(1, 1 << i);
		if (CHECK(allow_prog[i] < 0, "prog_load",
			  "verifier output:\n%s\n-------\n", bpf_log_buf))
			goto err;
	}

	if (CHECK_FAIL(setup_cgroup_environment()))
		goto err;

	cg1 = create_and_get_cgroup("/cg1");
	if (CHECK_FAIL(cg1 < 0))
		goto err;
	cg2 = create_and_get_cgroup("/cg1/cg2");
	if (CHECK_FAIL(cg2 < 0))
		goto err;
	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
	if (CHECK_FAIL(cg3 < 0))
		goto err;
	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
	if (CHECK_FAIL(cg4 < 0))
		goto err;
	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
	if (CHECK_FAIL(cg5 < 0))
		goto err;

	if (CHECK_FAIL(join_cgroup("/cg1/cg2/cg3/cg4/cg5")))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_MULTI),
		  "prog0_attach_to_cg1_multi", "errno=%d\n", errno))
		goto err;

	/* attaching the very same program a second time must be rejected */
	if (CHECK(!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
				   BPF_F_ALLOW_MULTI),
		  "fail_same_prog_attach_to_cg1", "unexpected success\n"))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_MULTI),
		  "prog1_attach_to_cg1_multi", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog2_attach_to_cg2_override", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_MULTI),
		  "prog3_attach_to_cg3_multi", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog4_attach_to_cg4_override", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0),
		  "prog5_attach_to_cg5_none", "errno=%d\n", errno))
		goto err;

	/* expected contributors: programs 0, 1, 3 and 5 */
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 1 + 2 + 8 + 32);

	/* query the number of effective progs in cg5 */
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, NULL, NULL, &prog_cnt));
	CHECK_FAIL(prog_cnt != 4);
	/* retrieve prog_ids of effective progs in cg5 */
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, &attach_flags,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 4);
	CHECK_FAIL(attach_flags != 0);
	saved_prog_id = prog_ids[0];
	/* check enospc handling */
	prog_ids[0] = 0;
	prog_cnt = 2;
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, &attach_flags,
				  prog_ids, &prog_cnt) != -1);
	CHECK_FAIL(errno != ENOSPC);
	CHECK_FAIL(prog_cnt != 4);
	/* check that prog_ids are returned even when buffer is too small */
	CHECK_FAIL(prog_ids[0] != saved_prog_id);
	/* retrieve prog_id of single attached prog in cg5 */
	prog_ids[0] = 0;
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 1);
	CHECK_FAIL(prog_ids[0] != saved_prog_id);

	/* detach bottom program and ping again */
	if (CHECK(bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS),
		  "prog_detach_from_cg5", "errno=%d\n", errno))
		goto err;

	/* expected contributors: programs 0, 1, 3 and 4 */
	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 1 + 2 + 8 + 16);

	/* test replace */

	attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
	attach_opts.replace_prog_fd = allow_prog[0];
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_override", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != EINVAL);

	attach_opts.flags = BPF_F_REPLACE;
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_no_multi", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != EINVAL);

	attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
	attach_opts.replace_prog_fd = -1;
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_bad_fd", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != EBADF);

	/* replacing a program that is not attached to cgroup should fail */
	attach_opts.replace_prog_fd = allow_prog[3];
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_no_ent", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != ENOENT);

	/* replace 1st from the top program */
	attach_opts.replace_prog_fd = allow_prog[0];
	if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
					BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "prog_replace", "errno=%d\n", errno))
		goto err;

	/* expected contributors: programs 6 (in place of 0), 1, 3 and 4 */
	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 64 + 2 + 8 + 16);

	/* detach 3rd from bottom program and ping again */
	if (CHECK(!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS),
		  "fail_prog_detach_from_cg3", "unexpected success\n"))
		goto err;

	if (CHECK(bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS),
		  "prog3_detach_from_cg3", "errno=%d\n", errno))
		goto err;

	/* expected contributors: programs 6, 1 and 4 */
	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 64 + 2 + 16);

	/* detach 2nd from bottom program and ping again */
	if (CHECK(bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS),
		  "prog_detach_from_cg4", "errno=%d\n", errno))
		goto err;

	/* expected contributors: programs 6, 1 and 2 */
	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 64 + 2 + 4);

	prog_cnt = 4;
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, &attach_flags,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 3);
	CHECK_FAIL(attach_flags != 0);
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 0);

err:
	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
		if (allow_prog[i] >= 0)
			close(allow_prog[i]);
	/* only close cgroup fds that were actually obtained */
	if (cg1 >= 0)
		close(cg1);
	if (cg2 >= 0)
		close(cg2);
	if (cg3 >= 0)
		close(cg3);
	if (cg4 >= 0)
		close(cg4);
	if (cg5 >= 0)
		close(cg5);
	cleanup_cgroup_environment();
}

View File

@ -0,0 +1,148 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
#define FOO "/foo"
#define BAR "/foo/bar/"
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
/*
 * Load a minimal BPF_PROG_TYPE_CGROUP_SKB program whose only effect is to
 * return @verdict in r0.
 *
 * Returns the result of bpf_load_program(); callers treat a negative value
 * as failure and print bpf_log_buf, which receives the verifier log.
 */
static int prog_load(int verdict)
{
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, verdict),	/* r0 = verdict */
		BPF_EXIT_INSN(),
	};
	int ret;

	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, insns,
			       sizeof(insns) / sizeof(struct bpf_insn),
			       "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
	return ret;
}
/*
 * Exercise BPF_F_ALLOW_OVERRIDE attach rules on two nested cgroups, FOO
 * ("/foo") and BAR ("/foo/bar/").
 *
 * Two programs are used: allow_prog = prog_load(1) and drop_prog =
 * prog_load(0).  After each attach/detach step the test either pings
 * 127.0.0.1 and checks whether the ping is expected to succeed or fail, or
 * checks that an attach/detach call itself is accepted or rejected.
 * Any failed check jumps to err, which closes all four fds.
 */
void test_cgroup_attach_override(void)
{
int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1;
__u32 duration = 0;
/* load the verdict-1 and verdict-0 programs */
allow_prog = prog_load(1);
if (CHECK(allow_prog < 0, "prog_load_allow",
"verifier output:\n%s\n-------\n", bpf_log_buf))
goto err;
drop_prog = prog_load(0);
if (CHECK(drop_prog < 0, "prog_load_drop",
"verifier output:\n%s\n-------\n", bpf_log_buf))
goto err;
/* join FOO and attach drop_prog there, overridable: ping must fail */
foo = test__join_cgroup(FOO);
if (CHECK(foo < 0, "cgroup_join_foo", "cgroup setup failed\n"))
goto err;
if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
BPF_F_ALLOW_OVERRIDE),
"prog_attach_drop_foo_override",
"attach prog to %s failed, errno=%d\n", FOO, errno))
goto err;
if (CHECK(!system(PING_CMD), "ping_fail",
"ping unexpectedly succeeded\n"))
goto err;
/* join BAR with nothing attached to it yet: ping must still fail */
bar = test__join_cgroup(BAR);
if (CHECK(bar < 0, "cgroup_join_bar", "cgroup setup failed\n"))
goto err;
if (CHECK(!system(PING_CMD), "ping_fail",
"ping unexpectedly succeeded\n"))
goto err;
/* attach allow_prog to BAR, overridable: ping must now succeed */
if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
BPF_F_ALLOW_OVERRIDE),
"prog_attach_allow_bar_override",
"attach prog to %s failed, errno=%d\n", BAR, errno))
goto err;
if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
goto err;
/* detach from BAR: ping must fail again */
if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
"prog_detach_bar",
"detach prog from %s failed, errno=%d\n", BAR, errno))
goto err;
if (CHECK(!system(PING_CMD), "ping_fail",
"ping unexpectedly succeeded\n"))
goto err;
/* re-attach allow_prog to BAR, then detach from FOO: ping must succeed */
if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
BPF_F_ALLOW_OVERRIDE),
"prog_attach_allow_bar_override",
"attach prog to %s failed, errno=%d\n", BAR, errno))
goto err;
if (CHECK(bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
"prog_detach_foo",
"detach prog from %s failed, errno=%d\n", FOO, errno))
goto err;
if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
goto err;
/* attaching to BAR again with the override flag is expected to succeed */
if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
BPF_F_ALLOW_OVERRIDE),
"prog_attach_allow_bar_override",
"attach prog to %s failed, errno=%d\n", BAR, errno))
goto err;
/* ...but attaching to BAR with flags 0 is expected to be rejected */
if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
"fail_prog_attach_allow_bar_none",
"attach prog to %s unexpectedly succeeded\n", BAR))
goto err;
if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
"prog_detach_bar",
"detach prog from %s failed, errno=%d\n", BAR, errno))
goto err;
/* FOO was already detached above, so a second detach must fail */
if (CHECK(!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
"fail_prog_detach_foo",
"double detach from %s unexpectedly succeeded\n", FOO))
goto err;
/* attach allow_prog to FOO with flags 0 (no override flag) */
if (CHECK(bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
"prog_attach_allow_foo_none",
"attach prog to %s failed, errno=%d\n", FOO, errno))
goto err;
/* with that in place, every further attach to BAR or override-attach
 * to FOO is expected to be rejected
 */
if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
"fail_prog_attach_allow_bar_none",
"attach prog to %s unexpectedly succeeded\n", BAR))
goto err;
if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
BPF_F_ALLOW_OVERRIDE),
"fail_prog_attach_allow_bar_override",
"attach prog to %s unexpectedly succeeded\n", BAR))
goto err;
if (CHECK(!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
BPF_F_ALLOW_OVERRIDE),
"fail_prog_attach_allow_foo_override",
"attach prog to %s unexpectedly succeeded\n", FOO))
goto err;
/* attaching drop_prog to FOO with flags 0 is expected to succeed */
if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
"prog_attach_drop_foo_none",
"attach prog to %s failed, errno=%d\n", FOO, errno))
goto err;
err:
/* fds still at their initial -1 are passed to close() as-is */
close(foo);
close(bar);
close(allow_prog);
close(drop_prog);
}

View File

@ -0,0 +1,169 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <linux/version.h>
#include "test_core_extern.skel.h"
/*
 * Return the running kernel's version encoded with KERNEL_VERSION(), or 0
 * when it cannot be determined: uname() failed, or the release string does
 * not begin with three dot-separated unsigned numbers.
 */
static uint32_t get_kernel_version(void)
{
	uint32_t major, minor, patch;
	struct utsname info;

	/* POSIX: uname() returns a negative value on failure, in which case
	 * info must not be parsed.
	 */
	if (uname(&info) < 0)
		return 0;
	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
		return 0;
	return KERNEL_VERSION(major, minor, patch);
}
#define CFG "CONFIG_BPF_SYSCALL=n\n"
/*
 * Table of sub-tests run by test_core_extern().
 *
 *   name  - sub-test name passed to test__start_subtest()
 *   cfg   - Kconfig text handed to the skeleton via the .kconfig open
 *           option; left unset for the first case
 *   fails - when set, loading the skeleton is expected to fail
 *   data  - expected contents of the skeleton's data section, compared
 *           64-bit word by 64-bit word after the program has run; kern_ver
 *           and missing_val are filled in by the test itself
 *
 * Most cfg strings start with CFG, i.e. "CONFIG_BPF_SYSCALL=n\n".
 */
static struct test_case {
const char *name;
const char *cfg;
bool fails;
struct test_core_extern__data data;
} test_cases[] = {
{ .name = "default search path", .data = { .bpf_syscall = true } },
{
.name = "custom values",
.cfg = "CONFIG_BPF_SYSCALL=n\n"
"CONFIG_TRISTATE=m\n"
"CONFIG_BOOL=y\n"
"CONFIG_CHAR=100\n"
"CONFIG_USHORT=30000\n"
"CONFIG_INT=123456\n"
"CONFIG_ULONG=0xDEADBEEFC0DE\n"
"CONFIG_STR=\"abracad\"\n"
"CONFIG_MISSING=0",
.data = {
.bpf_syscall = false,
.tristate_val = TRI_MODULE,
.bool_val = true,
.char_val = 100,
.ushort_val = 30000,
.int_val = 123456,
.ulong_val = 0xDEADBEEFC0DE,
.str_val = "abracad",
},
},
/* TRISTATE */
{ .name = "tristate (y)", .cfg = CFG"CONFIG_TRISTATE=y\n",
.data = { .tristate_val = TRI_YES } },
{ .name = "tristate (n)", .cfg = CFG"CONFIG_TRISTATE=n\n",
.data = { .tristate_val = TRI_NO } },
{ .name = "tristate (m)", .cfg = CFG"CONFIG_TRISTATE=m\n",
.data = { .tristate_val = TRI_MODULE } },
{ .name = "tristate (int)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=1" },
{ .name = "tristate (bad)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=M" },
/* BOOL */
{ .name = "bool (y)", .cfg = CFG"CONFIG_BOOL=y\n",
.data = { .bool_val = true } },
{ .name = "bool (n)", .cfg = CFG"CONFIG_BOOL=n\n",
.data = { .bool_val = false } },
{ .name = "bool (tristate)", .fails = 1, .cfg = CFG"CONFIG_BOOL=m" },
{ .name = "bool (int)", .fails = 1, .cfg = CFG"CONFIG_BOOL=1" },
/* CHAR */
{ .name = "char (tristate)", .cfg = CFG"CONFIG_CHAR=m\n",
.data = { .char_val = 'm' } },
{ .name = "char (bad)", .fails = 1, .cfg = CFG"CONFIG_CHAR=q\n" },
{ .name = "char (empty)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\n" },
{ .name = "char (str)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\"y\"\n" },
/* STRING */
{ .name = "str (empty)", .cfg = CFG"CONFIG_STR=\"\"\n",
.data = { .str_val = "\0\0\0\0\0\0\0" } },
{ .name = "str (padded)", .cfg = CFG"CONFIG_STR=\"abra\"\n",
.data = { .str_val = "abra\0\0\0" } },
/* an 8-character value is expected to come back as its first 7 chars */
{ .name = "str (too long)", .cfg = CFG"CONFIG_STR=\"abracada\"\n",
.data = { .str_val = "abracad" } },
{ .name = "str (no value)", .fails = 1, .cfg = CFG"CONFIG_STR=\n" },
{ .name = "str (bad value)", .fails = 1, .cfg = CFG"CONFIG_STR=bla\n" },
/* INTEGERS */
{
/* hexadecimal, octal, negative and explicitly signed spellings */
.name = "integer forms",
.cfg = CFG
"CONFIG_CHAR=0xA\n"
"CONFIG_USHORT=0462\n"
"CONFIG_INT=-100\n"
"CONFIG_ULONG=+1000000000000",
.data = {
.char_val = 0xA,
.ushort_val = 0462,
.int_val = -100,
.ulong_val = 1000000000000,
},
},
{ .name = "int (bad)", .fails = 1, .cfg = CFG"CONFIG_INT=abc" },
{ .name = "int (str)", .fails = 1, .cfg = CFG"CONFIG_INT=\"abc\"" },
{ .name = "int (empty)", .fails = 1, .cfg = CFG"CONFIG_INT=" },
{ .name = "int (mixed)", .fails = 1, .cfg = CFG"CONFIG_INT=123abc" },
/* range limits: values one past either end are expected to fail */
{ .name = "int (max)", .cfg = CFG"CONFIG_INT=2147483647",
.data = { .int_val = 2147483647 } },
{ .name = "int (min)", .cfg = CFG"CONFIG_INT=-2147483648",
.data = { .int_val = -2147483648 } },
{ .name = "int (max+1)", .fails = 1, .cfg = CFG"CONFIG_INT=2147483648" },
{ .name = "int (min-1)", .fails = 1, .cfg = CFG"CONFIG_INT=-2147483649" },
{ .name = "ushort (max)", .cfg = CFG"CONFIG_USHORT=65535",
.data = { .ushort_val = 65535 } },
{ .name = "ushort (min)", .cfg = CFG"CONFIG_USHORT=0",
.data = { .ushort_val = 0 } },
{ .name = "ushort (max+1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=65536" },
{ .name = "ushort (min-1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=-1" },
{ .name = "u64 (max)", .cfg = CFG"CONFIG_ULONG=0xffffffffffffffff",
.data = { .ulong_val = 0xffffffffffffffff } },
{ .name = "u64 (min)", .cfg = CFG"CONFIG_ULONG=0",
.data = { .ulong_val = 0 } },
{ .name = "u64 (max+1)", .fails = 1, .cfg = CFG"CONFIG_ULONG=0x10000000000000000" },
};
/*
 * Run every entry of test_cases[] as its own sub-test.
 *
 * For each case the skeleton is opened with the case's Kconfig text and
 * loaded.  Cases marked .fails only check that loading is rejected.  The
 * others attach the program, give it a moment to run, and then compare the
 * skeleton's data section against the expected data one 64-bit word at a
 * time.  The skeleton is destroyed at the end of every iteration.
 */
void test_core_extern(void)
{
	const uint32_t kern_ver = get_kernel_version();
	struct test_core_extern *skel = NULL;
	int n_words = sizeof(*skel->data) / sizeof(uint64_t);
	int err, duration = 0;
	int case_idx, word;

	for (case_idx = 0; case_idx < ARRAY_SIZE(test_cases); case_idx++) {
		struct test_case *tc = &test_cases[case_idx];
		uint64_t *actual, *expected;
		DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
			.kconfig = tc->cfg,
		);

		if (!test__start_subtest(tc->name))
			continue;

		skel = test_core_extern__open_opts(&opts);
		if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
			goto cleanup;

		err = test_core_extern__load(skel);
		if (tc->fails) {
			/* negative case: nothing more to check after load */
			CHECK(!err, "skel_load",
			      "shouldn't succeed open/load of skeleton\n");
			goto cleanup;
		}
		if (CHECK(err, "skel_load",
			  "failed to open/load skeleton\n"))
			goto cleanup;

		err = test_core_extern__attach(skel);
		if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err))
			goto cleanup;

		usleep(1);

		/* these two fields are not part of the static table */
		tc->data.kern_ver = kern_ver;
		tc->data.missing_val = 0xDEADC0DE;

		actual = (uint64_t *)skel->data;
		expected = (uint64_t *)&tc->data;
		for (word = 0; word < n_words; word++) {
			CHECK(actual[word] != expected[word], "check_res",
			      "result #%d: expected %lx, but got %lx\n",
			      word, expected[word], actual[word]);
		}
cleanup:
		test_core_extern__destroy(skel);
		skel = NULL;
	}
}

View File

@ -74,6 +74,7 @@
.b123 = 2, \
.c1c = 3, \
.d00d = 4, \
.f10c = 0, \
}, \
.output_len = sizeof(struct core_reloc_arrays_output) \
}
@ -308,12 +309,15 @@ static struct core_reloc_test_case test_cases[] = {
ARRAYS_CASE(arrays),
ARRAYS_CASE(arrays___diff_arr_dim),
ARRAYS_CASE(arrays___diff_arr_val_sz),
ARRAYS_CASE(arrays___equiv_zero_sz_arr),
ARRAYS_CASE(arrays___fixed_arr),
ARRAYS_ERR_CASE(arrays___err_too_small),
ARRAYS_ERR_CASE(arrays___err_too_shallow),
ARRAYS_ERR_CASE(arrays___err_non_array),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
/* enum/ptr/int handling scenarios */
PRIMITIVES_CASE(primitives),

View File

@ -0,0 +1,78 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <bpf/btf.h>
#include "libbpf_internal.h"
static int duration = 0;
/*
 * Compare a parsed CPU mask against its expected textual form.
 *
 * @case_nr: case number used in the failure messages
 * @exp:     expected mask, one character per CPU; '1' means set, any other
 *           character means unset
 * @mask:    parsed mask, @n entries
 * @n:       number of entries in @mask
 *
 * Reports, via CHECK, a mask that is too short to hold an expected set CPU
 * (and stops there), any CPU whose state differs, and a mask longer than
 * @exp.
 */
static void validate_mask(int case_nr, const char *exp, bool *mask, int n)
{
	int cpu = 0;

	while (exp[cpu]) {
		if (exp[cpu] != '1') {
			/* entries beyond the mask count as unset */
			CHECK(cpu < n && mask[cpu], "cpu_set",
			      "case #%d: mask differs, expected cpu#%d UNSET\n",
			      case_nr, cpu);
			cpu++;
			continue;
		}

		if (CHECK(cpu >= n, "mask_short",
			  "case #%d: mask too short, got n=%d, need at least %d\n",
			  case_nr, n, cpu + 1))
			return;
		CHECK(!mask[cpu], "cpu_not_set",
		      "case #%d: mask differs, expected cpu#%d SET\n",
		      case_nr, cpu);
		cpu++;
	}

	/* cpu now equals the length of exp */
	CHECK(cpu < n, "mask_long",
	      "case #%d: mask too long, got n=%d, expected at most %d\n",
	      case_nr, n, cpu);
}
/*
 * Inputs for test_cpu_mask().
 *
 *   cpu_mask - string handed to parse_cpu_mask_str()
 *   expect   - expected result, one character per CPU index starting at
 *              CPU 0: '1' for a CPU that must be set, '0' for one that must
 *              not be; unused for failure cases
 *   fails    - true when parsing is expected to return an error
 */
static struct {
const char *cpu_mask;
const char *expect;
bool fails;
} test_cases[] = {
/* inputs with a trailing newline */
{ "0\n", "1", false },
{ "0,2\n", "101", false },
{ "0-2\n", "111", false },
{ "0-2,3-4\n", "11111", false },
/* inputs without a trailing newline */
{ "0", "1", false },
{ "0-2", "111", false },
{ "0,2", "101", false },
{ "0,1-3", "1111", false },
{ "0,1,2,3", "1111", false },
{ "0,2-3,5", "101101", false },
{ "3-3", "0001", false },
{ "2-4,6,9-10", "00111010011", false },
/* failure cases */
{ "", "", true },
{ "0-", "", true },
{ "0 ", "", true },
{ "0_1", "", true },
{ "1-0", "", true },
{ "-1", "", true },
};
/*
 * Feed every entry of test_cases[] to parse_cpu_mask_str() and check the
 * outcome: failure cases must return an error, the others must parse and
 * match their expected mask (see validate_mask()).
 *
 * The mask pointer is reset to NULL before each call and released after
 * each case, whichever way the case ended.
 */
void test_cpu_mask(void)
{
	int i, err, n;
	bool *mask;

	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
		mask = NULL;
		err = parse_cpu_mask_str(test_cases[i].cpu_mask, &mask, &n);

		if (test_cases[i].fails) {
			CHECK(!err, "should_fail",
			      "case #%d: parsing should fail!\n", i + 1);
		} else if (!CHECK(err, "parse_err",
				  "case #%d: cpu mask parsing failed: %d\n",
				  i + 1, err)) {
			validate_mask(i + 1, test_cases[i].expect, mask, n);
		}

		/*
		 * Release the mask on every path.  Expected-failure cases
		 * already freed it after an error return; the unexpected
		 * error path now does the same instead of skipping it.
		 */
		free(mask);
	}
}

View File

@ -1,90 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
#include "fexit_test.skel.h"
void test_fentry_fexit(void)
{
struct bpf_prog_load_attr attr_fentry = {
.file = "./fentry_test.o",
};
struct bpf_prog_load_attr attr_fexit = {
.file = "./fexit_test.o",
};
struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
struct fexit_test *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
__u32 duration = 0, retval;
int err, pkt_fd, i;
struct bpf_object *obj_fentry = NULL, *obj_fexit = NULL, *pkt_obj;
struct bpf_map *data_map_fentry, *data_map_fexit;
char fentry_name[] = "fentry/bpf_fentry_testX";
char fexit_name[] = "fexit/bpf_fentry_testX";
int err, pkt_fd, kfree_skb_fd, i;
struct bpf_link *link[12] = {};
struct bpf_program *prog[12];
__u32 duration, retval;
const int zero = 0;
u64 result[12];
err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
&pkt_obj, &pkt_fd);
if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
pkt_skel = test_pkt_access__open_and_load();
if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
return;
err = bpf_prog_load_xattr(&attr_fentry, &obj_fentry, &kfree_skb_fd);
if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto close_prog;
err = bpf_prog_load_xattr(&attr_fexit, &obj_fexit, &kfree_skb_fd);
if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
fexit_skel = fexit_test__open_and_load();
if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
goto close_prog;
for (i = 0; i < 6; i++) {
fentry_name[sizeof(fentry_name) - 2] = '1' + i;
prog[i] = bpf_object__find_program_by_title(obj_fentry, fentry_name);
if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fentry_name))
goto close_prog;
link[i] = bpf_program__attach_trace(prog[i]);
if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
goto close_prog;
}
data_map_fentry = bpf_object__find_map_by_name(obj_fentry, "fentry_t.bss");
if (CHECK(!data_map_fentry, "find_data_map", "data map not found\n"))
goto close_prog;
for (i = 6; i < 12; i++) {
fexit_name[sizeof(fexit_name) - 2] = '1' + i - 6;
prog[i] = bpf_object__find_program_by_title(obj_fexit, fexit_name);
if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fexit_name))
goto close_prog;
link[i] = bpf_program__attach_trace(prog[i]);
if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
goto close_prog;
}
data_map_fexit = bpf_object__find_map_by_name(obj_fexit, "fexit_te.bss");
if (CHECK(!data_map_fexit, "find_data_map", "data map not found\n"))
err = fentry_test__attach(fentry_skel);
if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
goto close_prog;
err = fexit_test__attach(fexit_skel);
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
err = bpf_map_lookup_elem(bpf_map__fd(data_map_fentry), &zero, &result);
if (CHECK(err, "get_result",
"failed to get output data: %d\n", err))
goto close_prog;
err = bpf_map_lookup_elem(bpf_map__fd(data_map_fexit), &zero, result + 6);
if (CHECK(err, "get_result",
"failed to get output data: %d\n", err))
goto close_prog;
for (i = 0; i < 12; i++)
if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
i % 6 + 1, result[i]))
goto close_prog;
fentry_res = (__u64 *)fentry_skel->bss;
fexit_res = (__u64 *)fexit_skel->bss;
printf("%lld\n", fentry_skel->bss->test1_result);
for (i = 0; i < 6; i++) {
CHECK(fentry_res[i] != 1, "result",
"fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
CHECK(fexit_res[i] != 1, "result",
"fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
}
close_prog:
for (i = 0; i < 12; i++)
if (!IS_ERR_OR_NULL(link[i]))
bpf_link__destroy(link[i]);
bpf_object__close(obj_fentry);
bpf_object__close(obj_fexit);
bpf_object__close(pkt_obj);
test_pkt_access__destroy(pkt_skel);
fentry_test__destroy(fentry_skel);
fexit_test__destroy(fexit_skel);
}

View File

@ -1,64 +1,43 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
void test_fentry_test(void)
{
struct bpf_prog_load_attr attr = {
.file = "./fentry_test.o",
};
char prog_name[] = "fentry/bpf_fentry_testX";
struct bpf_object *obj = NULL, *pkt_obj;
int err, pkt_fd, kfree_skb_fd, i;
struct bpf_link *link[6] = {};
struct bpf_program *prog[6];
struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
int err, pkt_fd, i;
__u32 duration, retval;
struct bpf_map *data_map;
const int zero = 0;
u64 result[6];
__u64 *result;
err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
&pkt_obj, &pkt_fd);
if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
pkt_skel = test_pkt_access__open_and_load();
if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
return;
err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
goto close_prog;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto cleanup;
for (i = 0; i < 6; i++) {
prog_name[sizeof(prog_name) - 2] = '1' + i;
prog[i] = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
goto close_prog;
link[i] = bpf_program__attach_trace(prog[i]);
if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
goto close_prog;
}
data_map = bpf_object__find_map_by_name(obj, "fentry_t.bss");
if (CHECK(!data_map, "find_data_map", "data map not found\n"))
goto close_prog;
err = fentry_test__attach(fentry_skel);
if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
goto cleanup;
pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
if (CHECK(err, "get_result",
"failed to get output data: %d\n", err))
goto close_prog;
result = (__u64 *)fentry_skel->bss;
for (i = 0; i < 6; i++) {
if (CHECK(result[i] != 1, "result",
"fentry_test%d failed err %lld\n", i + 1, result[i]))
goto cleanup;
}
for (i = 0; i < 6; i++)
if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
i + 1, result[i]))
goto close_prog;
close_prog:
for (i = 0; i < 6; i++)
if (!IS_ERR_OR_NULL(link[i]))
bpf_link__destroy(link[i]);
bpf_object__close(obj);
bpf_object__close(pkt_obj);
cleanup:
fentry_test__destroy(fentry_skel);
test_pkt_access__destroy(pkt_skel);
}

View File

@ -1,16 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <sys/mman.h>
#include "test_mmap.skel.h"
struct map_data {
__u64 val[512 * 4];
};
struct bss_data {
__u64 in_val;
__u64 out_val;
};
static size_t roundup_page(size_t sz)
{
long page_size = sysconf(_SC_PAGE_SIZE);
@ -19,41 +15,25 @@ static size_t roundup_page(size_t sz)
void test_mmap(void)
{
const char *file = "test_mmap.o";
const char *probe_name = "raw_tracepoint/sys_enter";
const char *tp_name = "sys_enter";
const size_t bss_sz = roundup_page(sizeof(struct bss_data));
const size_t bss_sz = roundup_page(sizeof(struct test_mmap__bss));
const size_t map_sz = roundup_page(sizeof(struct map_data));
const int zero = 0, one = 1, two = 2, far = 1500;
const long page_size = sysconf(_SC_PAGE_SIZE);
int err, duration = 0, i, data_map_fd;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link = NULL;
struct bpf_map *data_map, *bss_map;
void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
volatile struct bss_data *bss_data;
volatile struct map_data *map_data;
struct test_mmap__bss *bss_data;
struct map_data *map_data;
struct test_mmap *skel;
__u64 val = 0;
obj = bpf_object__open_file("test_mmap.o", NULL);
if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
file, PTR_ERR(obj)))
return;
prog = bpf_object__find_program_by_title(obj, probe_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name))
goto cleanup;
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "failed to load prog '%s': %d\n",
probe_name, err))
goto cleanup;
bss_map = bpf_object__find_map_by_name(obj, "test_mma.bss");
if (CHECK(!bss_map, "find_bss_map", ".bss map not found\n"))
goto cleanup;
data_map = bpf_object__find_map_by_name(obj, "data_map");
if (CHECK(!data_map, "find_data_map", "data_map map not found\n"))
goto cleanup;
skel = test_mmap__open_and_load();
if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
return;
bss_map = skel->maps.bss;
data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
@ -77,13 +57,15 @@ void test_mmap(void)
CHECK_FAIL(bss_data->in_val);
CHECK_FAIL(bss_data->out_val);
CHECK_FAIL(skel->bss->in_val);
CHECK_FAIL(skel->bss->out_val);
CHECK_FAIL(map_data->val[0]);
CHECK_FAIL(map_data->val[1]);
CHECK_FAIL(map_data->val[2]);
CHECK_FAIL(map_data->val[far]);
link = bpf_program__attach_raw_tracepoint(prog, tp_name);
if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
err = test_mmap__attach(skel);
if (CHECK(err, "attach_raw_tp", "err %d\n", err))
goto cleanup;
bss_data->in_val = 123;
@ -94,6 +76,8 @@ void test_mmap(void)
CHECK_FAIL(bss_data->in_val != 123);
CHECK_FAIL(bss_data->out_val != 123);
CHECK_FAIL(skel->bss->in_val != 123);
CHECK_FAIL(skel->bss->out_val != 123);
CHECK_FAIL(map_data->val[0] != 111);
CHECK_FAIL(map_data->val[1] != 222);
CHECK_FAIL(map_data->val[2] != 123);
@ -160,6 +144,8 @@ void test_mmap(void)
usleep(1);
CHECK_FAIL(bss_data->in_val != 321);
CHECK_FAIL(bss_data->out_val != 321);
CHECK_FAIL(skel->bss->in_val != 321);
CHECK_FAIL(skel->bss->out_val != 321);
CHECK_FAIL(map_data->val[0] != 111);
CHECK_FAIL(map_data->val[1] != 222);
CHECK_FAIL(map_data->val[2] != 321);
@ -203,6 +189,8 @@ void test_mmap(void)
map_data = tmp2;
CHECK_FAIL(bss_data->in_val != 321);
CHECK_FAIL(bss_data->out_val != 321);
CHECK_FAIL(skel->bss->in_val != 321);
CHECK_FAIL(skel->bss->out_val != 321);
CHECK_FAIL(map_data->val[0] != 111);
CHECK_FAIL(map_data->val[1] != 222);
CHECK_FAIL(map_data->val[2] != 321);
@ -214,7 +202,5 @@ void test_mmap(void)
CHECK_FAIL(munmap(bss_mmaped, bss_sz));
if (map_mmaped)
CHECK_FAIL(munmap(map_mmaped, map_sz));
if (!IS_ERR_OR_NULL(link))
bpf_link__destroy(link);
bpf_object__close(obj);
test_mmap__destroy(skel);
}

View File

@ -4,6 +4,7 @@
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
#include "libbpf_internal.h"
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
void test_perf_buffer(void)
{
int err, prog_fd, nr_cpus, i, duration = 0;
int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
const char *prog_name = "kprobe/sys_nanosleep";
const char *file = "./test_perf_buffer.o";
struct perf_buffer_opts pb_opts = {};
@ -29,15 +30,27 @@ void test_perf_buffer(void)
struct bpf_object *obj;
struct perf_buffer *pb;
struct bpf_link *link;
bool *online;
nr_cpus = libbpf_num_possible_cpus();
if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
return;
err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
&online, &on_len);
if (CHECK(err, "nr_on_cpus", "err %d\n", err))
return;
for (i = 0; i < on_len; i++)
if (online[i])
nr_on_cpus++;
/* load program */
err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
return;
if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
obj = NULL;
goto out_close;
}
prog = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
@ -64,6 +77,11 @@ void test_perf_buffer(void)
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
for (i = 0; i < nr_cpus; i++) {
if (i >= on_len || !online[i]) {
printf("skipping offline CPU #%d\n", i);
continue;
}
CPU_ZERO(&cpu_set);
CPU_SET(i, &cpu_set);
@ -81,8 +99,8 @@ void test_perf_buffer(void)
if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
goto out_free_pb;
if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
"expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt",
"expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
out_free_pb:
@ -91,4 +109,5 @@ void test_perf_buffer(void)
bpf_link__destroy(link);
out_close:
bpf_object__close(obj);
free(online);
}

View File

@ -3,8 +3,7 @@
void test_probe_user(void)
{
#define kprobe_name "__sys_connect"
const char *prog_name = "kprobe/" kprobe_name;
const char *prog_name = "kprobe/__sys_connect";
const char *obj_file = "./test_probe_user.o";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
int err, results_map_fd, sock_fd, duration = 0;
@ -33,8 +32,7 @@ void test_probe_user(void)
"err %d\n", results_map_fd))
goto cleanup;
kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
kprobe_name);
kprobe_link = bpf_program__attach(kprobe_prog);
if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
"err %ld\n", PTR_ERR(kprobe_link))) {
kprobe_link = NULL;

View File

@ -16,14 +16,11 @@ struct rdonly_map_subtest {
void test_rdonly_maps(void)
{
const char *prog_name_skip_loop = "raw_tracepoint/sys_enter:skip_loop";
const char *prog_name_part_loop = "raw_tracepoint/sys_enter:part_loop";
const char *prog_name_full_loop = "raw_tracepoint/sys_enter:full_loop";
const char *file = "test_rdonly_maps.o";
struct rdonly_map_subtest subtests[] = {
{ "skip loop", prog_name_skip_loop, 0, 0 },
{ "part loop", prog_name_part_loop, 3, 2 + 3 + 4 },
{ "full loop", prog_name_full_loop, 4, 2 + 3 + 4 + 5 },
{ "skip loop", "skip_loop", 0, 0 },
{ "part loop", "part_loop", 3, 2 + 3 + 4 },
{ "full loop", "full_loop", 4, 2 + 3 + 4 + 5 },
};
int i, err, zero = 0, duration = 0;
struct bpf_link *link = NULL;
@ -50,7 +47,7 @@ void test_rdonly_maps(void)
if (!test__start_subtest(t->subtest_name))
continue;
prog = bpf_object__find_program_by_title(obj, t->prog_name);
prog = bpf_object__find_program_by_name(obj, t->prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
t->prog_name))
goto cleanup;

View File

@ -20,8 +20,11 @@
#include <bpf/libbpf.h>
#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "test_progs.h"
#include "test_select_reuseport_common.h"
#define MAX_TEST_NAME 80
#define MIN_TCPHDR_LEN 20
#define UDPHDR_LEN 8
@ -32,11 +35,11 @@
static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
static enum result expected_results[NR_RESULTS];
static int sk_fds[REUSEPORT_ARRAY_SIZE];
static int reuseport_array, outer_map;
static int reuseport_array = -1, outer_map = -1;
static int select_by_skb_data_prog;
static int saved_tcp_syncookie;
static int saved_tcp_syncookie = -1;
static struct bpf_object *obj;
static int saved_tcp_fo;
static int saved_tcp_fo = -1;
static __u32 index_zero;
static int epfd;
@ -46,16 +49,21 @@ static union sa46 {
sa_family_t family;
} srv_sa;
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
if (__ret) { \
printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
printf(format); \
exit(-1); \
#define RET_IF(condition, tag, format...) ({ \
if (CHECK_FAIL(condition)) { \
printf(tag " " format); \
return; \
} \
})
static void create_maps(void)
#define RET_ERR(condition, tag, format...) ({ \
if (CHECK_FAIL(condition)) { \
printf(tag " " format); \
return -1; \
} \
})
static int create_maps(void)
{
struct bpf_create_map_attr attr = {};
@ -67,8 +75,8 @@ static void create_maps(void)
attr.max_entries = REUSEPORT_ARRAY_SIZE;
reuseport_array = bpf_create_map_xattr(&attr);
CHECK(reuseport_array == -1, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
RET_ERR(reuseport_array == -1, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
attr.name = "outer_map";
@ -78,63 +86,61 @@ static void create_maps(void)
attr.max_entries = 1;
attr.inner_map_fd = reuseport_array;
outer_map = bpf_create_map_xattr(&attr);
CHECK(outer_map == -1, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
RET_ERR(outer_map == -1, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
return 0;
}
static void prepare_bpf_obj(void)
static int prepare_bpf_obj(void)
{
struct bpf_program *prog;
struct bpf_map *map;
int err;
struct bpf_object_open_attr attr = {
.file = "test_select_reuseport_kern.o",
.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
};
obj = bpf_object__open_xattr(&attr);
CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
"obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
prog = bpf_program__next(NULL, obj);
CHECK(!prog, "get first bpf_program", "!prog\n");
bpf_program__set_type(prog, attr.prog_type);
obj = bpf_object__open("test_select_reuseport_kern.o");
RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
"obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
map = bpf_object__find_map_by_name(obj, "outer_map");
CHECK(!map, "find outer_map", "!map\n");
RET_ERR(!map, "find outer_map", "!map\n");
err = bpf_map__reuse_fd(map, outer_map);
CHECK(err, "reuse outer_map", "err:%d\n", err);
RET_ERR(err, "reuse outer_map", "err:%d\n", err);
err = bpf_object__load(obj);
CHECK(err, "load bpf_object", "err:%d\n", err);
RET_ERR(err, "load bpf_object", "err:%d\n", err);
prog = bpf_program__next(NULL, obj);
RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
CHECK(select_by_skb_data_prog == -1, "get prog fd",
"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
CHECK(!map, "find result_map", "!map\n");
RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
CHECK(result_map == -1, "get result_map fd",
"result_map:%d\n", result_map);
RET_ERR(result_map == -1, "get result_map fd",
"result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
CHECK(!map, "find tmp_index_ovr_map", "!map\n");
RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
CHECK(!map, "find linum_map", "!map\n");
RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
CHECK(linum_map == -1, "get linum_map fd",
"linum_map:%d\n", linum_map);
RET_ERR(linum_map == -1, "get linum_map fd",
"linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
CHECK(!map, "find data_check_map", "!map\n");
RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
CHECK(data_check_map == -1, "get data_check_map fd",
"data_check_map:%d\n", data_check_map);
RET_ERR(data_check_map == -1, "get data_check_map fd",
"data_check_map:%d\n", data_check_map);
return 0;
}
static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
@ -163,65 +169,73 @@ static int read_int_sysctl(const char *sysctl)
int fd, ret;
fd = open(sysctl, 0);
CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
sysctl, fd, errno);
RET_ERR(fd == -1, "open(sysctl)",
"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);
ret = read(fd, buf, sizeof(buf));
CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
sysctl, ret, errno);
close(fd);
RET_ERR(ret <= 0, "read(sysctl)",
"sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno);
close(fd);
return atoi(buf);
}
/*
 * Write the decimal representation of @v to the sysctl file @sysctl.
 *
 * Returns 0 on success. If the file cannot be opened or the write is short,
 * RET_ERR() reports the failure and makes this function return -1.
 */
static int write_int_sysctl(const char *sysctl, int v)
{
	int fd, ret, size, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDWR);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	size = snprintf(buf, sizeof(buf), "%d", v);
	ret = write(fd, buf, size);
	/*
	 * Close before the check so the fd is not leaked on the error path;
	 * keep write()'s errno because close() may overwrite it.
	 */
	saved_errno = errno;
	close(fd);
	RET_ERR(ret != size, "write(sysctl)",
		"sysctl:%s ret:%d size:%d errno:%d\n",
		sysctl, ret, size, saved_errno);

	return 0;
}
/*
 * Write the saved sysctl values back.
 *
 * A saved value of -1 means "nothing was saved" (it is the initial value of
 * both variables), so that sysctl is left untouched instead of having -1
 * written into it.
 */
static void restore_sysctls(void)
{
	if (saved_tcp_fo != -1)
		write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
	if (saved_tcp_syncookie != -1)
		write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
}
static void enable_fastopen(void)
static int enable_fastopen(void)
{
int fo;
fo = read_int_sysctl(TCP_FO_SYSCTL);
write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
if (fo < 0)
return -1;
return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
}
static void enable_syncookie(void)
static int enable_syncookie(void)
{
write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
}
static void disable_syncookie(void)
static int disable_syncookie(void)
{
write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
}
static __u32 get_linum(void)
static long get_linum(void)
{
__u32 linum;
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
err, errno);
RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
err, errno);
return linum;
}
@ -237,12 +251,12 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno);
RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
err, errno);
RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
err, errno);
if (type == SOCK_STREAM) {
expected.len = MIN_TCPHDR_LEN;
@ -284,22 +298,22 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
printf("expected: (0x%x, %u, %u)\n",
expected.eth_protocol, expected.ip_protocol,
expected.bind_inany);
CHECK(1, "data_check result != expected",
"bpf_prog_linum:%u\n", get_linum());
RET_IF(1, "data_check result != expected",
"bpf_prog_linum:%ld\n", get_linum());
}
CHECK(!result.hash, "data_check result.hash empty",
"result.hash:%u", result.hash);
RET_IF(!result.hash, "data_check result.hash empty",
"result.hash:%u", result.hash);
expected.len += cmd ? sizeof(*cmd) : 0;
if (type == SOCK_STREAM)
CHECK(expected.len > result.len, "expected.len > result.len",
"expected.len:%u result.len:%u bpf_prog_linum:%u\n",
expected.len, result.len, get_linum());
RET_IF(expected.len > result.len, "expected.len > result.len",
"expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
expected.len, result.len, get_linum());
else
CHECK(expected.len != result.len, "expected.len != result.len",
"expected.len:%u result.len:%u bpf_prog_linum:%u\n",
expected.len, result.len, get_linum());
RET_IF(expected.len != result.len, "expected.len != result.len",
"expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
expected.len, result.len, get_linum());
}
static void check_results(void)
@ -310,8 +324,8 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
CHECK(err == -1, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
RET_IF(err == -1, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
}
for (i = 0; i < NR_RESULTS; i++) {
@ -337,10 +351,10 @@ static void check_results(void)
printf(", %u", expected_results[i]);
printf("]\n");
CHECK(expected_results[broken] != results[broken],
"unexpected result",
"expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
broken, broken, get_linum());
RET_IF(expected_results[broken] != results[broken],
"unexpected result",
"expected_results[%u] != results[%u] bpf_prog_linum:%ld\n",
broken, broken, get_linum());
}
static int send_data(int type, sa_family_t family, void *data, size_t len,
@ -350,17 +364,17 @@ static int send_data(int type, sa_family_t family, void *data, size_t len,
int fd, err;
fd = socket(family, type, 0);
CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
sa46_init_loopback(&cli_sa, family);
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
sizeof(srv_sa));
CHECK(err != len && expected >= PASS,
"sendto()", "family:%u err:%d errno:%d expected:%d\n",
family, err, errno, expected);
RET_ERR(err != len && expected >= PASS,
"sendto()", "family:%u err:%d errno:%d expected:%d\n",
family, err, errno, expected);
return fd;
}
@ -375,47 +389,49 @@ static void do_test(int type, sa_family_t family, struct cmd *cmd,
cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
expected);
if (cli_fd < 0)
return;
nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
CHECK((nev <= 0 && expected >= PASS) ||
(nev > 0 && expected < PASS),
"nev <> expected",
"nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
nev, expected, type, family,
cmd ? cmd->reuseport_index : -1,
cmd ? cmd->pass_on_failure : -1);
RET_IF((nev <= 0 && expected >= PASS) ||
(nev > 0 && expected < PASS),
"nev <> expected",
"nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
nev, expected, type, family,
cmd ? cmd->reuseport_index : -1,
cmd ? cmd->pass_on_failure : -1);
check_results();
check_data(type, family, cmd, cli_fd);
if (expected < PASS)
return;
CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
cmd->reuseport_index != ev.data.u32,
"check cmd->reuseport_index",
"cmd:(%u, %u) ev.data.u32:%u\n",
cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
cmd->reuseport_index != ev.data.u32,
"check cmd->reuseport_index",
"cmd:(%u, %u) ev.data.u32:%u\n",
cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
srv_fd = sk_fds[ev.data.u32];
if (type == SOCK_STREAM) {
int new_fd = accept(srv_fd, NULL, 0);
CHECK(new_fd == -1, "accept(srv_fd)",
"ev.data.u32:%u new_fd:%d errno:%d\n",
ev.data.u32, new_fd, errno);
RET_IF(new_fd == -1, "accept(srv_fd)",
"ev.data.u32:%u new_fd:%d errno:%d\n",
ev.data.u32, new_fd, errno);
nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
CHECK(nread != sizeof(rcv_cmd),
"recv(new_fd)",
"ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
ev.data.u32, nread, sizeof(rcv_cmd), errno);
RET_IF(nread != sizeof(rcv_cmd),
"recv(new_fd)",
"ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
ev.data.u32, nread, sizeof(rcv_cmd), errno);
close(new_fd);
} else {
nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
CHECK(nread != sizeof(rcv_cmd),
"recv(sk_fds)",
"ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
ev.data.u32, nread, sizeof(rcv_cmd), errno);
RET_IF(nread != sizeof(rcv_cmd),
"recv(sk_fds)",
"ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
ev.data.u32, nread, sizeof(rcv_cmd), errno);
}
close(cli_fd);
@ -428,18 +444,14 @@ static void test_err_inner_map(int type, sa_family_t family)
.pass_on_failure = 0,
};
printf("%s: ", __func__);
expected_results[DROP_ERR_INNER_MAP]++;
do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
printf("OK\n");
}
static void test_err_skb_data(int type, sa_family_t family)
{
printf("%s: ", __func__);
expected_results[DROP_ERR_SKB_DATA]++;
do_test(type, family, NULL, DROP_ERR_SKB_DATA);
printf("OK\n");
}
static void test_err_sk_select_port(int type, sa_family_t family)
@ -449,10 +461,8 @@ static void test_err_sk_select_port(int type, sa_family_t family)
.pass_on_failure = 0,
};
printf("%s: ", __func__);
expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
printf("OK\n");
}
static void test_pass(int type, sa_family_t family)
@ -460,14 +470,12 @@ static void test_pass(int type, sa_family_t family)
struct cmd cmd;
int i;
printf("%s: ", __func__);
cmd.pass_on_failure = 0;
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
expected_results[PASS]++;
cmd.reuseport_index = i;
do_test(type, family, &cmd, PASS);
}
printf("OK\n");
}
static void test_syncookie(int type, sa_family_t family)
@ -481,7 +489,6 @@ static void test_syncookie(int type, sa_family_t family)
if (type != SOCK_STREAM)
return;
printf("%s: ", __func__);
/*
* +1 for TCP-SYN and
* +1 for the TCP-ACK (ack the syncookie)
@ -497,17 +504,16 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
"err:%d errno:%d\n", err, errno);
RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
"err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
CHECK(err == -1 || tmp_index != -1,
"lookup_elem(tmp_index_ovr_map)",
"err:%d errno:%d tmp_index:%d\n",
err, errno, tmp_index);
RET_IF(err == -1 || tmp_index != -1,
"lookup_elem(tmp_index_ovr_map)",
"err:%d errno:%d tmp_index:%d\n",
err, errno, tmp_index);
disable_syncookie();
printf("OK\n");
}
static void test_pass_on_err(int type, sa_family_t family)
@ -517,10 +523,8 @@ static void test_pass_on_err(int type, sa_family_t family)
.pass_on_failure = 1,
};
printf("%s: ", __func__);
expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
printf("OK\n");
}
static void test_detach_bpf(int type, sa_family_t family)
@ -532,46 +536,47 @@ static void test_detach_bpf(int type, sa_family_t family)
struct cmd cmd = {};
int optvalue = 0;
printf("%s: ", __func__);
err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
&optvalue, sizeof(optvalue));
CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
"err:%d errno:%d\n", err, errno);
RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
"err:%d errno:%d\n", err, errno);
err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
&optvalue, sizeof(optvalue));
CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
"err:%d errno:%d\n", err, errno);
RET_IF(err == 0 || errno != ENOENT,
"setsockopt(SO_DETACH_REUSEPORT_BPF)",
"err:%d errno:%d\n", err, errno);
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
CHECK(err == -1, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
RET_IF(err == -1, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
if (cli_fd < 0)
return;
nev = epoll_wait(epfd, &ev, 1, 5);
CHECK(nev <= 0, "nev <= 0",
"nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
nev, type, family);
RET_IF(nev <= 0, "nev <= 0",
"nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
nev, type, family);
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
CHECK(err == -1, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
RET_IF(err == -1, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
CHECK(nr_run_before != nr_run_after,
"nr_run_before != nr_run_after",
"nr_run_before:%u nr_run_after:%u\n",
nr_run_before, nr_run_after);
RET_IF(nr_run_before != nr_run_after,
"nr_run_before != nr_run_after",
"nr_run_before:%u nr_run_after:%u\n",
nr_run_before, nr_run_after);
printf("OK\n");
close(cli_fd);
#else
printf("%s: SKIP\n", __func__);
test__skip();
#endif
}
@ -594,73 +599,83 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
*/
for (i = first; i >= 0; i--) {
sk_fds[i] = socket(family, type, 0);
CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
i, sk_fds[i], errno);
RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
i, sk_fds[i], errno);
err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
&optval, sizeof(optval));
CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
if (i == first) {
err = setsockopt(sk_fds[i], SOL_SOCKET,
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
"err:%d errno:%d\n", err, errno);
RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
"err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
CHECK(err == -1, "listen()",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
RET_IF(err == -1, "listen()",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
CHECK(err == -1, "update_elem(reuseport_array)",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
RET_IF(err == -1, "update_elem(reuseport_array)",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
socklen_t addrlen = sizeof(srv_sa);
err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
&addrlen);
CHECK(err == -1, "getsockname()",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
RET_IF(err == -1, "getsockname()",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
}
}
epfd = epoll_create(1);
CHECK(epfd == -1, "epoll_create(1)",
"epfd:%d errno:%d\n", epfd, errno);
RET_IF(epfd == -1, "epoll_create(1)",
"epfd:%d errno:%d\n", epfd, errno);
ev.events = EPOLLIN;
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
ev.data.u32 = i;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
}
}
static void setup_per_test(int type, unsigned short family, bool inany)
static void setup_per_test(int type, sa_family_t family, bool inany,
bool no_inner_map)
{
int ovr = -1, err;
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
"err:%d errno:%d\n", err, errno);
RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
"err:%d errno:%d\n", err, errno);
/* Install reuseport_array to outer_map? */
if (no_inner_map)
return;
err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
BPF_ANY);
RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
"err:%d errno:%d\n", err, errno);
}
static void cleanup_per_test(void)
static void cleanup_per_test(bool no_inner_map)
{
int i, err;
@ -668,75 +683,124 @@ static void cleanup_per_test(void)
close(sk_fds[i]);
close(epfd);
/* Delete reuseport_array from outer_map? */
if (no_inner_map)
return;
err = bpf_map_delete_elem(outer_map, &index_zero);
CHECK(err == -1, "delete_elem(outer_map)",
"err:%d errno:%d\n", err, errno);
RET_IF(err == -1, "delete_elem(outer_map)",
"err:%d errno:%d\n", err, errno);
}
/*
 * Release the map fds and the BPF object owned by this file.
 *
 * Each resource is released at most once and its variable is reset to the
 * "not set" value (-1 for fds, NULL for the object), so calling this again
 * is harmless. An error-pointer obj left behind by a failed open is never
 * handed to bpf_object__close().
 */
static void cleanup(void)
{
	if (outer_map != -1) {
		close(outer_map);
		outer_map = -1;
	}

	if (reuseport_array != -1) {
		close(reuseport_array);
		reuseport_array = -1;
	}

	if (!IS_ERR_OR_NULL(obj))
		bpf_object__close(obj);
	obj = NULL;
}
/*
 * Map an address family to the label used in subtest names.
 * Returns "IPv4" for AF_INET, "IPv6" for AF_INET6 and "unknown" otherwise.
 * The returned string is a literal; the caller must not free it.
 */
static const char *family_str(sa_family_t family)
{
	switch (family) {
	case AF_INET:
		return "IPv4";
	case AF_INET6:
		return "IPv6";
	default:
		return "unknown";
	}
}
/*
 * Map a socket type to the label used in subtest names.
 * Returns "TCP" for SOCK_STREAM, "UDP" for SOCK_DGRAM and "unknown"
 * otherwise. The returned string is a literal; the caller must not free it.
 */
static const char *sotype_str(int sotype)
{
	switch (sotype) {
	case SOCK_STREAM:
		return "TCP";
	case SOCK_DGRAM:
		return "UDP";
	default:
		return "unknown";
	}
}
#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
/*
 * Run every test function as its own subtest for one
 * (socket type, address family, bind mode) combination.
 *
 * The subtest name is "<family>/<sotype> <INANY|LOOPBACK> <function name>".
 * Entries for which test__start_subtest() returns false are skipped; the
 * others are wrapped in setup_per_test()/cleanup_per_test().
 */
static void test_config(int sotype, sa_family_t family, bool inany)
{
	const struct test {
		void (*fn)(int sotype, sa_family_t family);
		const char *name;
		bool no_inner_map;
	} tests[] = {
		TEST_INIT(test_err_inner_map, true /* no_inner_map */),
		TEST_INIT(test_err_skb_data),
		TEST_INIT(test_err_sk_select_port),
		TEST_INIT(test_pass),
		TEST_INIT(test_syncookie),
		TEST_INIT(test_pass_on_err),
		TEST_INIT(test_detach_bpf),
	};
	char subtest_name[MAX_TEST_NAME];
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(tests); idx++) {
		const struct test *cur = &tests[idx];

		snprintf(subtest_name, sizeof(subtest_name), "%s/%s %s %s",
			 family_str(family), sotype_str(sotype),
			 inany ? "INANY" : "LOOPBACK", cur->name);

		if (test__start_subtest(subtest_name)) {
			setup_per_test(sotype, family, inany,
				       cur->no_inner_map);
			cur->fn(sotype, family);
			cleanup_per_test(cur->no_inner_map);
		}
	}
}
#define BIND_INANY true
/*
 * Run test_config() for each supported combination of socket type and
 * address family. The stream-socket combinations are run twice: once with
 * the default bind mode and once with BIND_INANY.
 */
static void test_all(void)
{
	const struct config {
		int sotype;
		sa_family_t family;
		bool inany;
	} configs[] = {
		{ SOCK_STREAM, AF_INET },
		{ SOCK_STREAM, AF_INET, BIND_INANY },
		{ SOCK_STREAM, AF_INET6 },
		{ SOCK_STREAM, AF_INET6, BIND_INANY },
		{ SOCK_DGRAM, AF_INET },
		{ SOCK_DGRAM, AF_INET6 },
	};
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(configs); idx++) {
		const struct config *cfg = &configs[idx];

		test_config(cfg->sotype, cfg->family, cfg->inany);
	}
}
/*
 * Entry point of the select_reuseport test.
 *
 * Creates the maps, loads the BPF object, saves the two TCP sysctls,
 * switches them to the values the tests need and runs every configuration.
 * Whatever step fails, control reaches the "out" label, where resources are
 * released and any saved sysctl value is written back.
 */
void test_select_reuseport(void)
{
	if (create_maps())
		goto out;
	if (prepare_bpf_obj())
		goto out;

	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
	saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
	/* Both reads must have succeeded before either sysctl is modified. */
	if (saved_tcp_fo < 0 || saved_tcp_syncookie < 0)
		goto out;

	if (enable_fastopen())
		goto out;
	if (disable_syncookie())
		goto out;

	test_all();

out:
	cleanup();
	/* Only sysctls whose saved value is not -1 are written back. */
	restore_sysctls();
}

View File

@ -11,6 +11,9 @@ void test_skb_ctx(void)
.cb[4] = 5,
.priority = 6,
.tstamp = 7,
.wire_len = 100,
.gso_segs = 8,
.mark = 9,
};
struct bpf_prog_test_run_attr tattr = {
.data_in = &pkt_v4,
@ -91,4 +94,8 @@ void test_skb_ctx(void)
"ctx_out_tstamp",
"skb->tstamp == %lld, expected %d\n",
skb.tstamp, 8);
CHECK_ATTR(skb.mark != 10,
"ctx_out_mark",
"skb->mark == %u, expected %d\n",
skb.mark, 10);
}

View File

@ -0,0 +1,63 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
/*
 * Packed { int, long long } pair declared ahead of the skeleton header
 * include.
 * NOTE(review): presumably this is the type of the skeleton's in5 and
 * handler_out5 variables (test_skeleton() accesses their .a and .b members)
 * and has to match the BPF program's definition -- confirm against the BPF
 * source.
 */
struct s {
int a;
long long b;
} __attribute__((packed));
#include "test_skeleton.skel.h"
/*
 * Exercise the generated test_skeleton API: open, load, set input variables
 * through the .bss view, attach, trigger the program and compare the output
 * variables with the inputs and with the kconfig values.
 */
void test_skeleton(void)
{
	struct test_skeleton__kconfig *kconfig;
	struct test_skeleton__bss *vars;
	struct test_skeleton *skeleton;
	int duration = 0;
	int err;

	skeleton = test_skeleton__open();
	if (CHECK(!skeleton, "skel_open", "failed to open skeleton\n"))
		return;

	/* The kconfig pointer is expected to be unset until load. */
	if (CHECK(skeleton->kconfig, "skel_kconfig", "kconfig is mmaped()!\n"))
		goto cleanup;

	err = test_skeleton__load(skeleton);
	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
		goto cleanup;

	/* Inputs are written before attaching. */
	vars = skeleton->bss;
	vars->in1 = 1;
	vars->in2 = 2;
	vars->in3 = 3;
	vars->in4 = 4;
	vars->in5.a = 5;
	vars->in5.b = 6;
	kconfig = skeleton->kconfig;

	err = test_skeleton__attach(skeleton);
	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
		goto cleanup;

	/* trigger tracepoint */
	usleep(1);

	CHECK(vars->out1 != 1, "res1", "got %d != exp %d\n", vars->out1, 1);
	CHECK(vars->out2 != 2, "res2", "got %lld != exp %d\n", vars->out2, 2);
	CHECK(vars->out3 != 3, "res3", "got %d != exp %d\n",
	      (int)vars->out3, 3);
	CHECK(vars->out4 != 4, "res4", "got %lld != exp %d\n", vars->out4, 4);
	CHECK(vars->handler_out5.a != 5, "res5", "got %d != exp %d\n",
	      vars->handler_out5.a, 5);
	CHECK(vars->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
	      vars->handler_out5.b, 6);

	CHECK(vars->bpf_syscall != kconfig->CONFIG_BPF_SYSCALL, "ext1",
	      "got %d != exp %d\n",
	      vars->bpf_syscall, kconfig->CONFIG_BPF_SYSCALL);
	CHECK(vars->kern_ver != kconfig->LINUX_KERNEL_VERSION, "ext2",
	      "got %d != exp %d\n",
	      vars->kern_ver, kconfig->LINUX_KERNEL_VERSION);

cleanup:
	test_skeleton__destroy(skeleton);
}

View File

@ -1,16 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "test_stacktrace_build_id.skel.h"
void test_stacktrace_build_id(void)
{
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
const char *prog_name = "tracepoint/random/urandom_read";
const char *file = "./test_stacktrace_build_id.o";
int err, prog_fd, stack_trace_len;
struct test_stacktrace_build_id *skel;
int err, stack_trace_len;
__u32 key, previous_key, val, duration = 0;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link = NULL;
char buf[256];
int i, j;
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@ -18,43 +16,24 @@ void test_stacktrace_build_id(void)
int retry = 1;
retry:
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
skel = test_stacktrace_build_id__open_and_load();
if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
return;
prog = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
goto close_prog;
link = bpf_program__attach_tracepoint(prog, "random", "urandom_read");
if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
goto close_prog;
err = test_stacktrace_build_id__attach(skel);
if (CHECK(err, "attach_tp", "err %d\n", err))
goto cleanup;
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
"err %d errno %d\n", err, errno))
goto disable_pmu;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
err, errno))
goto disable_pmu;
stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
control_map_fd = bpf_map__fd(skel->maps.control_map);
stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
stackmap_fd = bpf_map__fd(skel->maps.stackmap);
stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
goto disable_pmu;
goto cleanup;
if (CHECK_FAIL(system("./urandom_read")))
goto disable_pmu;
goto cleanup;
/* disable stack trace collection */
key = 0;
val = 1;
@ -66,23 +45,23 @@ void test_stacktrace_build_id(void)
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
err = extract_build_id(buf, 256);
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
do {
char build_id[64];
@ -90,7 +69,7 @@ void test_stacktrace_build_id(void)
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
@ -108,8 +87,7 @@ void test_stacktrace_build_id(void)
* try it one more time.
*/
if (build_id_matches < 1 && retry--) {
bpf_link__destroy(link);
bpf_object__close(obj);
test_stacktrace_build_id__destroy(skel);
printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
__func__);
goto retry;
@ -117,17 +95,14 @@ void test_stacktrace_build_id(void)
if (CHECK(build_id_matches < 1, "build id match",
"Didn't find expected build ID from the map\n"))
goto disable_pmu;
goto cleanup;
stack_trace_len = PERF_MAX_STACK_DEPTH
* sizeof(struct bpf_stack_build_id);
stack_trace_len = PERF_MAX_STACK_DEPTH *
sizeof(struct bpf_stack_build_id);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno);
disable_pmu:
bpf_link__destroy(link);
close_prog:
bpf_object__close(obj);
cleanup:
test_stacktrace_build_id__destroy(skel);
}

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "test_stacktrace_build_id.skel.h"
static __u64 read_perf_max_sample_freq(void)
{
@ -16,19 +17,15 @@ static __u64 read_perf_max_sample_freq(void)
void test_stacktrace_build_id_nmi(void)
{
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
const char *prog_name = "tracepoint/random/urandom_read";
const char *file = "./test_stacktrace_build_id.o";
int err, pmu_fd, prog_fd;
int control_map_fd, stackid_hmap_fd, stackmap_fd;
struct test_stacktrace_build_id *skel;
int err, pmu_fd;
struct perf_event_attr attr = {
.freq = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
__u32 key, previous_key, val, duration = 0;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link;
char buf[256];
int i, j;
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@ -38,13 +35,16 @@ void test_stacktrace_build_id_nmi(void)
attr.sample_freq = read_perf_max_sample_freq();
retry:
err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
skel = test_stacktrace_build_id__open();
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
prog = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
goto close_prog;
/* override program type */
bpf_program__set_perf_event(skel->progs.oncpu);
err = test_stacktrace_build_id__load(skel);
if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
goto cleanup;
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
@ -52,40 +52,25 @@ void test_stacktrace_build_id_nmi(void)
if (CHECK(pmu_fd < 0, "perf_event_open",
"err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
pmu_fd, errno))
goto close_prog;
goto cleanup;
link = bpf_program__attach_perf_event(prog, pmu_fd);
if (CHECK(IS_ERR(link), "attach_perf_event",
"err %ld\n", PTR_ERR(link))) {
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
"err %ld\n", PTR_ERR(skel->links.oncpu))) {
close(pmu_fd);
goto close_prog;
goto cleanup;
}
/* find map fds */
control_map_fd = bpf_find_map(__func__, obj, "control_map");
if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
"err %d errno %d\n", err, errno))
goto disable_pmu;
stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
err, errno))
goto disable_pmu;
stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
control_map_fd = bpf_map__fd(skel->maps.control_map);
stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
stackmap_fd = bpf_map__fd(skel->maps.stackmap);
if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
goto disable_pmu;
goto cleanup;
if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000")))
goto disable_pmu;
goto cleanup;
/* disable stack trace collection */
key = 0;
val = 1;
@ -97,23 +82,23 @@ void test_stacktrace_build_id_nmi(void)
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
err = extract_build_id(buf, 256);
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
do {
char build_id[64];
@ -121,7 +106,7 @@ void test_stacktrace_build_id_nmi(void)
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
goto disable_pmu;
goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
@ -139,8 +124,7 @@ void test_stacktrace_build_id_nmi(void)
* try it one more time.
*/
if (build_id_matches < 1 && retry--) {
bpf_link__destroy(link);
bpf_object__close(obj);
test_stacktrace_build_id__destroy(skel);
printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
__func__);
goto retry;
@ -148,7 +132,7 @@ void test_stacktrace_build_id_nmi(void)
if (CHECK(build_id_matches < 1, "build id match",
"Didn't find expected build ID from the map\n"))
goto disable_pmu;
goto cleanup;
/*
* We intentionally skip compare_stack_ips(). This is because we
@ -157,8 +141,6 @@ void test_stacktrace_build_id_nmi(void)
* BPF_STACK_BUILD_ID_IP;
*/
disable_pmu:
bpf_link__destroy(link);
close_prog:
bpf_object__close(obj);
cleanup:
test_stacktrace_build_id__destroy(skel);
}

View File

@ -0,0 +1,25 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
/*
 * Load the XDP program from ./xdp_dummy.o and run it 1000000 times via
 * bpf_prog_test_run() on a 128-byte input buffer. The run must succeed,
 * return XDP_PASS and report a 128-byte output.
 */
void test_xdp_perf(void)
{
	const char *file = "./xdp_dummy.o";
	const int repeat = 1000000;
	char in[128], out[128];
	__u32 duration, retval, size;
	struct bpf_object *obj;
	int prog_fd;
	int failed;
	int err;

	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
	if (CHECK_FAIL(err))
		return;

	/* input contents are never set here; only the length is checked below */
	err = bpf_prog_test_run(prog_fd, repeat, in, sizeof(in),
				out, &size, &retval, &duration);

	failed = err || retval != XDP_PASS || size != sizeof(in);
	CHECK(failed, "xdp-perf", "err %d errno %d retval %d size %d\n",
	      err, errno, retval, size);

	bpf_object__close(obj);
}

View File

@ -0,0 +1,3 @@
#include "core_reloc_types.h"
/*
 * Empty function taking struct core_reloc_arrays___equiv_zero_sz_arr by value.
 * NOTE(review): presumably exists only so that this type is referenced from
 * this translation unit -- confirm against the build rules.
 */
void f(struct core_reloc_arrays___equiv_zero_sz_arr x)
{
}

View File

@ -0,0 +1,3 @@
#include "core_reloc_types.h"
/*
 * Empty function taking struct core_reloc_arrays___err_bad_zero_sz_arr by
 * value.
 * NOTE(review): presumably exists only so that this type is referenced from
 * this translation unit -- confirm against the build rules.
 */
void f(struct core_reloc_arrays___err_bad_zero_sz_arr x)
{
}

View File

@ -0,0 +1,3 @@
#include "core_reloc_types.h"
/*
 * Empty function taking struct core_reloc_arrays___fixed_arr by value.
 * NOTE(review): presumably exists only so that this type is referenced from
 * this translation unit -- confirm against the build rules.
 */
void f(struct core_reloc_arrays___fixed_arr x)
{
}

Some files were not shown because too many files have changed in this diff Show More