Merge branch 'bpf-hw-offload'

Jakub Kicinski says:

====================
BPF hardware offload (cls_bpf for now)

Rebased and improved.

v7:
 - fix patch 4.
v6 (patch 8 only):
 - explicitly check for registers >= MAX_BPF_REG;
 - fix leaky error path.
v5:
 - fix names of guard defines in bpf_verifier.h.
v4:
 - rename parser -> analyzer;
 - reorganize the analyzer patches a bit;
 - use bitfield.h directly.

--- merge blurb:
In the last year a lot of progress has been made on offloading
simpler TC classifiers.  There is also growing interest in using
BPF for generic high-speed packet processing in the kernel.
It seems beneficial to tie those two trends together and think
about hardware offloads of BPF programs.  This patch set presents
such an offload to Netronome smart NICs.  cls_bpf is extended with
hardware offload capabilities and the NFP driver gets a JIT
translator which, in the presence of capable firmware, can be used
to offload the BPF program onto the card.

The BPF JIT implementation is not 100% complete (e.g. some
instructions are missing) but it is functional.  Encouragingly, it
should be possible to offload most (if not all) advanced BPF
features onto the NIC - including packet modification, maps,
tunnel encap/decap etc.

An example of the basic tests I used:
  __section_cls_entry
  int cls_entry(struct __sk_buff *skb)
  {
	if (load_byte(skb, 0) != 0x0)
		return 0;

	if (load_byte(skb, 4) != 0x1)
		return 0;

	skb->mark = 0xcafe;

	if (load_byte(skb, 50) != 0xff)
		return 0;

	return ~0U;
  }

The above code can be compiled with Clang and loaded like this:

	ethtool -K p1p1 hw-tc-offload on
	tc qdisc add dev p1p1 ingress
	tc filter add dev p1p1 parent ffff:  bpf obj prog.o action drop

This set implements the basic transparent offload, the skip_{sw,hw}
flags and reporting statistics for cls_bpf.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller, 2016-09-21 19:50:15 -04:00
commit c3d9b9f3de
18 changed files with 3395 additions and 222 deletions


@ -3,6 +3,13 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o
nfp_netvf-objs := \
nfp_net_common.o \
nfp_net_ethtool.o \
nfp_net_offload.o \
nfp_netvf_main.o
ifeq ($(CONFIG_BPF_SYSCALL),y)
nfp_netvf-objs += \
nfp_bpf_verifier.o \
nfp_bpf_jit.o
endif
nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o


@ -0,0 +1,233 @@
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __NFP_ASM_H__
#define __NFP_ASM_H__ 1
#include "nfp_bpf.h"
#define REG_NONE 0
#define RE_REG_NO_DST 0x020
#define RE_REG_IMM 0x020
#define RE_REG_IMM_encode(x) \
(RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
#define RE_REG_IMM_MAX 0x07fULL
#define RE_REG_XFR 0x080
#define UR_REG_XFR 0x180
#define UR_REG_NN 0x280
#define UR_REG_NO_DST 0x300
#define UR_REG_IMM UR_REG_NO_DST
#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
#define UR_REG_IMM_MAX 0x0ffULL
#define OP_BR_BASE 0x0d800000020ULL
#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL
#define OP_BR_MASK 0x0000000001fULL
#define OP_BR_EV_PIP 0x00000000300ULL
#define OP_BR_CSS 0x0000003c000ULL
#define OP_BR_DEFBR 0x00000300000ULL
#define OP_BR_ADDR_LO 0x007ffc00000ULL
#define OP_BR_ADDR_HI 0x10000000000ULL
#define nfp_is_br(_insn) \
(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
enum br_mask {
BR_BEQ = 0x00,
BR_BNE = 0x01,
BR_BHS = 0x04,
BR_BLO = 0x05,
BR_BGE = 0x08,
BR_UNC = 0x18,
};
enum br_ev_pip {
BR_EV_PIP_UNCOND = 0,
BR_EV_PIP_COND = 1,
};
enum br_ctx_signal_state {
BR_CSS_NONE = 2,
};
#define OP_BBYTE_BASE 0x0c800000000ULL
#define OP_BB_A_SRC 0x000000000ffULL
#define OP_BB_BYTE 0x00000000300ULL
#define OP_BB_B_SRC 0x0000003fc00ULL
#define OP_BB_I8 0x00000040000ULL
#define OP_BB_EQ 0x00000080000ULL
#define OP_BB_DEFBR 0x00000300000ULL
#define OP_BB_ADDR_LO 0x007ffc00000ULL
#define OP_BB_ADDR_HI 0x10000000000ULL
#define OP_BALU_BASE 0x0e800000000ULL
#define OP_BA_A_SRC 0x000000003ffULL
#define OP_BA_B_SRC 0x000000ffc00ULL
#define OP_BA_DEFBR 0x00000300000ULL
#define OP_BA_ADDR_HI 0x0007fc00000ULL
#define OP_IMMED_A_SRC 0x000000003ffULL
#define OP_IMMED_B_SRC 0x000000ffc00ULL
#define OP_IMMED_IMM 0x0000ff00000ULL
#define OP_IMMED_WIDTH 0x00060000000ULL
#define OP_IMMED_INV 0x00080000000ULL
#define OP_IMMED_SHIFT 0x00600000000ULL
#define OP_IMMED_BASE 0x0f000000000ULL
#define OP_IMMED_WR_AB 0x20000000000ULL
enum immed_width {
IMMED_WIDTH_ALL = 0,
IMMED_WIDTH_BYTE = 1,
IMMED_WIDTH_WORD = 2,
};
enum immed_shift {
IMMED_SHIFT_0B = 0,
IMMED_SHIFT_1B = 1,
IMMED_SHIFT_2B = 2,
};
#define OP_SHF_BASE 0x08000000000ULL
#define OP_SHF_A_SRC 0x000000000ffULL
#define OP_SHF_SC 0x00000000300ULL
#define OP_SHF_B_SRC 0x0000003fc00ULL
#define OP_SHF_I8 0x00000040000ULL
#define OP_SHF_SW 0x00000080000ULL
#define OP_SHF_DST 0x0000ff00000ULL
#define OP_SHF_SHIFT 0x001f0000000ULL
#define OP_SHF_OP 0x00e00000000ULL
#define OP_SHF_DST_AB 0x01000000000ULL
#define OP_SHF_WR_AB 0x20000000000ULL
enum shf_op {
SHF_OP_NONE = 0,
SHF_OP_AND = 2,
SHF_OP_OR = 5,
};
enum shf_sc {
SHF_SC_R_ROT = 0,
SHF_SC_R_SHF = 1,
SHF_SC_L_SHF = 2,
SHF_SC_R_DSHF = 3,
};
#define OP_ALU_A_SRC 0x000000003ffULL
#define OP_ALU_B_SRC 0x000000ffc00ULL
#define OP_ALU_DST 0x0003ff00000ULL
#define OP_ALU_SW 0x00040000000ULL
#define OP_ALU_OP 0x00f80000000ULL
#define OP_ALU_DST_AB 0x01000000000ULL
#define OP_ALU_BASE 0x0a000000000ULL
#define OP_ALU_WR_AB 0x20000000000ULL
enum alu_op {
ALU_OP_NONE = 0x00,
ALU_OP_ADD = 0x01,
ALU_OP_NEG = 0x04,
ALU_OP_AND = 0x08,
ALU_OP_SUB_C = 0x0d,
ALU_OP_ADD_C = 0x11,
ALU_OP_OR = 0x14,
ALU_OP_SUB = 0x15,
ALU_OP_XOR = 0x18,
};
enum alu_dst_ab {
ALU_DST_A = 0,
ALU_DST_B = 1,
};
#define OP_LDF_BASE 0x0c000000000ULL
#define OP_LDF_A_SRC 0x000000000ffULL
#define OP_LDF_SC 0x00000000300ULL
#define OP_LDF_B_SRC 0x0000003fc00ULL
#define OP_LDF_I8 0x00000040000ULL
#define OP_LDF_SW 0x00000080000ULL
#define OP_LDF_ZF 0x00000100000ULL
#define OP_LDF_BMASK 0x0000f000000ULL
#define OP_LDF_SHF 0x001f0000000ULL
#define OP_LDF_WR_AB 0x20000000000ULL
#define OP_CMD_A_SRC 0x000000000ffULL
#define OP_CMD_CTX 0x00000000300ULL
#define OP_CMD_B_SRC 0x0000003fc00ULL
#define OP_CMD_TOKEN 0x000000c0000ULL
#define OP_CMD_XFER 0x00001f00000ULL
#define OP_CMD_CNT 0x0000e000000ULL
#define OP_CMD_SIG 0x000f0000000ULL
#define OP_CMD_TGT_CMD 0x07f00000000ULL
#define OP_CMD_MODE 0x1c0000000000ULL
struct cmd_tgt_act {
u8 token;
u8 tgt_cmd;
};
enum cmd_tgt_map {
CMD_TGT_READ8,
CMD_TGT_WRITE8,
CMD_TGT_READ_LE,
CMD_TGT_READ_SWAP_LE,
__CMD_TGT_MAP_SIZE,
};
enum cmd_mode {
CMD_MODE_40b_AB = 0,
CMD_MODE_40b_BA = 1,
CMD_MODE_32b = 4,
};
enum cmd_ctx_swap {
CMD_CTX_SWAP = 0,
CMD_CTX_NO_SWAP = 3,
};
#define OP_LCSR_BASE 0x0fc00000000ULL
#define OP_LCSR_A_SRC 0x000000003ffULL
#define OP_LCSR_B_SRC 0x000000ffc00ULL
#define OP_LCSR_WRITE 0x00000200000ULL
#define OP_LCSR_ADDR 0x001ffc00000ULL
enum lcsr_wr_src {
LCSR_WR_AREG,
LCSR_WR_BREG,
LCSR_WR_IMM,
};
#define OP_CARB_BASE 0x0e000000000ULL
#define OP_CARB_OR 0x00000010000ULL
#endif
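The JIT translator that consumes these encodings is in the large diff
suppressed further below; as a hedged sketch (emit_br_unc() is a
hypothetical name, not the driver's emitter), the field masks above are
meant to be combined with the linux/bitfield.h helpers, e.g. to form an
unconditional branch word:

static u64 emit_br_unc(u16 addr, u8 defer)
{
	/* start from the opcode base and OR in each encoded field */
	return OP_BR_BASE |
	       FIELD_PREP(OP_BR_MASK, BR_UNC) |
	       FIELD_PREP(OP_BR_EV_PIP, BR_EV_PIP_UNCOND) |
	       FIELD_PREP(OP_BR_CSS, BR_CSS_NONE) |
	       FIELD_PREP(OP_BR_DEFBR, defer) |
	       FIELD_PREP(OP_BR_ADDR_LO, addr);	/* low address bits only */
}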


@ -0,0 +1,212 @@
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __NFP_BPF_H__
#define __NFP_BPF_H__ 1
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/types.h>
#define FIELD_FIT(mask, val) (!((((u64)val) << __bf_shf(mask)) & ~(mask)))
/* For branch fixup logic use up-most byte of branch instruction as scratch
* area. Remember to clear this before sending instructions to HW!
*/
#define OP_BR_SPECIAL 0xff00000000000000ULL
enum br_special {
OP_BR_NORMAL = 0,
OP_BR_GO_OUT,
OP_BR_GO_ABORT,
};
enum static_regs {
STATIC_REG_PKT = 1,
#define REG_PKT_BANK ALU_DST_A
STATIC_REG_IMM = 2, /* Bank AB */
};
enum nfp_bpf_action_type {
NN_ACT_TC_DROP,
NN_ACT_TC_REDIR,
NN_ACT_DIRECT,
};
/* Software register representation, hardware encoding in asm.h */
#define NN_REG_TYPE GENMASK(31, 24)
#define NN_REG_VAL GENMASK(7, 0)
enum nfp_bpf_reg_type {
NN_REG_GPR_A = BIT(0),
NN_REG_GPR_B = BIT(1),
NN_REG_NNR = BIT(2),
NN_REG_XFER = BIT(3),
NN_REG_IMM = BIT(4),
NN_REG_NONE = BIT(5),
};
#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B)
#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT)
#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM)
#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM)
#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM)
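/* Worked example (illustration only, not from the original header):
 * reg_b(5) == 5 | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B) == 0x02000005,
 * i.e. NN_REG_VAL carries the register number and NN_REG_TYPE the bank;
 * FIELD_GET(NN_REG_TYPE, x) / FIELD_GET(NN_REG_VAL, x) split it again.
 */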
#define NFP_BPF_ABI_FLAGS reg_nnr(0)
#define NFP_BPF_ABI_FLAG_MARK 1
#define NFP_BPF_ABI_MARK reg_nnr(1)
#define NFP_BPF_ABI_PKT reg_nnr(2)
#define NFP_BPF_ABI_LEN reg_nnr(3)
struct nfp_prog;
struct nfp_insn_meta;
typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
#define nfp_prog_first_meta(nfp_prog) \
list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
#define nfp_prog_last_meta(nfp_prog) \
list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
#define nfp_meta_next(meta) list_next_entry(meta, l)
#define nfp_meta_prev(meta) list_prev_entry(meta, l)
/**
* struct nfp_insn_meta - BPF instruction wrapper
* @insn: BPF instruction
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
* @skip: skip this instruction (optimized out)
* @double_cb: callback for second part of the instruction
* @l: link on nfp_prog->insns list
*/
struct nfp_insn_meta {
struct bpf_insn insn;
unsigned int off;
unsigned short n;
bool skip;
instr_cb_t double_cb;
struct list_head l;
};
#define BPF_SIZE_MASK 0x18
static inline u8 mbpf_class(const struct nfp_insn_meta *meta)
{
return BPF_CLASS(meta->insn.code);
}
static inline u8 mbpf_src(const struct nfp_insn_meta *meta)
{
return BPF_SRC(meta->insn.code);
}
static inline u8 mbpf_op(const struct nfp_insn_meta *meta)
{
return BPF_OP(meta->insn.code);
}
static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
{
return BPF_MODE(meta->insn.code);
}
/**
* struct nfp_prog - nfp BPF program
* @prog: machine code
* @prog_len: number of valid instructions in @prog array
* @__prog_alloc_len: alloc size of @prog array
* @act: BPF program/action type (TC DA, TC with action, XDP etc.)
* @num_regs: number of registers used by this program
* @regs_per_thread: number of basic registers allocated per thread
* @start_off: address of the first instruction in the memory
* @tgt_out: jump target for normal exit
* @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
* @tgt_done: jump target to get the next packet
* @n_translated: number of successfully translated instructions (for errors)
* @error: error code if something went wrong
* @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
*/
struct nfp_prog {
u64 *prog;
unsigned int prog_len;
unsigned int __prog_alloc_len;
enum nfp_bpf_action_type act;
unsigned int num_regs;
unsigned int regs_per_thread;
unsigned int start_off;
unsigned int tgt_out;
unsigned int tgt_abort;
unsigned int tgt_done;
unsigned int n_translated;
int error;
struct list_head insns;
};
struct nfp_bpf_result {
unsigned int n_instr;
bool dense_mode;
};
#ifdef CONFIG_BPF_SYSCALL
int
nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
unsigned int prog_start, unsigned int prog_done,
unsigned int prog_sz, struct nfp_bpf_result *res);
#else
static inline int
nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
unsigned int prog_start, unsigned int prog_done,
unsigned int prog_sz, struct nfp_bpf_result *res)
{
return -ENOTSUPP;
}
#endif
int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog);
#endif
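As a small illustration of the meta/list helpers above (a hedged sketch;
walk_prog() is a hypothetical name, not code from the suppressed JIT
diff), a translation pass would typically iterate the wrappers like this:

static void walk_prog(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->skip)	/* instruction was optimized out */
			continue;
		/* dispatch on mbpf_class(meta) / mbpf_op(meta) here */
	}
}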

File diff suppressed because it is too large.


@ -0,0 +1,171 @@
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define pr_fmt(fmt) "NFP net bpf: " fmt
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
#include "nfp_bpf.h"
/* Analyzer/verifier definitions */
struct nfp_bpf_analyzer_priv {
struct nfp_prog *prog;
struct nfp_insn_meta *meta;
};
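/* Annotation (not in the original source): the verifier hands us an
 * instruction index while we keep a list cursor, so the helper below
 * compares the hop count from the cached meta, the list head and the
 * list tail, starts from whichever is closest, and then steps forward
 * or backward until it reaches insn_idx.
 */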
static struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns)
{
unsigned int forward, backward, i;
backward = meta->n - insn_idx;
forward = insn_idx - meta->n;
if (min(forward, backward) > n_insns - insn_idx - 1) {
backward = n_insns - insn_idx - 1;
meta = nfp_prog_last_meta(nfp_prog);
}
if (min(forward, backward) > insn_idx && backward > insn_idx) {
forward = insn_idx;
meta = nfp_prog_first_meta(nfp_prog);
}
if (forward < backward)
for (i = 0; i < forward; i++)
meta = nfp_meta_next(meta);
else
for (i = 0; i < backward; i++)
meta = nfp_meta_prev(meta);
return meta;
}
static int
nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
const struct bpf_verifier_env *env)
{
const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
if (reg0->type != CONST_IMM) {
pr_info("unsupported exit state: %d, imm: %llx\n",
reg0->type, reg0->imm);
return -EINVAL;
}
if (nfp_prog->act != NN_ACT_DIRECT &&
reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
pr_info("unsupported exit state: %d, imm: %llx\n",
reg0->type, reg0->imm);
return -EINVAL;
}
if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
reg0->imm != TC_ACT_QUEUED) {
pr_info("unsupported exit state: %d, imm: %llx\n",
reg0->type, reg0->imm);
return -EINVAL;
}
return 0;
}
static int
nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
const struct bpf_verifier_env *env, u8 reg)
{
if (env->cur_state.regs[reg].type != PTR_TO_CTX)
return -EINVAL;
return 0;
}
static int
nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
{
struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv;
struct nfp_insn_meta *meta = priv->meta;
meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len);
priv->meta = meta;
if (meta->insn.src_reg == BPF_REG_10 ||
meta->insn.dst_reg == BPF_REG_10) {
pr_err("stack not yet supported\n");
return -EINVAL;
}
if (meta->insn.src_reg >= MAX_BPF_REG ||
meta->insn.dst_reg >= MAX_BPF_REG) {
pr_err("program uses extended registers - jit hardening?\n");
return -EINVAL;
}
if (meta->insn.code == (BPF_JMP | BPF_EXIT))
return nfp_bpf_check_exit(priv->prog, env);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
return nfp_bpf_check_ctx_ptr(priv->prog, env,
meta->insn.src_reg);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
return nfp_bpf_check_ctx_ptr(priv->prog, env,
meta->insn.dst_reg);
return 0;
}
static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
.insn_hook = nfp_verify_insn,
};
int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
{
struct nfp_bpf_analyzer_priv *priv;
int ret;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
priv->prog = nfp_prog;
priv->meta = nfp_prog_first_meta(nfp_prog);
ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv);
kfree(priv);
return ret;
}
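The bpf_analyzer() entry point is generic; a minimal sketch of another,
purely hypothetical user, assuming only the insn_hook contract visible
above (the callback runs for each instruction the verifier walks and a
non-zero return aborts verification):

static int count_insn(struct bpf_verifier_env *env, int insn_idx,
		      int prev_insn_idx)
{
	unsigned int *cnt = env->analyzer_priv;

	(*cnt)++;
	return 0;
}

static const struct bpf_ext_analyzer_ops count_ops = {
	.insn_hook	= count_insn,
};

static int count_verified_insns(struct bpf_prog *prog, unsigned int *cnt)
{
	*cnt = 0;
	return bpf_analyzer(prog, &count_ops, cnt);
}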


@ -62,6 +62,9 @@
/* Max time to wait for NFP to respond on updates (in seconds) */
#define NFP_NET_POLL_TIMEOUT 5
/* Interval for reading offloaded filter stats */
#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100)
/* Bar allocation */
#define NFP_NET_CTRL_BAR 0
#define NFP_NET_Q0_BAR 2
@ -220,7 +223,7 @@ struct nfp_net_tx_ring {
#define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11))
#define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10))
#define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9))
#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8))
#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8))
#define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7))
#define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6))
#define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5))
@ -266,6 +269,8 @@ struct nfp_net_rx_desc {
};
};
#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0)
struct nfp_net_rx_hash {
__be32 hash_type;
__be32 hash;
@ -405,6 +410,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
fw_ver->minor == minor;
}
struct nfp_stat_pair {
u64 pkts;
u64 bytes;
};
/**
* struct nfp_net - NFP network device structure
* @pdev: Backpointer to PCI device
@ -413,6 +423,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
* @is_vf: Is the driver attached to a VF?
* @is_nfp3200: Is the driver for a NFP-3200 card?
* @fw_loaded: Is the firmware loaded?
* @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf
* @ctrl: Local copy of the control register/word.
* @fl_bufsz: Currently configured size of the freelist buffers
* @rx_offset: Offset in the RX buffers where packet data starts
@ -427,6 +438,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
* @rss_cfg: RSS configuration
* @rss_key: RSS secret key
* @rss_itbl: RSS indirection table
* @rx_filter: Filter offload statistics - dropped packets/bytes
* @rx_filter_prev: Filter offload statistics - values from previous update
* @rx_filter_change: Jiffies when statistics last changed
* @rx_filter_stats_timer: Timer for polling filter offload statistics
* @rx_filter_lock: Lock protecting timer state changes (teardown)
* @max_tx_rings: Maximum number of TX rings supported by the Firmware
* @max_rx_rings: Maximum number of RX rings supported by the Firmware
* @num_tx_rings: Currently configured number of TX rings
@ -473,6 +489,7 @@ struct nfp_net {
unsigned is_vf:1;
unsigned is_nfp3200:1;
unsigned fw_loaded:1;
unsigned bpf_offload_skip_sw:1;
u32 ctrl;
u32 fl_bufsz;
@ -502,6 +519,11 @@ struct nfp_net {
u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
struct nfp_stat_pair rx_filter, rx_filter_prev;
unsigned long rx_filter_change;
struct timer_list rx_filter_stats_timer;
spinlock_t rx_filter_lock;
int max_tx_rings;
int max_rx_rings;
@ -561,12 +583,28 @@ struct nfp_net {
/* Functions to read/write from/to a BAR
* Performs any endian conversion necessary.
*/
static inline u8 nn_readb(struct nfp_net *nn, int off)
{
return readb(nn->ctrl_bar + off);
}
static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
{
writeb(val, nn->ctrl_bar + off);
}
/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */
/* NFP-3200 can't handle 16-bit accesses too well */
static inline u16 nn_readw(struct nfp_net *nn, int off)
{
WARN_ON_ONCE(nn->is_nfp3200);
return readw(nn->ctrl_bar + off);
}
static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
{
WARN_ON_ONCE(nn->is_nfp3200);
writew(val, nn->ctrl_bar + off);
}
static inline u32 nn_readl(struct nfp_net *nn, int off)
{
@ -757,4 +795,9 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
}
#endif /* CONFIG_NFP_NET_DEBUG */
void nfp_net_filter_stats_timer(unsigned long data);
int
nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
struct tc_cls_bpf_offload *cls_bpf);
#endif /* _NFP_NET_H_ */


@ -60,6 +60,7 @@
#include <linux/ktime.h>
#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include "nfp_net_ctrl.h"
@ -1292,36 +1293,70 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
}
}
/**
* nfp_net_set_hash() - Set SKB hash data
* @netdev: adapter's net_device structure
* @skb: SKB to set the hash data on
* @rxd: RX descriptor
*
* The RSS hash and hash-type are pre-pended to the packet data.
* Extract and decode it and set the skb fields.
*/
static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
struct nfp_net_rx_desc *rxd)
unsigned int type, __be32 *hash)
{
if (!(netdev->features & NETIF_F_RXHASH))
return;
switch (type) {
case NFP_NET_RSS_IPV4:
case NFP_NET_RSS_IPV6:
case NFP_NET_RSS_IPV6_EX:
skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
break;
default:
skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
break;
}
}
static void
nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
struct nfp_net_rx_desc *rxd)
{
struct nfp_net_rx_hash *rx_hash;
if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
!(netdev->features & NETIF_F_RXHASH))
if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
return;
rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
switch (be32_to_cpu(rx_hash->hash_type)) {
case NFP_NET_RSS_IPV4:
case NFP_NET_RSS_IPV6:
case NFP_NET_RSS_IPV6_EX:
skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
break;
default:
skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
break;
nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
&rx_hash->hash);
}
static void *
nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
int meta_len)
{
u8 *data = skb->data - meta_len;
u32 meta_info;
meta_info = get_unaligned_be32(data);
data += 4;
while (meta_info) {
switch (meta_info & NFP_NET_META_FIELD_MASK) {
case NFP_NET_META_HASH:
meta_info >>= NFP_NET_META_FIELD_SIZE;
nfp_net_set_hash(netdev, skb,
meta_info & NFP_NET_META_FIELD_MASK,
(__be32 *)data);
data += 4;
break;
case NFP_NET_META_MARK:
skb->mark = get_unaligned_be32(data);
data += 4;
break;
default:
return NULL;
}
meta_info >>= NFP_NET_META_FIELD_SIZE;
}
return data;
}
/**
@ -1438,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
skb_reserve(skb, nn->rx_offset);
skb_put(skb, data_len - meta_len);
nfp_net_set_hash(nn->netdev, skb, rxd);
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += skb->len;
u64_stats_update_end(&r_vec->rx_sync);
if (nn->fw_ver.major <= 3) {
nfp_net_set_hash_desc(nn->netdev, skb, rxd);
} else if (meta_len) {
void *end;
end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
if (unlikely(end != skb->data)) {
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_drops++;
u64_stats_update_end(&r_vec->rx_sync);
dev_kfree_skb_any(skb);
nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
continue;
}
}
skb_record_rx_queue(skb, rx_ring->idx);
skb->protocol = eth_type_trans(skb, nn->netdev);
@ -2382,6 +2432,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
return stats;
}
static bool nfp_net_ebpf_capable(struct nfp_net *nn)
{
if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
return true;
return false;
}
static int
nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
struct tc_to_netdev *tc)
{
struct nfp_net *nn = netdev_priv(netdev);
if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
return -ENOTSUPP;
if (proto != htons(ETH_P_ALL))
return -ENOTSUPP;
if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
return -EINVAL;
}
static int nfp_net_set_features(struct net_device *netdev,
netdev_features_t features)
{
@ -2436,6 +2511,11 @@ static int nfp_net_set_features(struct net_device *netdev,
new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
}
if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
nn_err(nn, "Cannot disable HW TC offload while in use\n");
return -EBUSY;
}
nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
netdev->features, features, changed);
@ -2585,6 +2665,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
.ndo_stop = nfp_net_netdev_close,
.ndo_start_xmit = nfp_net_tx,
.ndo_get_stats64 = nfp_net_stat64,
.ndo_setup_tc = nfp_net_setup_tc,
.ndo_tx_timeout = nfp_net_tx_timeout,
.ndo_set_rx_mode = nfp_net_set_rx_mode,
.ndo_change_mtu = nfp_net_change_mtu,
@ -2610,7 +2691,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->fw_ver.resv, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu);
nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
@ -2627,7 +2708,8 @@ void nfp_net_info(struct nfp_net *nn)
nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "");
nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
nfp_net_ebpf_capable(nn) ? "BPF " : "");
}
/**
@ -2670,10 +2752,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
spin_lock_init(&nn->reconfig_lock);
spin_lock_init(&nn->rx_filter_lock);
spin_lock_init(&nn->link_status_lock);
setup_timer(&nn->reconfig_timer,
nfp_net_reconfig_timer, (unsigned long)nn);
setup_timer(&nn->rx_filter_stats_timer,
nfp_net_filter_stats_timer, (unsigned long)nn);
return nn;
}
@ -2795,6 +2880,9 @@ int nfp_net_netdev_init(struct net_device *netdev)
netdev->features = netdev->hw_features;
if (nfp_net_ebpf_capable(nn))
netdev->hw_features |= NETIF_F_HW_TC;
/* Advertise but disable TSO by default. */
netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
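As a worked example of the RX prepend that nfp_net_set_hash() and
nfp_net_parse_meta() above decode on newer firmware (fw_ver.major > 3) -
field order taken from the parsing code, the concrete contents invented
for illustration - the first big-endian 32-bit word packs 4-bit
field-type codes starting from the least significant nibble, and each
field is followed by one 32-bit value:

/* Hypothetical prepend carrying an RSS hash plus a mark:
 *
 *   meta_info = (NFP_NET_META_MARK << 8) | (hash_type << 4) | NFP_NET_META_HASH
 *   data[0]   = RSS hash  (the NFP_NET_META_HASH case also consumes the
 *                          hash_type nibble)
 *   data[1]   = mark      (stored into skb->mark)
 *
 * After both fields are consumed the parser returns the pointer just past
 * data[1], which must land exactly on skb->data or the frame is dropped.
 */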


@ -65,6 +65,13 @@
*/
#define NFP_NET_LSO_MAX_HDR_SZ 255
/**
* Prepend field types
*/
#define NFP_NET_META_FIELD_SIZE 4
#define NFP_NET_META_HASH 1 /* next field carries hash type */
#define NFP_NET_META_MARK 2
/**
* Hash type pre-pended when an RSS hash was computed
*/
@ -123,6 +130,7 @@
#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */
#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */
#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */
#define NFP_NET_CFG_UPDATE 0x0004
#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */
#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */
@ -134,6 +142,7 @@
#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */
#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */
#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */
#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* An error occurred */
#define NFP_NET_CFG_TXRS_ENABLE 0x0008
#define NFP_NET_CFG_RXRS_ENABLE 0x0010
@ -196,10 +205,37 @@
#define NFP_NET_CFG_VXLAN_SZ 0x0008
/**
* 64B reserved for future use (0x0080 - 0x00c0)
* NFP6000 - BPF section
* @NFP_NET_CFG_BPF_ABI: BPF ABI version
* @NFP_NET_CFG_BPF_CAP: BPF capabilities
* @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes
* @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded
* @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit
* @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks
* @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks
* @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions
* @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code
*/
#define NFP_NET_CFG_RESERVED 0x0080
#define NFP_NET_CFG_RESERVED_SZ 0x0040
#define NFP_NET_CFG_BPF_ABI 0x0080
#define NFP_NET_BPF_ABI 1
#define NFP_NET_CFG_BPF_CAP 0x0081
#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */
#define NFP_NET_CFG_BPF_MAX_LEN 0x0082
#define NFP_NET_CFG_BPF_START 0x0084
#define NFP_NET_CFG_BPF_DONE 0x0086
#define NFP_NET_CFG_BPF_STACK_SZ 0x0088
#define NFP_NET_CFG_BPF_INL_MTU 0x0089
#define NFP_NET_CFG_BPF_SIZE 0x008e
#define NFP_NET_CFG_BPF_ADDR 0x0090
#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */
#define NFP_NET_CFG_BPF_CFG_MASK 7ULL
#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK)
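/* Annotation (not from the original header): the JITed code buffer is at
 * least 8-byte aligned, so the low three bits of NFP_NET_CFG_BPF_ADDR are
 * free to carry config flags - nfp_net_bpf_load_and_start() further below
 * ORs NFP_NET_CFG_BPF_CFG_8CTX into the DMA address before writing it.
 */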
/**
* 40B reserved for future use (0x0098 - 0x00c0)
*/
#define NFP_NET_CFG_RESERVED 0x0098
#define NFP_NET_CFG_RESERVED_SZ 0x0028
/**
* RSS configuration (0x0100 - 0x01ac):
@ -303,6 +339,15 @@
#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80)
#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88)
#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90)
#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98)
#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0)
#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8)
#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0)
#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8)
#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0)
#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8)
/**
* Per ring stats (0x1000 - 0x1800)
* options, 64bit per entry


@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
{"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
{"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
/* see comments in outro functions in nfp_bpf_jit.c to find out
* how different BPF modes use app-specific counters
*/
{"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
{"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
{"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
{"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
{"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
{"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
};
#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)


@ -0,0 +1,291 @@
/*
* Copyright (C) 2016 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
* nfp_net_offload.c
* Netronome network device driver: TC offload functions for PF and VF
*/
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/list.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_mirred.h>
#include "nfp_bpf.h"
#include "nfp_net_ctrl.h"
#include "nfp_net.h"
void nfp_net_filter_stats_timer(unsigned long data)
{
struct nfp_net *nn = (void *)data;
struct nfp_stat_pair latest;
spin_lock_bh(&nn->rx_filter_lock);
if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
mod_timer(&nn->rx_filter_stats_timer,
jiffies + NFP_NET_STAT_POLL_IVL);
spin_unlock_bh(&nn->rx_filter_lock);
latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
if (latest.pkts != nn->rx_filter.pkts)
nn->rx_filter_change = jiffies;
nn->rx_filter = latest;
}
static void nfp_net_bpf_stats_reset(struct nfp_net *nn)
{
nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
nn->rx_filter_prev = nn->rx_filter;
nn->rx_filter_change = jiffies;
}
static int
nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
{
struct tc_action *a;
LIST_HEAD(actions);
u64 bytes, pkts;
pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts;
bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes;
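/* Annotation: the adjustment below presumably aligns the firmware frame
 * counters with what the software path would report, since ingress
 * skb->len does not include the Ethernet header.
 */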
bytes -= pkts * ETH_HLEN;
nn->rx_filter_prev = nn->rx_filter;
preempt_disable();
tcf_exts_to_list(cls_bpf->exts, &actions);
list_for_each_entry(a, &actions, list)
tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change);
preempt_enable();
return 0;
}
static int
nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
{
const struct tc_action *a;
LIST_HEAD(actions);
/* TC direct action */
if (cls_bpf->exts_integrated) {
if (tc_no_actions(cls_bpf->exts))
return NN_ACT_DIRECT;
return -ENOTSUPP;
}
/* TC legacy mode */
if (!tc_single_action(cls_bpf->exts))
return -ENOTSUPP;
tcf_exts_to_list(cls_bpf->exts, &actions);
list_for_each_entry(a, &actions, list) {
if (is_tcf_gact_shot(a))
return NN_ACT_TC_DROP;
if (is_tcf_mirred_redirect(a) &&
tcf_mirred_ifindex(a) == nn->netdev->ifindex)
return NN_ACT_TC_REDIR;
}
return -ENOTSUPP;
}
static int
nfp_net_bpf_offload_prepare(struct nfp_net *nn,
struct tc_cls_bpf_offload *cls_bpf,
struct nfp_bpf_result *res,
void **code, dma_addr_t *dma_addr, u16 max_instr)
{
unsigned int code_sz = max_instr * sizeof(u64);
enum nfp_bpf_action_type act;
u16 start_off, done_off;
unsigned int max_mtu;
int ret;
ret = nfp_net_bpf_get_act(nn, cls_bpf);
if (ret < 0)
return ret;
act = ret;
max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
if (max_mtu < nn->netdev->mtu) {
nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
return -ENOTSUPP;
}
start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
*code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr,
GFP_KERNEL);
if (!*code)
return -ENOMEM;
ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off,
max_instr, res);
if (ret)
goto out;
return 0;
out:
dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr);
return ret;
}
static void
nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
void *code, dma_addr_t dma_addr,
unsigned int code_sz, unsigned int n_instr,
bool dense_mode)
{
u64 bpf_addr = dma_addr;
int err;
nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
if (dense_mode)
bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr);
nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr);
/* Load up the JITed code */
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
if (err)
nn_err(nn, "FW command error while loading BPF: %d\n", err);
/* Enable passing packets through BPF function */
nn->ctrl |= NFP_NET_CFG_CTRL_BPF;
nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
if (err)
nn_err(nn, "FW command error while enabling BPF: %d\n", err);
dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
nfp_net_bpf_stats_reset(nn);
mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL);
}
static int nfp_net_bpf_stop(struct nfp_net *nn)
{
if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
return 0;
spin_lock_bh(&nn->rx_filter_lock);
nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
spin_unlock_bh(&nn->rx_filter_lock);
nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
del_timer_sync(&nn->rx_filter_stats_timer);
nn->bpf_offload_skip_sw = 0;
return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
}
int
nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
struct tc_cls_bpf_offload *cls_bpf)
{
struct nfp_bpf_result res;
dma_addr_t dma_addr;
u16 max_instr;
void *code;
int err;
max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
switch (cls_bpf->command) {
case TC_CLSBPF_REPLACE:
/* There is nothing stopping us from implementing seamless
* replace but the simple method of loading I adopted in
* the firmware does not handle atomic replace (i.e. we have to
* stop the BPF offload and re-enable it). Leaking-in a few
* frames which didn't have BPF applied in the hardware should
* be fine if software fallback is available, though.
*/
if (nn->bpf_offload_skip_sw)
return -EBUSY;
err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
&dma_addr, max_instr);
if (err)
return err;
nfp_net_bpf_stop(nn);
nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
dma_addr, max_instr * sizeof(u64),
res.n_instr, res.dense_mode);
return 0;
case TC_CLSBPF_ADD:
if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
return -EBUSY;
err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
&dma_addr, max_instr);
if (err)
return err;
nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
dma_addr, max_instr * sizeof(u64),
res.n_instr, res.dense_mode);
return 0;
case TC_CLSBPF_DESTROY:
return nfp_net_bpf_stop(nn);
case TC_CLSBPF_STATS:
return nfp_net_bpf_stats_update(nn, cls_bpf);
default:
return -ENOTSUPP;
}
}


@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n");
} else {
switch (fw_ver.major) {
case 1 ... 3:
case 1 ... 4:
if (is_nfp3200) {
stride = 2;
tx_bar_no = NFP_NET_Q0_BAR;


@ -0,0 +1,90 @@
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#ifndef _LINUX_BPF_VERIFIER_H
#define _LINUX_BPF_VERIFIER_H 1
#include <linux/bpf.h> /* for enum bpf_reg_type */
#include <linux/filter.h> /* for MAX_BPF_STACK */
struct bpf_reg_state {
enum bpf_reg_type type;
union {
/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
s64 imm;
/* valid when type == PTR_TO_PACKET* */
struct {
u32 id;
u16 off;
u16 range;
};
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
};
};
enum bpf_stack_slot_type {
STACK_INVALID, /* nothing was stored in this stack slot */
STACK_SPILL, /* register spilled into stack */
STACK_MISC /* BPF program wrote some data into this slot */
};
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
/* state of the program:
* type of all registers and stack info
*/
struct bpf_verifier_state {
struct bpf_reg_state regs[MAX_BPF_REG];
u8 stack_slot_type[MAX_BPF_STACK];
struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
};
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
struct bpf_verifier_state_list *next;
};
struct bpf_insn_aux_data {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
struct bpf_verifier_env;
struct bpf_ext_analyzer_ops {
int (*insn_hook)(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx);
};
/* single container for all structs
* one verifier_env per bpf_check() call
*/
struct bpf_verifier_env {
struct bpf_prog *prog; /* eBPF program being verified */
struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
struct bpf_verifier_state cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
void *analyzer_priv; /* pointer to external analyzer's private data */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of maps used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
};
int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
void *priv);
#endif /* _LINUX_BPF_VERIFIER_H */


@ -789,6 +789,7 @@ enum {
TC_SETUP_CLSU32,
TC_SETUP_CLSFLOWER,
TC_SETUP_MATCHALL,
TC_SETUP_CLSBPF,
};
struct tc_cls_u32_offload;
@ -800,6 +801,7 @@ struct tc_to_netdev {
struct tc_cls_u32_offload *cls_u32;
struct tc_cls_flower_offload *cls_flower;
struct tc_cls_matchall_offload *cls_mall;
struct tc_cls_bpf_offload *cls_bpf;
};
};


@ -486,4 +486,20 @@ struct tc_cls_matchall_offload {
unsigned long cookie;
};
enum tc_clsbpf_command {
TC_CLSBPF_ADD,
TC_CLSBPF_REPLACE,
TC_CLSBPF_DESTROY,
TC_CLSBPF_STATS,
};
struct tc_cls_bpf_offload {
enum tc_clsbpf_command command;
struct tcf_exts *exts;
struct bpf_prog *prog;
const char *name;
bool exts_integrated;
u32 gen_flags;
};
#endif
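For context, a hedged sketch of how such a request could travel from a
classifier to a driver via the ndo_setup_tc() extension in the
netdevice.h hunk above (offload_bpf_cmd() is a hypothetical helper, the
tc_to_netdev type field is assumed as used by nfp_net_setup_tc() earlier,
and the actual cls_bpf changes are not part of this excerpt):

static int offload_bpf_cmd(struct net_device *dev, u32 handle,
			   __be16 protocol, struct tc_cls_bpf_offload *cmd)
{
	struct tc_to_netdev tc = {
		.type	 = TC_SETUP_CLSBPF,
		.cls_bpf = cmd,
	};

	if (!dev->netdev_ops->ndo_setup_tc)
		return -ENOTSUPP;

	return dev->netdev_ops->ndo_setup_tc(dev, handle, protocol, &tc);
}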


@ -396,6 +396,7 @@ enum {
TCA_BPF_FD,
TCA_BPF_NAME,
TCA_BPF_FLAGS,
TCA_BPF_FLAGS_GEN,
__TCA_BPF_MAX,
};


@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
@ -126,77 +127,16 @@
* are set to NOT_INIT to indicate that they are no longer readable.
*/
struct reg_state {
enum bpf_reg_type type;
union {
/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
s64 imm;
/* valid when type == PTR_TO_PACKET* */
struct {
u32 id;
u16 off;
u16 range;
};
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
};
};
enum bpf_stack_slot_type {
STACK_INVALID, /* nothing was stored in this stack slot */
STACK_SPILL, /* register spilled into stack */
STACK_MISC /* BPF program wrote some data into this slot */
};
#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
/* state of the program:
* type of all registers and stack info
*/
struct verifier_state {
struct reg_state regs[MAX_BPF_REG];
u8 stack_slot_type[MAX_BPF_STACK];
struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
};
/* linked list of verifier states used to prune search */
struct verifier_state_list {
struct verifier_state state;
struct verifier_state_list *next;
};
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct verifier_stack_elem {
struct bpf_verifier_stack_elem {
/* verifier state is 'st'
* before processing instruction 'insn_idx'
* and after processing instruction 'prev_insn_idx'
*/
struct verifier_state st;
struct bpf_verifier_state st;
int insn_idx;
int prev_insn_idx;
struct verifier_stack_elem *next;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
/* single container for all structs
* one verifier_env per bpf_check() call
*/
struct verifier_env {
struct bpf_prog *prog; /* eBPF program being verified */
struct verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
struct verifier_state cur_state; /* current verifier state */
struct verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of maps used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
bool seen_direct_write;
struct bpf_verifier_stack_elem *next;
};
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
@ -249,9 +189,9 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET_END] = "pkt_end",
};
static void print_verifier_state(struct verifier_state *state)
static void print_verifier_state(struct bpf_verifier_state *state)
{
struct reg_state *reg;
struct bpf_reg_state *reg;
enum bpf_reg_type t;
int i;
@ -427,9 +367,9 @@ static void print_bpf_insn(struct bpf_insn *insn)
}
}
static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
{
struct verifier_stack_elem *elem;
struct bpf_verifier_stack_elem *elem;
int insn_idx;
if (env->head == NULL)
@ -446,12 +386,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
return insn_idx;
}
static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
int prev_insn_idx)
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx)
{
struct verifier_stack_elem *elem;
struct bpf_verifier_stack_elem *elem;
elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
if (!elem)
goto err;
@ -477,7 +417,7 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
static void init_reg_state(struct reg_state *regs)
static void init_reg_state(struct bpf_reg_state *regs)
{
int i;
@ -493,7 +433,7 @@ static void init_reg_state(struct reg_state *regs)
regs[BPF_REG_1].type = PTR_TO_CTX;
}
static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
{
BUG_ON(regno >= MAX_BPF_REG);
regs[regno].type = UNKNOWN_VALUE;
@ -506,7 +446,7 @@ enum reg_arg_type {
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
static int check_reg_arg(struct reg_state *regs, u32 regno,
static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
enum reg_arg_type t)
{
if (regno >= MAX_BPF_REG) {
@ -566,8 +506,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
static int check_stack_write(struct verifier_state *state, int off, int size,
int value_regno)
static int check_stack_write(struct bpf_verifier_state *state, int off,
int size, int value_regno)
{
int i;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@ -592,7 +532,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
} else {
/* regular write of data into stack */
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
(struct reg_state) {};
(struct bpf_reg_state) {};
for (i = 0; i < size; i++)
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@ -600,7 +540,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
return 0;
}
static int check_stack_read(struct verifier_state *state, int off, int size,
static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
int value_regno)
{
u8 *slot_type;
@ -641,7 +581,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
}
/* check read/write into map element returned by bpf_map_lookup_elem() */
static int check_map_access(struct verifier_env *env, u32 regno, int off,
static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@ -656,7 +596,7 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
#define MAX_PACKET_OFF 0xffff
static bool may_access_direct_pkt_data(struct verifier_env *env,
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta)
{
switch (env->prog->type) {
@ -673,11 +613,11 @@ static bool may_access_direct_pkt_data(struct verifier_env *env,
}
}
static int check_packet_access(struct verifier_env *env, u32 regno, int off,
static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
struct reg_state *regs = env->cur_state.regs;
struct reg_state *reg = &regs[regno];
struct bpf_reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *reg = &regs[regno];
off += reg->off;
if (off < 0 || size <= 0 || off + size > reg->range) {
@ -689,9 +629,13 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
}
/* check access to 'struct bpf_context' fields */
static int check_ctx_access(struct verifier_env *env, int off, int size,
static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
/* for analyzer ctx accesses are already validated and converted */
if (env->analyzer_ops)
return 0;
if (env->prog->aux->ops->is_valid_access &&
env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
/* remember the offset of last byte accessed in ctx */
@ -704,7 +648,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
return -EACCES;
}
static bool is_pointer_value(struct verifier_env *env, int regno)
static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
if (env->allow_ptr_leaks)
return false;
@ -718,12 +662,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
}
}
static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
int off, int size)
static int check_ptr_alignment(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, int off, int size)
{
if (reg->type != PTR_TO_PACKET) {
if (off % size != 0) {
verbose("misaligned access off %d size %d\n", off, size);
verbose("misaligned access off %d size %d\n",
off, size);
return -EACCES;
} else {
return 0;
@ -764,12 +709,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
static int check_mem_access(struct verifier_env *env, u32 regno, int off,
static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
int bpf_size, enum bpf_access_type t,
int value_regno)
{
struct verifier_state *state = &env->cur_state;
struct reg_state *reg = &state->regs[regno];
struct bpf_verifier_state *state = &env->cur_state;
struct bpf_reg_state *reg = &state->regs[regno];
int size, err = 0;
if (reg->type == PTR_TO_STACK)
@ -855,9 +800,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
return err;
}
static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@ -891,12 +836,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized
*/
static int check_stack_boundary(struct verifier_env *env, int regno,
static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
int access_size, bool zero_size_allowed,
struct bpf_call_arg_meta *meta)
{
struct verifier_state *state = &env->cur_state;
struct reg_state *regs = state->regs;
struct bpf_verifier_state *state = &env->cur_state;
struct bpf_reg_state *regs = state->regs;
int off, i;
if (regs[regno].type != PTR_TO_STACK) {
@ -935,11 +880,11 @@ static int check_stack_boundary(struct verifier_env *env, int regno,
return 0;
}
static int check_func_arg(struct verifier_env *env, u32 regno,
static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
{
struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
enum bpf_reg_type expected_type, type = reg->type;
int err = 0;
@ -1144,10 +1089,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
return count > 1 ? -EINVAL : 0;
}
static void clear_all_pkt_pointers(struct verifier_env *env)
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
struct verifier_state *state = &env->cur_state;
struct reg_state *regs = state->regs, *reg;
struct bpf_verifier_state *state = &env->cur_state;
struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
@ -1167,12 +1112,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env)
}
}
static int check_call(struct verifier_env *env, int func_id)
static int check_call(struct bpf_verifier_env *env, int func_id)
{
struct verifier_state *state = &env->cur_state;
struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
struct reg_state *regs = state->regs;
struct reg_state *reg;
struct bpf_reg_state *regs = state->regs;
struct bpf_reg_state *reg;
struct bpf_call_arg_meta meta;
bool changes_data;
int i, err;
@ -1275,12 +1220,13 @@ static int check_call(struct verifier_env *env, int func_id)
return 0;
}
static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
static int check_packet_ptr_add(struct bpf_verifier_env *env,
struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
struct reg_state *dst_reg = &regs[insn->dst_reg];
struct reg_state *src_reg = &regs[insn->src_reg];
struct reg_state tmp_reg;
struct bpf_reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
struct bpf_reg_state *src_reg = &regs[insn->src_reg];
struct bpf_reg_state tmp_reg;
s32 imm;
if (BPF_SRC(insn->code) == BPF_K) {
@ -1348,10 +1294,10 @@ static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
struct reg_state *dst_reg = &regs[insn->dst_reg];
struct bpf_reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
u8 opcode = BPF_OP(insn->code);
s64 imm_log2;
@ -1361,7 +1307,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
*/
if (BPF_SRC(insn->code) == BPF_X) {
struct reg_state *src_reg = &regs[insn->src_reg];
struct bpf_reg_state *src_reg = &regs[insn->src_reg];
if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
dst_reg->imm && opcode == BPF_ADD) {
@ -1450,11 +1396,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
struct reg_state *dst_reg = &regs[insn->dst_reg];
struct reg_state *src_reg = &regs[insn->src_reg];
struct bpf_reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
struct bpf_reg_state *src_reg = &regs[insn->src_reg];
u8 opcode = BPF_OP(insn->code);
/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
@ -1471,9 +1418,9 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
}
/* check validity of 32-bit and 64-bit arithmetic operations */
static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs, *dst_reg;
struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@ -1647,10 +1594,10 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
static void find_good_pkt_pointers(struct verifier_state *state,
const struct reg_state *dst_reg)
static void find_good_pkt_pointers(struct bpf_verifier_state *state,
struct bpf_reg_state *dst_reg)
{
struct reg_state *regs = state->regs, *reg;
struct bpf_reg_state *regs = state->regs, *reg;
int i;
/* LLVM can generate two kind of checks:
@ -1696,11 +1643,11 @@ static void find_good_pkt_pointers(struct verifier_state *state,
}
}
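
For context, the checks find_good_pkt_pointers() recognizes are the direct packet access bounds tests that Clang/LLVM emits. A minimal, illustrative fragment of such a program (not part of this patch; assumes the usual __sk_buff data/data_end casts):

	void *data     = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;
	struct ethhdr *eth = data;

	/* "pkt_ptr + constant > pkt_end" form: once this branch is taken,
	 * the verifier marks sizeof(*eth) bytes as readable through 'eth'.
	 */
	if (data + sizeof(*eth) > data_end)
		return TC_ACT_SHOT;

	return eth->h_proto ? TC_ACT_OK : TC_ACT_SHOT;
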
static int check_cond_jmp_op(struct verifier_env *env,
static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
struct verifier_state *other_branch, *this_branch = &env->cur_state;
struct reg_state *regs = this_branch->regs, *dst_reg;
struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@ -1762,7 +1709,7 @@ static int check_cond_jmp_op(struct verifier_env *env,
if (!other_branch)
return -EFAULT;
/* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
if (BPF_SRC(insn->code) == BPF_K &&
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
@ -1804,9 +1751,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
}
/* verify BPF_LD_IMM64 instruction */
static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if (BPF_SIZE(insn->code) != BPF_DW) {
@ -1822,9 +1769,19 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
if (insn->src_reg == 0)
/* generic move 64-bit immediate into a register */
if (insn->src_reg == 0) {
/* generic move 64-bit immediate into a register,
* only analyzer needs to collect the ld_imm value.
*/
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
if (!env->analyzer_ops)
return 0;
regs[insn->dst_reg].type = CONST_IMM;
regs[insn->dst_reg].imm = imm;
return 0;
}
/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
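
For reference, a BPF_LD_IMM64 load occupies two struct bpf_insn slots, with the 64-bit constant split across the two 32-bit imm fields; the analyzer-only branch above simply reassembles it. A small illustrative encoder (a sketch, not part of the patch):

	#include <linux/bpf.h>	/* struct bpf_insn, BPF_LD, BPF_DW, BPF_IMM */

	static void encode_ld_imm64(struct bpf_insn insn[2], __u8 dst_reg, __u64 v)
	{
		insn[0] = (struct bpf_insn) {
			.code    = BPF_LD | BPF_DW | BPF_IMM,
			.dst_reg = dst_reg,
			.imm     = (__u32)v,		/* low 32 bits */
		};
		insn[1] = (struct bpf_insn) {
			.imm     = (__u32)(v >> 32),	/* high 32 bits */
		};
	}
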
@ -1861,11 +1818,11 @@ static bool may_access_skb(enum bpf_prog_type type)
* Output:
* R0 - 8/16/32-bit skb data converted to cpu endianness
*/
static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *regs = env->cur_state.regs;
u8 mode = BPF_MODE(insn->code);
struct reg_state *reg;
struct bpf_reg_state *reg;
int i, err;
if (!may_access_skb(env->prog->type)) {
@ -1951,7 +1908,7 @@ enum {
BRANCH = 2,
};
#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
static int *insn_stack; /* stack of insns to process */
static int cur_stack; /* current stack index */
@ -1962,7 +1919,7 @@ static int *insn_state;
* w - next instruction
* e - edge
*/
static int push_insn(int t, int w, int e, struct verifier_env *env)
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
return 0;
@ -2003,7 +1960,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env)
/* non-recursive depth-first-search to detect loops in BPF program
* loop == back-edge in directed graph
*/
static int check_cfg(struct verifier_env *env)
static int check_cfg(struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi;
int insn_cnt = env->prog->len;
@ -2112,7 +2069,8 @@ static int check_cfg(struct verifier_env *env)
/* the following conditions reduce the number of explored insns
* from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
*/
static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
struct bpf_reg_state *cur)
{
if (old->id != cur->id)
return false;
@ -2187,9 +2145,10 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
static bool states_equal(struct bpf_verifier_state *old,
struct bpf_verifier_state *cur)
{
struct reg_state *rold, *rcur;
struct bpf_reg_state *rold, *rcur;
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
@ -2229,9 +2188,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
* the same, check that stored pointers types
* are the same as well.
* Ex: explored safe path could have stored
* (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
* (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
* but current path has stored:
* (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
* (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
* such verifier states are not equivalent.
* return false to continue verification of this path
*/
@ -2242,10 +2201,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
return true;
}
static int is_state_visited(struct verifier_env *env, int insn_idx)
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct verifier_state_list *new_sl;
struct verifier_state_list *sl;
struct bpf_verifier_state_list *new_sl;
struct bpf_verifier_state_list *sl;
sl = env->explored_states[insn_idx];
if (!sl)
@ -2269,7 +2228,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
* it will be rejected. Since there are no loops, we won't be
* seeing this 'insn_idx' instruction again on the way to bpf_exit
*/
new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
if (!new_sl)
return -ENOMEM;
@ -2280,11 +2239,20 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
return 0;
}
static int do_check(struct verifier_env *env)
static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx)
{
struct verifier_state *state = &env->cur_state;
if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
return 0;
return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
}
static int do_check(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *state = &env->cur_state;
struct bpf_insn *insns = env->prog->insnsi;
struct reg_state *regs = state->regs;
struct bpf_reg_state *regs = state->regs;
int insn_cnt = env->prog->len;
int insn_idx, prev_insn_idx = 0;
int insn_processed = 0;
@ -2338,13 +2306,17 @@ static int do_check(struct verifier_env *env)
print_bpf_insn(insn);
}
err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
if (err)
return err;
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
if (err)
return err;
} else if (class == BPF_LDX) {
enum bpf_reg_type src_reg_type;
enum bpf_reg_type *prev_src_type, src_reg_type;
/* check for reserved fields is already done */
@ -2374,16 +2346,18 @@ static int do_check(struct verifier_env *env)
continue;
}
if (insn->imm == 0) {
prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
if (*prev_src_type == NOT_INIT) {
/* saw a valid insn
* dst_reg = *(u32 *)(src_reg + off)
* use reserved 'imm' field to mark this insn
* save type to validate intersecting paths
*/
insn->imm = src_reg_type;
*prev_src_type = src_reg_type;
} else if (src_reg_type != insn->imm &&
} else if (src_reg_type != *prev_src_type &&
(src_reg_type == PTR_TO_CTX ||
insn->imm == PTR_TO_CTX)) {
*prev_src_type == PTR_TO_CTX)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
@ -2396,7 +2370,7 @@ static int do_check(struct verifier_env *env)
}
} else if (class == BPF_STX) {
enum bpf_reg_type dst_reg_type;
enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
@ -2424,11 +2398,13 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
if (insn->imm == 0) {
insn->imm = dst_reg_type;
} else if (dst_reg_type != insn->imm &&
prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
if (*prev_dst_type == NOT_INIT) {
*prev_dst_type = dst_reg_type;
} else if (dst_reg_type != *prev_dst_type &&
(dst_reg_type == PTR_TO_CTX ||
insn->imm == PTR_TO_CTX)) {
*prev_dst_type == PTR_TO_CTX)) {
verbose("same insn cannot be used with different pointers\n");
return -EINVAL;
}
@ -2563,7 +2539,7 @@ static int check_map_prog_compatibility(struct bpf_map *map,
/* look for pseudo eBPF instructions that access map FDs and
* replace them with actual map pointers
*/
static int replace_map_fd_with_map_ptr(struct verifier_env *env)
static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@ -2660,7 +2636,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
}
/* drop refcnt of maps used by the rejected program */
static void release_maps(struct verifier_env *env)
static void release_maps(struct bpf_verifier_env *env)
{
int i;
@ -2669,7 +2645,7 @@ static void release_maps(struct verifier_env *env)
}
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
static void convert_pseudo_ld_imm64(struct verifier_env *env)
static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@ -2683,13 +2659,14 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
/* convert load instructions that access fields of 'struct __sk_buff'
* into sequence of instructions that access fields of 'struct sk_buff'
*/
static int convert_ctx_accesses(struct verifier_env *env)
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
const struct bpf_verifier_ops *ops = env->prog->aux->ops;
const int insn_cnt = env->prog->len;
struct bpf_insn insn_buf[16], *insn;
struct bpf_prog *new_prog;
enum bpf_access_type type;
int i, insn_cnt, cnt;
int i, cnt, delta = 0;
if (ops->gen_prologue) {
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@ -2703,18 +2680,16 @@ static int convert_ctx_accesses(struct verifier_env *env)
if (!new_prog)
return -ENOMEM;
env->prog = new_prog;
delta += cnt - 1;
}
}
if (!ops->convert_ctx_access)
return 0;
insn_cnt = env->prog->len;
insn = env->prog->insnsi;
insn = env->prog->insnsi + delta;
for (i = 0; i < insn_cnt; i++, insn++) {
u32 insn_delta;
if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
type = BPF_READ;
@ -2724,11 +2699,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
else
continue;
if (insn->imm != PTR_TO_CTX) {
/* clear internal mark */
insn->imm = 0;
if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
continue;
}
cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
insn->off, insn_buf, env->prog);
@ -2737,26 +2709,24 @@ static int convert_ctx_accesses(struct verifier_env *env)
return -EINVAL;
}
new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt);
new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
cnt);
if (!new_prog)
return -ENOMEM;
insn_delta = cnt - 1;
delta += cnt - 1;
/* keep walking new program and skip insns we just inserted */
env->prog = new_prog;
insn = new_prog->insnsi + i + insn_delta;
insn_cnt += insn_delta;
i += insn_delta;
insn = new_prog->insnsi + i + delta;
}
return 0;
}
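
As an illustrative example of what this pass produces (a sketch; the exact emitted sequence depends on the per-program-type convert_ctx_access implementation in net/core/filter.c): a 32-bit read of __sk_buff->mark is rewritten into a load from the real struct sk_buff offset, roughly:

	case offsetof(struct __sk_buff, mark):
		/* BPF_READ: dst_reg = *(u32 *)(src_reg + off of sk_buff mark) */
		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;
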
static void free_states(struct verifier_env *env)
static void free_states(struct bpf_verifier_env *env)
{
struct verifier_state_list *sl, *sln;
struct bpf_verifier_state_list *sl, *sln;
int i;
if (!env->explored_states)
@ -2779,19 +2749,24 @@ static void free_states(struct verifier_env *env)
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
char __user *log_ubuf = NULL;
struct verifier_env *env;
struct bpf_verifier_env *env;
int ret = -EINVAL;
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
return -E2BIG;
/* 'struct verifier_env' can be global, but since it's not small,
/* 'struct bpf_verifier_env' can be global, but since it's not small,
* allocate/free it every time bpf_check() is called
*/
env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
(*prog)->len);
ret = -ENOMEM;
if (!env->insn_aux_data)
goto err_free_env;
env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */
@ -2810,12 +2785,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
/* log_* values have to be sane */
if (log_size < 128 || log_size > UINT_MAX >> 8 ||
log_level == 0 || log_ubuf == NULL)
goto free_env;
goto err_unlock;
ret = -ENOMEM;
log_buf = vmalloc(log_size);
if (!log_buf)
goto free_env;
goto err_unlock;
} else {
log_level = 0;
}
@ -2825,7 +2800,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
goto skip_full_check;
env->explored_states = kcalloc(env->prog->len,
sizeof(struct verifier_state_list *),
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
if (!env->explored_states)
@ -2884,14 +2859,67 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
free_log_buf:
if (log_level)
vfree(log_buf);
free_env:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
*/
release_maps(env);
*prog = env->prog;
kfree(env);
err_unlock:
mutex_unlock(&bpf_verifier_lock);
vfree(env->insn_aux_data);
err_free_env:
kfree(env);
return ret;
}
int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
void *priv)
{
struct bpf_verifier_env *env;
int ret;
env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
prog->len);
ret = -ENOMEM;
if (!env->insn_aux_data)
goto err_free_env;
env->prog = prog;
env->analyzer_ops = ops;
env->analyzer_priv = priv;
/* grab the mutex to protect few globals used by verifier */
mutex_lock(&bpf_verifier_lock);
log_level = 0;
env->explored_states = kcalloc(env->prog->len,
sizeof(struct bpf_verifier_state_list *),
GFP_KERNEL);
ret = -ENOMEM;
if (!env->explored_states)
goto skip_full_check;
ret = check_cfg(env);
if (ret < 0)
goto skip_full_check;
env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
ret = do_check(env);
skip_full_check:
while (pop_stack(env, NULL) >= 0);
free_states(env);
mutex_unlock(&bpf_verifier_lock);
vfree(env->insn_aux_data);
err_free_env:
kfree(env);
return ret;
}
EXPORT_SYMBOL_GPL(bpf_analyzer);
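
To sketch how an offload driver (e.g. the NFP verifier/JIT added in this series) is expected to consume this interface, with purely illustrative my_* names: it fills a bpf_ext_analyzer_ops with an insn_hook callback and passes it to bpf_analyzer() along with private state; the hook is then invoked before each instruction is verified, with access to env->cur_state.

	/* Illustrative consumer of bpf_analyzer(); my_* names are made up. */
	static int my_insn_hook(struct bpf_verifier_env *env,
				int insn_idx, int prev_insn_idx)
	{
		struct my_state *state = env->analyzer_priv;	/* hypothetical */

		/* inspect env->cur_state.regs[] or env->prog->insnsi[insn_idx] */
		return 0;	/* returning an error aborts verification */
	}

	static const struct bpf_ext_analyzer_ops my_analyzer_ops = {
		.insn_hook	= my_insn_hook,
	};

	/* in the driver's offload path: */
	err = bpf_analyzer(prog, &my_analyzer_ops, state);
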

View File

@ -204,6 +204,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
return retval;
}
static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
u64 lastuse)
{
tcf_lastuse_update(&a->tcfa_tm);
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
}
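
This stats_update op is what lets hardware byte/packet counters surface on an offloaded mirred action. A driver servicing the stats command would push its counters back through the exts pointer carried in tc_cls_bpf_offload, roughly as in this hedged sketch (hypothetical helper; counters assumed to come from the NIC):

	static void my_report_cls_bpf_stats(struct tc_cls_bpf_offload *cls_bpf,
					    u64 hw_bytes, u64 hw_pkts,
					    u64 hw_lastuse)
	{
		/* walks the attached actions and calls their .stats_update op,
		 * i.e. tcf_stats_update() above for act_mirred
		 */
		tcf_exts_stats_update(cls_bpf->exts, hw_bytes, hw_pkts, hw_lastuse);
	}
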
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
int ref)
{
@ -281,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = {
.type = TCA_ACT_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred,
.stats_update = tcf_stats_update,
.dump = tcf_mirred_dump,
.cleanup = tcf_mirred_release,
.init = tcf_mirred_init,

View File

@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
#define CLS_BPF_NAME_LEN 256
#define CLS_BPF_SUPPORTED_GEN_FLAGS \
(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
struct cls_bpf_head {
struct list_head plist;
@ -39,6 +41,8 @@ struct cls_bpf_prog {
struct list_head link;
struct tcf_result res;
bool exts_integrated;
bool offloaded;
u32 gen_flags;
struct tcf_exts exts;
u32 handle;
union {
@ -54,6 +58,7 @@ struct cls_bpf_prog {
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
[TCA_BPF_FLAGS] = { .type = NLA_U32 },
[TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 },
[TCA_BPF_FD] = { .type = NLA_U32 },
[TCA_BPF_NAME] = { .type = NLA_NUL_STRING,
.len = CLS_BPF_NAME_LEN },
@ -91,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
if (at_ingress) {
if (tc_skip_sw(prog->gen_flags)) {
filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
} else if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
bpf_compute_data_end(skb);
@ -138,6 +145,91 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
return !prog->bpf_ops;
}
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
enum tc_clsbpf_command cmd)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_bpf_offload bpf_offload = {};
struct tc_to_netdev offload;
offload.type = TC_SETUP_CLSBPF;
offload.cls_bpf = &bpf_offload;
bpf_offload.command = cmd;
bpf_offload.exts = &prog->exts;
bpf_offload.prog = prog->filter;
bpf_offload.name = prog->bpf_name;
bpf_offload.exts_integrated = prog->exts_integrated;
bpf_offload.gen_flags = prog->gen_flags;
return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
tp->protocol, &offload);
}
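
On the receiving side, the driver's ndo_setup_tc() gets the tc_to_netdev built above and dispatches on TC_SETUP_CLSBPF plus the embedded command. A rough, hypothetical sketch of that dispatch (the real handler for this series lives in the NFP driver; the my_* helpers are made up):

	static int my_ndo_setup_tc(struct net_device *netdev, u32 handle,
				   __be16 proto, struct tc_to_netdev *tc)
	{
		struct tc_cls_bpf_offload *cls_bpf;

		if (tc->type != TC_SETUP_CLSBPF)
			return -EOPNOTSUPP;
		cls_bpf = tc->cls_bpf;

		switch (cls_bpf->command) {
		case TC_CLSBPF_ADD:
		case TC_CLSBPF_REPLACE:
			return my_offload_prog(netdev, cls_bpf);
		case TC_CLSBPF_DESTROY:
			return my_destroy_prog(netdev, cls_bpf);
		case TC_CLSBPF_STATS:
			return my_update_stats(netdev, cls_bpf);
		default:
			return -EOPNOTSUPP;
		}
	}
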
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct cls_bpf_prog *obj = prog;
enum tc_clsbpf_command cmd;
bool skip_sw;
int ret;
skip_sw = tc_skip_sw(prog->gen_flags) ||
(oldprog && tc_skip_sw(oldprog->gen_flags));
if (oldprog && oldprog->offloaded) {
if (tc_should_offload(dev, tp, prog->gen_flags)) {
cmd = TC_CLSBPF_REPLACE;
} else if (!tc_skip_sw(prog->gen_flags)) {
obj = oldprog;
cmd = TC_CLSBPF_DESTROY;
} else {
return -EINVAL;
}
} else {
if (!tc_should_offload(dev, tp, prog->gen_flags))
return skip_sw ? -EINVAL : 0;
cmd = TC_CLSBPF_ADD;
}
ret = cls_bpf_offload_cmd(tp, obj, cmd);
if (ret)
return skip_sw ? ret : 0;
obj->offloaded = true;
if (oldprog)
oldprog->offloaded = false;
return 0;
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
struct cls_bpf_prog *prog)
{
int err;
if (!prog->offloaded)
return;
err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
if (err) {
pr_err("Stopping hardware offload failed: %d\n", err);
return;
}
prog->offloaded = false;
}
static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct cls_bpf_prog *prog)
{
if (!prog->offloaded)
return;
cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
}
static int cls_bpf_init(struct tcf_proto *tp)
{
struct cls_bpf_head *head;
@ -177,6 +269,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@ -193,6 +286,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
return false;
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@ -302,6 +396,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
{
bool is_bpf, is_ebpf, have_exts = false;
struct tcf_exts exts;
u32 gen_flags = 0;
int ret;
is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
@ -326,8 +421,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
}
if (tb[TCA_BPF_FLAGS_GEN]) {
gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
!tc_flags_valid(gen_flags)) {
ret = -EINVAL;
goto errout;
}
}
prog->exts_integrated = have_exts;
prog->gen_flags = gen_flags;
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
cls_bpf_prog_from_efd(tb, prog, tp);
@ -415,6 +519,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout;
ret = cls_bpf_offload(tp, prog, oldprog);
if (ret) {
cls_bpf_delete_prog(tp, prog);
return ret;
}
if (oldprog) {
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
@ -476,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
tm->tcm_handle = prog->handle;
cls_bpf_offload_update_stats(tp, prog);
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
@ -498,6 +610,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
goto nla_put_failure;
if (prog->gen_flags &&
nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
goto nla_put_failure;
nla_nest_end(skb, nest);