2017-11-01 21:08:43 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
2005-04-17 05:20:36 +07:00
|
|
|
#ifndef __LINUX_PKT_CLS_H
|
|
|
|
#define __LINUX_PKT_CLS_H
|
|
|
|
|
2009-01-30 23:35:32 +07:00
|
|
|
#include <linux/types.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/pkt_sched.h>
|
|
|
|
|
2017-01-24 19:02:41 +07:00
|
|
|
#define TC_COOKIE_MAX_SIZE 16
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Action attributes */
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_ACT_UNSPEC,
|
|
|
|
TCA_ACT_KIND,
|
|
|
|
TCA_ACT_OPTIONS,
|
|
|
|
TCA_ACT_INDEX,
|
|
|
|
TCA_ACT_STATS,
|
2016-04-26 15:06:18 +07:00
|
|
|
TCA_ACT_PAD,
|
2017-01-24 19:02:41 +07:00
|
|
|
TCA_ACT_COOKIE,
|
2019-10-30 21:09:05 +07:00
|
|
|
TCA_ACT_FLAGS,
|
2020-03-20 06:26:23 +07:00
|
|
|
TCA_ACT_HW_STATS,
|
2020-03-28 22:37:43 +07:00
|
|
|
TCA_ACT_USED_HW_STATS,
|
2005-04-17 05:20:36 +07:00
|
|
|
__TCA_ACT_MAX
|
|
|
|
};
|
|
|
|
|
2019-10-30 21:09:05 +07:00
|
|
|
#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
|
|
|
|
* actions stats.
|
|
|
|
*/
|
|
|
|
|
2020-03-07 18:40:20 +07:00
|
|
|
/* tca HW stats type
|
|
|
|
* When user does not pass the attribute, he does not care.
|
|
|
|
* It is the same as if he would pass the attribute with
|
|
|
|
* all supported bits set.
|
|
|
|
* In case no bits are set, user is not interested in getting any HW statistics.
|
|
|
|
*/
|
2020-03-20 06:26:23 +07:00
|
|
|
#define TCA_ACT_HW_STATS_IMMEDIATE (1 << 0) /* Means that in dump, user
|
|
|
|
* gets the current HW stats
|
|
|
|
* state from the device
|
|
|
|
* queried at the dump time.
|
|
|
|
*/
|
|
|
|
#define TCA_ACT_HW_STATS_DELAYED (1 << 1) /* Means that in dump, user gets
|
|
|
|
* HW stats that might be out of date
|
|
|
|
* for some time, maybe couple of
|
|
|
|
* seconds. This is the case when
|
|
|
|
* driver polls stats updates
|
|
|
|
* periodically or when it gets async
|
|
|
|
* stats update from the device.
|
|
|
|
*/
|
2020-03-07 18:40:20 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#define TCA_ACT_MAX __TCA_ACT_MAX
|
|
|
|
#define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
|
|
|
|
#define TCA_ACT_MAX_PRIO 32
|
|
|
|
#define TCA_ACT_BIND 1
|
|
|
|
#define TCA_ACT_NOBIND 0
|
|
|
|
#define TCA_ACT_UNBIND 1
|
|
|
|
#define TCA_ACT_NOUNBIND 0
|
|
|
|
#define TCA_ACT_REPLACE 1
|
|
|
|
#define TCA_ACT_NOREPLACE 0
|
|
|
|
|
|
|
|
#define TC_ACT_UNSPEC (-1)
|
|
|
|
#define TC_ACT_OK 0
|
|
|
|
#define TC_ACT_RECLASSIFY 1
|
|
|
|
#define TC_ACT_SHOT 2
|
|
|
|
#define TC_ACT_PIPE 3
|
|
|
|
#define TC_ACT_STOLEN 4
|
|
|
|
#define TC_ACT_QUEUED 5
|
|
|
|
#define TC_ACT_REPEAT 6
|
2015-09-16 13:05:43 +07:00
|
|
|
#define TC_ACT_REDIRECT 7
|
2017-06-06 19:12:02 +07:00
|
|
|
#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu"
|
|
|
|
* and don't further process the frame
|
|
|
|
* in hardware. For sw path, this is
|
|
|
|
* equivalent of TC_ACT_STOLEN - drop
|
|
|
|
* the skb and act like everything
|
|
|
|
* is alright.
|
|
|
|
*/
|
2018-07-30 19:30:42 +07:00
|
|
|
#define TC_ACT_VALUE_MAX TC_ACT_TRAP
|
2017-05-02 15:12:00 +07:00
|
|
|
|
|
|
|
/* There is a special kind of actions called "extended actions",
|
|
|
|
* which need a value parameter. These have a local opcode located in
|
|
|
|
* the highest nibble, starting from 1. The rest of the bits
|
|
|
|
* are used to carry the value. These two parts together make
|
|
|
|
* a combined opcode.
|
|
|
|
*/
|
|
|
|
#define __TC_ACT_EXT_SHIFT 28
|
|
|
|
#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
|
|
|
|
#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
|
2018-07-30 19:30:42 +07:00
|
|
|
#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
|
|
|
|
#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
|
2017-05-02 15:12:00 +07:00
|
|
|
|
|
|
|
#define TC_ACT_JUMP __TC_ACT_EXT(1)
|
2017-05-17 16:08:03 +07:00
|
|
|
#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
|
2018-07-30 19:30:42 +07:00
|
|
|
#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2019-02-10 19:24:59 +07:00
|
|
|
/* These macros are put here for binary compatibility with userspace apps that
|
|
|
|
* make use of them. For kernel code and new userspace apps, use the TCA_ID_*
|
|
|
|
* versions.
|
|
|
|
*/
|
|
|
|
#define TCA_ACT_GACT 5
|
|
|
|
#define TCA_ACT_IPT 6
|
|
|
|
#define TCA_ACT_PEDIT 7
|
|
|
|
#define TCA_ACT_MIRRED 8
|
|
|
|
#define TCA_ACT_NAT 9
|
|
|
|
#define TCA_ACT_XT 10
|
|
|
|
#define TCA_ACT_SKBEDIT 11
|
|
|
|
#define TCA_ACT_VLAN 12
|
|
|
|
#define TCA_ACT_BPF 13
|
|
|
|
#define TCA_ACT_CONNMARK 14
|
|
|
|
#define TCA_ACT_SKBMOD 15
|
|
|
|
#define TCA_ACT_CSUM 16
|
|
|
|
#define TCA_ACT_TUNNEL_KEY 17
|
|
|
|
#define TCA_ACT_SIMP 22
|
|
|
|
#define TCA_ACT_IFE 25
|
|
|
|
#define TCA_ACT_SAMPLE 26
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Action type identifiers*/
|
2019-02-10 19:25:00 +07:00
|
|
|
enum tca_id {
|
2019-02-10 19:24:59 +07:00
|
|
|
TCA_ID_UNSPEC = 0,
|
|
|
|
TCA_ID_POLICE = 1,
|
|
|
|
TCA_ID_GACT = TCA_ACT_GACT,
|
|
|
|
TCA_ID_IPT = TCA_ACT_IPT,
|
|
|
|
TCA_ID_PEDIT = TCA_ACT_PEDIT,
|
|
|
|
TCA_ID_MIRRED = TCA_ACT_MIRRED,
|
|
|
|
TCA_ID_NAT = TCA_ACT_NAT,
|
|
|
|
TCA_ID_XT = TCA_ACT_XT,
|
|
|
|
TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT,
|
|
|
|
TCA_ID_VLAN = TCA_ACT_VLAN,
|
|
|
|
TCA_ID_BPF = TCA_ACT_BPF,
|
|
|
|
TCA_ID_CONNMARK = TCA_ACT_CONNMARK,
|
|
|
|
TCA_ID_SKBMOD = TCA_ACT_SKBMOD,
|
|
|
|
TCA_ID_CSUM = TCA_ACT_CSUM,
|
|
|
|
TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY,
|
|
|
|
TCA_ID_SIMP = TCA_ACT_SIMP,
|
|
|
|
TCA_ID_IFE = TCA_ACT_IFE,
|
|
|
|
TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
|
2019-05-29 00:03:50 +07:00
|
|
|
TCA_ID_CTINFO,
|
2019-07-07 21:01:57 +07:00
|
|
|
TCA_ID_MPLS,
|
2019-07-09 14:30:48 +07:00
|
|
|
TCA_ID_CT,
|
2019-07-07 21:01:57 +07:00
|
|
|
/* other actions go here */
|
2019-02-10 19:24:59 +07:00
|
|
|
__TCA_ID_MAX = 255
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_ID_MAX __TCA_ID_MAX
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_police {
|
2005-04-17 05:20:36 +07:00
|
|
|
__u32 index;
|
|
|
|
int action;
|
|
|
|
#define TC_POLICE_UNSPEC TC_ACT_UNSPEC
|
|
|
|
#define TC_POLICE_OK TC_ACT_OK
|
|
|
|
#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY
|
|
|
|
#define TC_POLICE_SHOT TC_ACT_SHOT
|
|
|
|
#define TC_POLICE_PIPE TC_ACT_PIPE
|
|
|
|
|
|
|
|
__u32 limit;
|
|
|
|
__u32 burst;
|
|
|
|
__u32 mtu;
|
|
|
|
struct tc_ratespec rate;
|
|
|
|
struct tc_ratespec peakrate;
|
2016-06-05 21:41:32 +07:00
|
|
|
int refcnt;
|
|
|
|
int bindcnt;
|
2005-04-17 05:20:36 +07:00
|
|
|
__u32 capab;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tcf_t {
|
2005-04-17 05:20:36 +07:00
|
|
|
__u64 install;
|
|
|
|
__u64 lastuse;
|
|
|
|
__u64 expires;
|
2016-06-06 17:32:54 +07:00
|
|
|
__u64 firstuse;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_cnt {
|
2016-06-05 21:41:32 +07:00
|
|
|
int refcnt;
|
2005-04-17 05:20:36 +07:00
|
|
|
int bindcnt;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define tc_gen \
|
|
|
|
__u32 index; \
|
|
|
|
__u32 capab; \
|
|
|
|
int action; \
|
|
|
|
int refcnt; \
|
|
|
|
int bindcnt
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_POLICE_UNSPEC,
|
|
|
|
TCA_POLICE_TBF,
|
|
|
|
TCA_POLICE_RATE,
|
|
|
|
TCA_POLICE_PEAKRATE,
|
|
|
|
TCA_POLICE_AVRATE,
|
|
|
|
TCA_POLICE_RESULT,
|
2016-05-24 08:07:20 +07:00
|
|
|
TCA_POLICE_TM,
|
|
|
|
TCA_POLICE_PAD,
|
2019-09-04 22:03:43 +07:00
|
|
|
TCA_POLICE_RATE64,
|
|
|
|
TCA_POLICE_PEAKRATE64,
|
2005-04-17 05:20:36 +07:00
|
|
|
__TCA_POLICE_MAX
|
|
|
|
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1)
|
|
|
|
|
2016-05-13 07:08:22 +07:00
|
|
|
/* tca flags definitions */
|
2017-02-16 15:31:12 +07:00
|
|
|
#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */
|
|
|
|
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */
|
|
|
|
#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */
|
|
|
|
#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */
|
sched: cls: enable verbose logging
Currently, when the rule is not to be exclusively executed by the
hardware, extack is not passed along and offloading failures don't
get logged. The idea was that hardware failures are okay because the
rule will get executed in software then and this way it doesn't confuse
unware users.
But this is not helpful in case one needs to understand why a certain
rule failed to get offloaded. Considering it may have been a temporary
failure, like resources exceeded or so, reproducing it later and knowing
that it is triggering the same reason may be challenging.
The ultimate goal is to improve Open vSwitch debuggability when using
flower offloading.
This patch adds a new flag to enable verbose logging. With the flag set,
extack will be passed to the driver, which will be able to log the
error. As the operation itself probably won't fail (not because of this,
at least), current iproute will already log it as a Warning.
The flag is generic, so it can be reused later. No need to restrict it
just for HW offloading. The command line will follow the syntax that
tc-ebpf already uses, tc ... [ verbose ] ... , and extend its meaning.
For example:
# ./tc qdisc add dev p7p1 ingress
# ./tc filter add dev p7p1 parent ffff: protocol ip prio 1 \
flower verbose \
src_mac ed:13:db:00:00:00 dst_mac 01:80:c2:00:00:d0 \
src_ip 56.0.0.0 dst_ip 55.0.0.0 action drop
Warning: TC offload is disabled on net device.
# echo $?
0
# ./tc filter add dev p7p1 parent ffff: protocol ip prio 1 \
flower \
src_mac ff:13:db:00:00:00 dst_mac 01:80:c2:00:00:d0 \
src_ip 56.0.0.0 dst_ip 55.0.0.0 action drop
# echo $?
0
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-14 03:44:27 +07:00
|
|
|
#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */
|
2016-05-13 07:08:22 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* U32 filters */
|
|
|
|
|
|
|
|
#define TC_U32_HTID(h) ((h)&0xFFF00000)
|
|
|
|
#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20)
|
|
|
|
#define TC_U32_HASH(h) (((h)>>12)&0xFF)
|
|
|
|
#define TC_U32_NODE(h) ((h)&0xFFF)
|
|
|
|
#define TC_U32_KEY(h) ((h)&0xFFFFF)
|
|
|
|
#define TC_U32_UNSPEC 0
|
|
|
|
#define TC_U32_ROOT (0xFFF00000)
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_U32_UNSPEC,
|
|
|
|
TCA_U32_CLASSID,
|
|
|
|
TCA_U32_HASH,
|
|
|
|
TCA_U32_LINK,
|
|
|
|
TCA_U32_DIVISOR,
|
|
|
|
TCA_U32_SEL,
|
|
|
|
TCA_U32_POLICE,
|
2016-05-24 08:07:20 +07:00
|
|
|
TCA_U32_ACT,
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_U32_INDEV,
|
|
|
|
TCA_U32_PCNT,
|
|
|
|
TCA_U32_MARK,
|
2016-02-26 22:54:39 +07:00
|
|
|
TCA_U32_FLAGS,
|
2016-04-26 15:06:18 +07:00
|
|
|
TCA_U32_PAD,
|
2005-04-17 05:20:36 +07:00
|
|
|
__TCA_U32_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_U32_MAX (__TCA_U32_MAX - 1)
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_u32_key {
|
2008-03-18 12:46:46 +07:00
|
|
|
__be32 mask;
|
|
|
|
__be32 val;
|
2005-04-17 05:20:36 +07:00
|
|
|
int off;
|
|
|
|
int offmask;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_u32_sel {
|
2005-04-17 05:20:36 +07:00
|
|
|
unsigned char flags;
|
|
|
|
unsigned char offshift;
|
|
|
|
unsigned char nkeys;
|
|
|
|
|
2008-03-18 12:46:46 +07:00
|
|
|
__be16 offmask;
|
2005-04-17 05:20:36 +07:00
|
|
|
__u16 off;
|
|
|
|
short offoff;
|
|
|
|
|
|
|
|
short hoff;
|
2008-03-18 12:46:46 +07:00
|
|
|
__be32 hmask;
|
2005-04-17 05:20:36 +07:00
|
|
|
struct tc_u32_key keys[0];
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_u32_mark {
|
2005-04-17 05:20:36 +07:00
|
|
|
__u32 val;
|
|
|
|
__u32 mask;
|
|
|
|
__u32 success;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_u32_pcnt {
|
2005-04-17 05:20:36 +07:00
|
|
|
__u64 rcnt;
|
|
|
|
__u64 rhit;
|
|
|
|
__u64 kcnts[0];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Flags */
|
|
|
|
|
|
|
|
#define TC_U32_TERMINAL 1
|
|
|
|
#define TC_U32_OFFSET 2
|
|
|
|
#define TC_U32_VAROFFSET 4
|
|
|
|
#define TC_U32_EAT 8
|
|
|
|
|
|
|
|
#define TC_U32_MAXDEPTH 8
|
|
|
|
|
|
|
|
|
|
|
|
/* RSVP filter */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_RSVP_UNSPEC,
|
|
|
|
TCA_RSVP_CLASSID,
|
|
|
|
TCA_RSVP_DST,
|
|
|
|
TCA_RSVP_SRC,
|
|
|
|
TCA_RSVP_PINFO,
|
|
|
|
TCA_RSVP_POLICE,
|
|
|
|
TCA_RSVP_ACT,
|
|
|
|
__TCA_RSVP_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_rsvp_gpi {
|
2005-04-17 05:20:36 +07:00
|
|
|
__u32 key;
|
|
|
|
__u32 mask;
|
|
|
|
int offset;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tc_rsvp_pinfo {
|
2005-04-17 05:20:36 +07:00
|
|
|
struct tc_rsvp_gpi dpi;
|
|
|
|
struct tc_rsvp_gpi spi;
|
|
|
|
__u8 protocol;
|
|
|
|
__u8 tunnelid;
|
|
|
|
__u8 tunnelhdr;
|
2005-06-29 02:56:45 +07:00
|
|
|
__u8 pad;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* ROUTE filter */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_ROUTE4_UNSPEC,
|
|
|
|
TCA_ROUTE4_CLASSID,
|
|
|
|
TCA_ROUTE4_TO,
|
|
|
|
TCA_ROUTE4_FROM,
|
|
|
|
TCA_ROUTE4_IIF,
|
|
|
|
TCA_ROUTE4_POLICE,
|
|
|
|
TCA_ROUTE4_ACT,
|
|
|
|
__TCA_ROUTE4_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1)
|
|
|
|
|
|
|
|
|
|
|
|
/* FW filter */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_FW_UNSPEC,
|
|
|
|
TCA_FW_CLASSID,
|
|
|
|
TCA_FW_POLICE,
|
2019-06-15 16:03:49 +07:00
|
|
|
TCA_FW_INDEV,
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
|
2006-08-26 06:11:42 +07:00
|
|
|
TCA_FW_MASK,
|
2005-04-17 05:20:36 +07:00
|
|
|
__TCA_FW_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_FW_MAX (__TCA_FW_MAX - 1)
|
|
|
|
|
|
|
|
/* TC index filter */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_TCINDEX_UNSPEC,
|
|
|
|
TCA_TCINDEX_HASH,
|
|
|
|
TCA_TCINDEX_MASK,
|
|
|
|
TCA_TCINDEX_SHIFT,
|
|
|
|
TCA_TCINDEX_FALL_THROUGH,
|
|
|
|
TCA_TCINDEX_CLASSID,
|
|
|
|
TCA_TCINDEX_POLICE,
|
|
|
|
TCA_TCINDEX_ACT,
|
|
|
|
__TCA_TCINDEX_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1)
|
|
|
|
|
[NET_SCHED]: Add flow classifier
Add new "flow" classifier, which is meant to extend the SFQ hashing
capabilities without hard-coding new hash functions and also allows
deterministic mappings of keys to classes, replacing some out of tree
iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps
IPs to marks, with fw filters to classes), ...
Some examples:
- Classic SFQ hash:
tc filter add ... flow hash \
keys src,dst,proto,proto-src,proto-dst divisor 1024
- Classic SFQ hash, but using information from conntrack to work properly in
combination with NAT:
tc filter add ... flow hash \
keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024
- Map destination IPs of 192.168.0.0/24 to classids 1-257:
tc filter add ... flow map \
key dst addend -192.168.0.0 divisor 256
- alternatively:
tc filter add ... flow map \
key dst and 0xff
- similar, but reverse ordered:
tc filter add ... flow map \
key dst and 0xff xor 0xff
Perturbation is currently not supported because we can't reliable kill the
timer on destruction.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-01 09:37:42 +07:00
|
|
|
/* Flow filter */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
[NET_SCHED]: Add flow classifier
Add new "flow" classifier, which is meant to extend the SFQ hashing
capabilities without hard-coding new hash functions and also allows
deterministic mappings of keys to classes, replacing some out of tree
iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps
IPs to marks, with fw filters to classes), ...
Some examples:
- Classic SFQ hash:
tc filter add ... flow hash \
keys src,dst,proto,proto-src,proto-dst divisor 1024
- Classic SFQ hash, but using information from conntrack to work properly in
combination with NAT:
tc filter add ... flow hash \
keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024
- Map destination IPs of 192.168.0.0/24 to classids 1-257:
tc filter add ... flow map \
key dst addend -192.168.0.0 divisor 256
- alternatively:
tc filter add ... flow map \
key dst and 0xff
- similar, but reverse ordered:
tc filter add ... flow map \
key dst and 0xff xor 0xff
Perturbation is currently not supported because we can't reliable kill the
timer on destruction.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-01 09:37:42 +07:00
|
|
|
FLOW_KEY_SRC,
|
|
|
|
FLOW_KEY_DST,
|
|
|
|
FLOW_KEY_PROTO,
|
|
|
|
FLOW_KEY_PROTO_SRC,
|
|
|
|
FLOW_KEY_PROTO_DST,
|
|
|
|
FLOW_KEY_IIF,
|
|
|
|
FLOW_KEY_PRIORITY,
|
|
|
|
FLOW_KEY_MARK,
|
|
|
|
FLOW_KEY_NFCT,
|
|
|
|
FLOW_KEY_NFCT_SRC,
|
|
|
|
FLOW_KEY_NFCT_DST,
|
|
|
|
FLOW_KEY_NFCT_PROTO_SRC,
|
|
|
|
FLOW_KEY_NFCT_PROTO_DST,
|
|
|
|
FLOW_KEY_RTCLASSID,
|
|
|
|
FLOW_KEY_SKUID,
|
|
|
|
FLOW_KEY_SKGID,
|
2008-02-06 07:21:04 +07:00
|
|
|
FLOW_KEY_VLAN_TAG,
|
2010-08-21 13:23:15 +07:00
|
|
|
FLOW_KEY_RXHASH,
|
[NET_SCHED]: Add flow classifier
Add new "flow" classifier, which is meant to extend the SFQ hashing
capabilities without hard-coding new hash functions and also allows
deterministic mappings of keys to classes, replacing some out of tree
iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps
IPs to marks, with fw filters to classes), ...
Some examples:
- Classic SFQ hash:
tc filter add ... flow hash \
keys src,dst,proto,proto-src,proto-dst divisor 1024
- Classic SFQ hash, but using information from conntrack to work properly in
combination with NAT:
tc filter add ... flow hash \
keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024
- Map destination IPs of 192.168.0.0/24 to classids 1-257:
tc filter add ... flow map \
key dst addend -192.168.0.0 divisor 256
- alternatively:
tc filter add ... flow map \
key dst and 0xff
- similar, but reverse ordered:
tc filter add ... flow map \
key dst and 0xff xor 0xff
Perturbation is currently not supported because we can't reliable kill the
timer on destruction.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-01 09:37:42 +07:00
|
|
|
__FLOW_KEY_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1)
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
[NET_SCHED]: Add flow classifier
Add new "flow" classifier, which is meant to extend the SFQ hashing
capabilities without hard-coding new hash functions and also allows
deterministic mappings of keys to classes, replacing some out of tree
iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps
IPs to marks, with fw filters to classes), ...
Some examples:
- Classic SFQ hash:
tc filter add ... flow hash \
keys src,dst,proto,proto-src,proto-dst divisor 1024
- Classic SFQ hash, but using information from conntrack to work properly in
combination with NAT:
tc filter add ... flow hash \
keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024
- Map destination IPs of 192.168.0.0/24 to classids 1-257:
tc filter add ... flow map \
key dst addend -192.168.0.0 divisor 256
- alternatively:
tc filter add ... flow map \
key dst and 0xff
- similar, but reverse ordered:
tc filter add ... flow map \
key dst and 0xff xor 0xff
Perturbation is currently not supported because we can't reliable kill the
timer on destruction.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-01 09:37:42 +07:00
|
|
|
FLOW_MODE_MAP,
|
|
|
|
FLOW_MODE_HASH,
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
[NET_SCHED]: Add flow classifier
Add new "flow" classifier, which is meant to extend the SFQ hashing
capabilities without hard-coding new hash functions and also allows
deterministic mappings of keys to classes, replacing some out of tree
iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps
IPs to marks, with fw filters to classes), ...
Some examples:
- Classic SFQ hash:
tc filter add ... flow hash \
keys src,dst,proto,proto-src,proto-dst divisor 1024
- Classic SFQ hash, but using information from conntrack to work properly in
combination with NAT:
tc filter add ... flow hash \
keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024
- Map destination IPs of 192.168.0.0/24 to classids 1-257:
tc filter add ... flow map \
key dst addend -192.168.0.0 divisor 256
- alternatively:
tc filter add ... flow map \
key dst and 0xff
- similar, but reverse ordered:
tc filter add ... flow map \
key dst and 0xff xor 0xff
Perturbation is currently not supported because we can't reliable kill the
timer on destruction.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-01 09:37:42 +07:00
|
|
|
TCA_FLOW_UNSPEC,
|
|
|
|
TCA_FLOW_KEYS,
|
|
|
|
TCA_FLOW_MODE,
|
|
|
|
TCA_FLOW_BASECLASS,
|
|
|
|
TCA_FLOW_RSHIFT,
|
|
|
|
TCA_FLOW_ADDEND,
|
|
|
|
TCA_FLOW_MASK,
|
|
|
|
TCA_FLOW_XOR,
|
|
|
|
TCA_FLOW_DIVISOR,
|
|
|
|
TCA_FLOW_ACT,
|
|
|
|
TCA_FLOW_POLICE,
|
|
|
|
TCA_FLOW_EMATCHES,
|
2008-07-15 10:36:32 +07:00
|
|
|
TCA_FLOW_PERTURB,
|
[NET_SCHED]: Add flow classifier
Add new "flow" classifier, which is meant to extend the SFQ hashing
capabilities without hard-coding new hash functions and also allows
deterministic mappings of keys to classes, replacing some out of tree
iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps
IPs to marks, with fw filters to classes), ...
Some examples:
- Classic SFQ hash:
tc filter add ... flow hash \
keys src,dst,proto,proto-src,proto-dst divisor 1024
- Classic SFQ hash, but using information from conntrack to work properly in
combination with NAT:
tc filter add ... flow hash \
keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024
- Map destination IPs of 192.168.0.0/24 to classids 1-257:
tc filter add ... flow map \
key dst addend -192.168.0.0 divisor 256
- alternatively:
tc filter add ... flow map \
key dst and 0xff
- similar, but reverse ordered:
tc filter add ... flow map \
key dst and 0xff xor 0xff
Perturbation is currently not supported because we can't reliable kill the
timer on destruction.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-01 09:37:42 +07:00
|
|
|
__TCA_FLOW_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1)
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Basic filter */
|
|
|
|
|
2019-01-18 08:14:01 +07:00
|
|
|
struct tc_basic_pcnt {
|
|
|
|
__u64 rcnt;
|
|
|
|
__u64 rhit;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_BASIC_UNSPEC,
|
|
|
|
TCA_BASIC_CLASSID,
|
|
|
|
TCA_BASIC_EMATCHES,
|
|
|
|
TCA_BASIC_ACT,
|
|
|
|
TCA_BASIC_POLICE,
|
2019-01-18 08:14:01 +07:00
|
|
|
TCA_BASIC_PCNT,
|
|
|
|
TCA_BASIC_PAD,
|
2005-04-17 05:20:36 +07:00
|
|
|
__TCA_BASIC_MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
|
|
|
|
|
2008-11-08 13:56:00 +07:00
|
|
|
|
|
|
|
/* Cgroup classifier */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2008-11-08 13:56:00 +07:00
|
|
|
TCA_CGROUP_UNSPEC,
|
|
|
|
TCA_CGROUP_ACT,
|
|
|
|
TCA_CGROUP_POLICE,
|
|
|
|
TCA_CGROUP_EMATCHES,
|
|
|
|
__TCA_CGROUP_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
|
|
|
|
|
net: sched: cls_bpf: add BPF-based classifier
This work contains a lightweight BPF-based traffic classifier that can
serve as a flexible alternative to ematch-based tree classification, i.e.
now that BPF filter engine can also be JITed in the kernel. Naturally, tc
actions and policies are supported as well with cls_bpf. Multiple BPF
programs/filter can be attached for a class, or they can just as well be
written within a single BPF program, that's really up to the user how he
wishes to run/optimize the code, e.g. also for inversion of verdicts etc.
The notion of a BPF program's return/exit codes is being kept as follows:
0: No match
-1: Select classid given in "tc filter ..." command
else: flowid, overwrite the default one
As a minimal usage example with iproute2, we use a 3 band prio root qdisc
on a router with sfq each as leave, and assign ssh and icmp bpf-based
filters to band 1, http traffic to band 2 and the rest to band 3. For the
first two bands we load the bytecode from a file, in the 2nd we load it
inline as an example:
echo 1 > /proc/sys/net/core/bpf_jit_enable
tc qdisc del dev em1 root
tc qdisc add dev em1 root handle 1: prio bands 3 priomap 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
tc qdisc add dev em1 parent 1:1 sfq perturb 16
tc qdisc add dev em1 parent 1:2 sfq perturb 16
tc qdisc add dev em1 parent 1:3 sfq perturb 16
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/ssh.bpf flowid 1:1
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/icmp.bpf flowid 1:1
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/http.bpf flowid 1:2
tc filter add dev em1 parent 1: bpf run bytecode "`bpfc -f tc -i misc.ops`" flowid 1:3
BPF programs can be easily created and passed to tc, either as inline
'bytecode' or 'bytecode-file'. There are a couple of front-ends that can
compile opcodes, for example:
1) People familiar with tcpdump-like filters:
tcpdump -iem1 -ddd port 22 | tr '\n' ',' > /etc/tc/ssh.bpf
2) People that want to low-level program their filters or use BPF
extensions that lack support by libpcap's compiler:
bpfc -f tc -i ssh.ops > /etc/tc/ssh.bpf
ssh.ops example code:
ldh [12]
jne #0x800, drop
ldb [23]
jneq #6, drop
ldh [20]
jset #0x1fff, drop
ldxb 4 * ([14] & 0xf)
ldh [%x + 14]
jeq #0x16, pass
ldh [%x + 16]
jne #0x16, drop
pass: ret #-1
drop: ret #0
It was chosen to load bytecode into tc, since the reverse operation,
tc filter list dev em1, is then able to show the exact commands again.
Possible follow-up work could also include a small expression compiler
for iproute2. Tested with the help of bmon. This idea came up during
the Netfilter Workshop 2013 in Copenhagen. Also thanks to feedback from
Eric Dumazet!
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-28 22:43:02 +07:00
|
|
|
/* BPF classifier */
|
|
|
|
|
2015-09-16 13:05:42 +07:00
|
|
|
#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0)
|
|
|
|
|
net: sched: cls_bpf: add BPF-based classifier
This work contains a lightweight BPF-based traffic classifier that can
serve as a flexible alternative to ematch-based tree classification, i.e.
now that BPF filter engine can also be JITed in the kernel. Naturally, tc
actions and policies are supported as well with cls_bpf. Multiple BPF
programs/filter can be attached for a class, or they can just as well be
written within a single BPF program, that's really up to the user how he
wishes to run/optimize the code, e.g. also for inversion of verdicts etc.
The notion of a BPF program's return/exit codes is being kept as follows:
0: No match
-1: Select classid given in "tc filter ..." command
else: flowid, overwrite the default one
As a minimal usage example with iproute2, we use a 3 band prio root qdisc
on a router with sfq each as leave, and assign ssh and icmp bpf-based
filters to band 1, http traffic to band 2 and the rest to band 3. For the
first two bands we load the bytecode from a file, in the 2nd we load it
inline as an example:
echo 1 > /proc/sys/net/core/bpf_jit_enable
tc qdisc del dev em1 root
tc qdisc add dev em1 root handle 1: prio bands 3 priomap 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
tc qdisc add dev em1 parent 1:1 sfq perturb 16
tc qdisc add dev em1 parent 1:2 sfq perturb 16
tc qdisc add dev em1 parent 1:3 sfq perturb 16
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/ssh.bpf flowid 1:1
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/icmp.bpf flowid 1:1
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/http.bpf flowid 1:2
tc filter add dev em1 parent 1: bpf run bytecode "`bpfc -f tc -i misc.ops`" flowid 1:3
BPF programs can be easily created and passed to tc, either as inline
'bytecode' or 'bytecode-file'. There are a couple of front-ends that can
compile opcodes, for example:
1) People familiar with tcpdump-like filters:
tcpdump -iem1 -ddd port 22 | tr '\n' ',' > /etc/tc/ssh.bpf
2) People that want to low-level program their filters or use BPF
extensions that lack support by libpcap's compiler:
bpfc -f tc -i ssh.ops > /etc/tc/ssh.bpf
ssh.ops example code:
ldh [12]
jne #0x800, drop
ldb [23]
jneq #6, drop
ldh [20]
jset #0x1fff, drop
ldxb 4 * ([14] & 0xf)
ldh [%x + 14]
jeq #0x16, pass
ldh [%x + 16]
jne #0x16, drop
pass: ret #-1
drop: ret #0
It was chosen to load bytecode into tc, since the reverse operation,
tc filter list dev em1, is then able to show the exact commands again.
Possible follow-up work could also include a small expression compiler
for iproute2. Tested with the help of bmon. This idea came up during
the Netfilter Workshop 2013 in Copenhagen. Also thanks to feedback from
Eric Dumazet!
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-28 22:43:02 +07:00
|
|
|
enum {
|
|
|
|
TCA_BPF_UNSPEC,
|
|
|
|
TCA_BPF_ACT,
|
|
|
|
TCA_BPF_POLICE,
|
|
|
|
TCA_BPF_CLASSID,
|
|
|
|
TCA_BPF_OPS_LEN,
|
|
|
|
TCA_BPF_OPS,
|
cls_bpf: add initial eBPF support for programmable classifiers
This work extends the "classic" BPF programmable tc classifier by
extending its scope also to native eBPF code!
This allows for user space to implement own custom, 'safe' C like
classifiers (or whatever other frontend language LLVM et al may
provide in future), that can then be compiled with the LLVM eBPF
backend to an eBPF elf file. The result of this can be loaded into
the kernel via iproute2's tc. In the kernel, they can be JITed on
major archs and thus run in native performance.
Simple, minimal toy example to demonstrate the workflow:
#include <linux/ip.h>
#include <linux/if_ether.h>
#include <linux/bpf.h>
#include "tc_bpf_api.h"
__section("classify")
int cls_main(struct sk_buff *skb)
{
return (0x800 << 16) | load_byte(skb, ETH_HLEN + __builtin_offsetof(struct iphdr, tos));
}
char __license[] __section("license") = "GPL";
The classifier can then be compiled into eBPF opcodes and loaded
via tc, for example:
clang -O2 -emit-llvm -c cls.c -o - | llc -march=bpf -filetype=obj -o cls.o
tc filter add dev em1 parent 1: bpf cls.o [...]
As it has been demonstrated, the scope can even reach up to a fully
fledged flow dissector (similarly as in samples/bpf/sockex2_kern.c).
For tc, maps are allowed to be used, but from kernel context only,
in other words, eBPF code can keep state across filter invocations.
In future, we perhaps may reattach from a different application to
those maps e.g., to read out collected statistics/state.
Similarly as in socket filters, we may extend functionality for eBPF
classifiers over time depending on the use cases. For that purpose,
cls_bpf programs are using BPF_PROG_TYPE_SCHED_CLS program type, so
we can allow additional functions/accessors (e.g. an ABI compatible
offset translation to skb fields/metadata). For an initial cls_bpf
support, we allow the same set of helper functions as eBPF socket
filters, but we could diverge at some point in time w/o problem.
I was wondering whether cls_bpf and act_bpf could share C programs,
I can imagine that at some point, we introduce i) further common
handlers for both (or even beyond their scope), and/or if truly needed
ii) some restricted function space for each of them. Both can be
abstracted easily through struct bpf_verifier_ops in future.
The context of cls_bpf versus act_bpf is slightly different though:
a cls_bpf program will return a specific classid whereas act_bpf a
drop/non-drop return code, latter may also in future mangle skbs.
That said, we can surely have a "classify" and "action" section in
a single object file, or considered mentioned constraint add a
possibility of a shared section.
The workflow for getting native eBPF running from tc [1] is as
follows: for f_bpf, I've added a slightly modified ELF parser code
from Alexei's kernel sample, which reads out the LLVM compiled
object, sets up maps (and dynamically fixes up map fds) if any, and
loads the eBPF instructions all centrally through the bpf syscall.
The resulting fd from the loaded program itself is being passed down
to cls_bpf, which looks up struct bpf_prog from the fd store, and
holds reference, so that it stays available also after tc program
lifetime. On tc filter destruction, it will then drop its reference.
Moreover, I've also added the optional possibility to annotate an
eBPF filter with a name (e.g. path to object file, or something
else if preferred) so that when tc dumps currently installed filters,
some more context can be given to an admin for a given instance (as
opposed to just the file descriptor number).
Last but not least, bpf_prog_get() and bpf_prog_put() needed to be
exported, so that eBPF can be used from cls_bpf built as a module.
Thanks to 60a3b2253c41 ("net: bpf: make eBPF interpreter images
read-only") I think this is of no concern since anything wanting to
alter eBPF opcode after verification stage would crash the kernel.
[1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-01 18:31:48 +07:00
|
|
|
TCA_BPF_FD,
|
|
|
|
TCA_BPF_NAME,
|
2015-09-16 13:05:42 +07:00
|
|
|
TCA_BPF_FLAGS,
|
2016-09-21 17:43:54 +07:00
|
|
|
TCA_BPF_FLAGS_GEN,
|
2017-01-14 05:38:15 +07:00
|
|
|
TCA_BPF_TAG,
|
2017-06-22 01:16:11 +07:00
|
|
|
TCA_BPF_ID,
|
net: sched: cls_bpf: add BPF-based classifier
This work contains a lightweight BPF-based traffic classifier that can
serve as a flexible alternative to ematch-based tree classification, i.e.
now that BPF filter engine can also be JITed in the kernel. Naturally, tc
actions and policies are supported as well with cls_bpf. Multiple BPF
programs/filter can be attached for a class, or they can just as well be
written within a single BPF program, that's really up to the user how he
wishes to run/optimize the code, e.g. also for inversion of verdicts etc.
The notion of a BPF program's return/exit codes is being kept as follows:
0: No match
-1: Select classid given in "tc filter ..." command
else: flowid, overwrite the default one
As a minimal usage example with iproute2, we use a 3 band prio root qdisc
on a router with sfq each as leave, and assign ssh and icmp bpf-based
filters to band 1, http traffic to band 2 and the rest to band 3. For the
first two bands we load the bytecode from a file, in the 2nd we load it
inline as an example:
echo 1 > /proc/sys/net/core/bpf_jit_enable
tc qdisc del dev em1 root
tc qdisc add dev em1 root handle 1: prio bands 3 priomap 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
tc qdisc add dev em1 parent 1:1 sfq perturb 16
tc qdisc add dev em1 parent 1:2 sfq perturb 16
tc qdisc add dev em1 parent 1:3 sfq perturb 16
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/ssh.bpf flowid 1:1
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/icmp.bpf flowid 1:1
tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/http.bpf flowid 1:2
tc filter add dev em1 parent 1: bpf run bytecode "`bpfc -f tc -i misc.ops`" flowid 1:3
BPF programs can be easily created and passed to tc, either as inline
'bytecode' or 'bytecode-file'. There are a couple of front-ends that can
compile opcodes, for example:
1) People familiar with tcpdump-like filters:
tcpdump -iem1 -ddd port 22 | tr '\n' ',' > /etc/tc/ssh.bpf
2) People that want to low-level program their filters or use BPF
extensions that lack support by libpcap's compiler:
bpfc -f tc -i ssh.ops > /etc/tc/ssh.bpf
ssh.ops example code:
ldh [12]
jne #0x800, drop
ldb [23]
jneq #6, drop
ldh [20]
jset #0x1fff, drop
ldxb 4 * ([14] & 0xf)
ldh [%x + 14]
jeq #0x16, pass
ldh [%x + 16]
jne #0x16, drop
pass: ret #-1
drop: ret #0
It was chosen to load bytecode into tc, since the reverse operation,
tc filter list dev em1, is then able to show the exact commands again.
Possible follow-up work could also include a small expression compiler
for iproute2. Tested with the help of bmon. This idea came up during
the Netfilter Workshop 2013 in Copenhagen. Also thanks to feedback from
Eric Dumazet!
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-28 22:43:02 +07:00
|
|
|
__TCA_BPF_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
|
|
|
|
|
2015-05-12 19:56:21 +07:00
|
|
|
/* Flower classifier */
|
|
|
|
|
|
|
|
enum {
|
|
|
|
TCA_FLOWER_UNSPEC,
|
|
|
|
TCA_FLOWER_CLASSID,
|
|
|
|
TCA_FLOWER_INDEV,
|
|
|
|
TCA_FLOWER_ACT,
|
|
|
|
TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
|
|
|
|
TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
|
|
|
|
TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
|
|
|
|
TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
|
|
|
|
TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_IP_PROTO, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_IPV4_DST, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
|
|
|
|
TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
|
|
|
|
TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
|
|
|
|
TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
|
|
|
|
TCA_FLOWER_KEY_TCP_SRC, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_TCP_DST, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_UDP_DST, /* be16 */
|
2016-03-08 17:42:29 +07:00
|
|
|
|
|
|
|
TCA_FLOWER_FLAGS,
|
2016-09-15 19:28:24 +07:00
|
|
|
TCA_FLOWER_KEY_VLAN_ID, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */
|
2016-09-08 20:23:47 +07:00
|
|
|
|
|
|
|
TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */
|
|
|
|
TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
|
|
|
|
|
2016-09-15 19:28:22 +07:00
|
|
|
TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */
|
2016-11-03 19:24:21 +07:00
|
|
|
TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */
|
|
|
|
|
|
|
|
TCA_FLOWER_KEY_SCTP_SRC, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_SCTP_DST, /* be16 */
|
2016-11-07 20:14:39 +07:00
|
|
|
|
|
|
|
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */
|
2016-12-07 19:03:10 +07:00
|
|
|
|
|
|
|
TCA_FLOWER_KEY_FLAGS, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */
|
2016-12-07 19:48:28 +07:00
|
|
|
|
|
|
|
TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */
|
|
|
|
TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */
|
|
|
|
TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */
|
|
|
|
TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */
|
|
|
|
|
2017-01-11 20:05:43 +07:00
|
|
|
TCA_FLOWER_KEY_ARP_SIP, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ARP_SIP_MASK, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ARP_TIP, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ARP_TIP_MASK, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ARP_OP, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ARP_OP_MASK, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ARP_SHA, /* ETH_ALEN */
|
|
|
|
TCA_FLOWER_KEY_ARP_SHA_MASK, /* ETH_ALEN */
|
|
|
|
TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */
|
|
|
|
TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */
|
|
|
|
|
2017-04-23 03:52:47 +07:00
|
|
|
TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */
|
|
|
|
TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */
|
|
|
|
TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */
|
|
|
|
TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */
|
|
|
|
|
2017-05-23 23:40:45 +07:00
|
|
|
TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */
|
|
|
|
|
2017-06-02 01:37:38 +07:00
|
|
|
TCA_FLOWER_KEY_IP_TOS, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_IP_TTL, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */
|
|
|
|
|
2018-07-06 12:38:16 +07:00
|
|
|
TCA_FLOWER_KEY_CVLAN_ID, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */
|
|
|
|
|
2018-07-17 23:27:18 +07:00
|
|
|
TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */
|
|
|
|
|
2018-08-07 22:36:01 +07:00
|
|
|
TCA_FLOWER_KEY_ENC_OPTS,
|
|
|
|
TCA_FLOWER_KEY_ENC_OPTS_MASK,
|
|
|
|
|
2018-09-07 21:22:21 +07:00
|
|
|
TCA_FLOWER_IN_HW_COUNT,
|
|
|
|
|
2018-11-13 07:15:55 +07:00
|
|
|
TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */
|
|
|
|
TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */
|
|
|
|
|
2019-07-09 14:30:50 +07:00
|
|
|
TCA_FLOWER_KEY_CT_STATE, /* u16 */
|
|
|
|
TCA_FLOWER_KEY_CT_STATE_MASK, /* u16 */
|
|
|
|
TCA_FLOWER_KEY_CT_ZONE, /* u16 */
|
|
|
|
TCA_FLOWER_KEY_CT_ZONE_MASK, /* u16 */
|
|
|
|
TCA_FLOWER_KEY_CT_MARK, /* u32 */
|
|
|
|
TCA_FLOWER_KEY_CT_MARK_MASK, /* u32 */
|
|
|
|
TCA_FLOWER_KEY_CT_LABELS, /* u128 */
|
|
|
|
TCA_FLOWER_KEY_CT_LABELS_MASK, /* u128 */
|
|
|
|
|
2015-05-12 19:56:21 +07:00
|
|
|
__TCA_FLOWER_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
|
|
|
|
|
2019-07-09 14:30:50 +07:00
|
|
|
enum {
|
|
|
|
TCA_FLOWER_KEY_CT_FLAGS_NEW = 1 << 0, /* Beginning of a new connection. */
|
|
|
|
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */
|
|
|
|
TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */
|
|
|
|
TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */
|
|
|
|
};
|
|
|
|
|
2018-08-07 22:36:01 +07:00
|
|
|
enum {
|
|
|
|
TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
|
|
|
|
TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
|
|
|
|
* TCA_FLOWER_KEY_ENC_OPT_GENEVE_
|
|
|
|
* attributes
|
|
|
|
*/
|
2019-11-21 17:03:28 +07:00
|
|
|
TCA_FLOWER_KEY_ENC_OPTS_VXLAN, /* Nested
|
|
|
|
* TCA_FLOWER_KEY_ENC_OPT_VXLAN_
|
|
|
|
* attributes
|
|
|
|
*/
|
2019-11-21 17:03:29 +07:00
|
|
|
TCA_FLOWER_KEY_ENC_OPTS_ERSPAN, /* Nested
|
|
|
|
* TCA_FLOWER_KEY_ENC_OPT_ERSPAN_
|
|
|
|
* attributes
|
|
|
|
*/
|
2018-08-07 22:36:01 +07:00
|
|
|
__TCA_FLOWER_KEY_ENC_OPTS_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
|
|
|
|
|
|
|
|
enum {
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */
|
|
|
|
|
|
|
|
__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
|
|
|
|
(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
|
|
|
|
|
2019-11-21 17:03:28 +07:00
|
|
|
enum {
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_VXLAN_UNSPEC,
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP, /* u32 */
|
|
|
|
__TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX \
|
|
|
|
(__TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX - 1)
|
|
|
|
|
2019-11-21 17:03:29 +07:00
|
|
|
enum {
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_UNSPEC,
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX, /* be32 */
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR, /* u8 */
|
|
|
|
TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID, /* u8 */
|
|
|
|
__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \
|
|
|
|
(__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1)
|
|
|
|
|
2016-12-07 19:03:10 +07:00
|
|
|
enum {
|
|
|
|
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
|
2018-03-07 00:11:14 +07:00
|
|
|
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
|
2016-12-07 19:03:10 +07:00
|
|
|
};
|
|
|
|
|
2018-11-13 07:15:55 +07:00
|
|
|
#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */
|
|
|
|
|
2016-07-21 17:03:11 +07:00
|
|
|
/* Match-all classifier */
|
|
|
|
|
2019-01-18 03:44:25 +07:00
|
|
|
struct tc_matchall_pcnt {
|
|
|
|
__u64 rhit;
|
|
|
|
};
|
|
|
|
|
2016-07-21 17:03:11 +07:00
|
|
|
enum {
|
|
|
|
TCA_MATCHALL_UNSPEC,
|
|
|
|
TCA_MATCHALL_CLASSID,
|
|
|
|
TCA_MATCHALL_ACT,
|
2016-07-21 17:03:12 +07:00
|
|
|
TCA_MATCHALL_FLAGS,
|
2019-01-18 03:44:25 +07:00
|
|
|
TCA_MATCHALL_PCNT,
|
|
|
|
TCA_MATCHALL_PAD,
|
2016-07-21 17:03:11 +07:00
|
|
|
__TCA_MATCHALL_MAX,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1)
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Extended Matches */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tcf_ematch_tree_hdr {
|
2005-04-17 05:20:36 +07:00
|
|
|
__u16 nmatches;
|
|
|
|
__u16 progid;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCA_EMATCH_TREE_UNSPEC,
|
|
|
|
TCA_EMATCH_TREE_HDR,
|
|
|
|
TCA_EMATCH_TREE_LIST,
|
|
|
|
__TCA_EMATCH_TREE_MAX
|
|
|
|
};
|
|
|
|
#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tcf_ematch_hdr {
|
2005-04-17 05:20:36 +07:00
|
|
|
__u16 matchid;
|
|
|
|
__u16 kind;
|
|
|
|
__u16 flags;
|
|
|
|
__u16 pad; /* currently unused */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* 0 1
|
|
|
|
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
|
|
|
|
* +-----------------------+-+-+---+
|
|
|
|
* | Unused |S|I| R |
|
|
|
|
* +-----------------------+-+-+---+
|
|
|
|
*
|
|
|
|
* R(2) ::= relation to next ematch
|
|
|
|
* where: 0 0 END (last ematch)
|
|
|
|
* 0 1 AND
|
|
|
|
* 1 0 OR
|
|
|
|
* 1 1 Unused (invalid)
|
|
|
|
* I(1) ::= invert result
|
|
|
|
* S(1) ::= simple payload
|
|
|
|
*/
|
|
|
|
#define TCF_EM_REL_END 0
|
|
|
|
#define TCF_EM_REL_AND (1<<0)
|
|
|
|
#define TCF_EM_REL_OR (1<<1)
|
|
|
|
#define TCF_EM_INVERT (1<<2)
|
|
|
|
#define TCF_EM_SIMPLE (1<<3)
|
|
|
|
|
|
|
|
#define TCF_EM_REL_MASK 3
|
|
|
|
#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCF_LAYER_LINK,
|
|
|
|
TCF_LAYER_NETWORK,
|
|
|
|
TCF_LAYER_TRANSPORT,
|
|
|
|
__TCF_LAYER_MAX
|
|
|
|
};
|
|
|
|
#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
|
|
|
|
|
|
|
|
/* Ematch type assignments
|
|
|
|
* 1..32767 Reserved for ematches inside kernel tree
|
|
|
|
* 32768..65535 Free to use, not reliable
|
|
|
|
*/
|
2007-07-12 09:46:26 +07:00
|
|
|
#define TCF_EM_CONTAINER 0
|
|
|
|
#define TCF_EM_CMP 1
|
|
|
|
#define TCF_EM_NBYTE 2
|
|
|
|
#define TCF_EM_U32 3
|
|
|
|
#define TCF_EM_META 4
|
|
|
|
#define TCF_EM_TEXT 5
|
2012-07-04 10:32:03 +07:00
|
|
|
#define TCF_EM_VLAN 6
|
|
|
|
#define TCF_EM_CANID 7
|
2012-07-11 17:56:57 +07:00
|
|
|
#define TCF_EM_IPSET 8
|
2018-02-16 00:42:43 +07:00
|
|
|
#define TCF_EM_IPT 9
|
|
|
|
#define TCF_EM_MAX 9
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCF_EM_PROG_TC
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
enum {
|
2005-04-17 05:20:36 +07:00
|
|
|
TCF_EM_OPND_EQ,
|
|
|
|
TCF_EM_OPND_GT,
|
|
|
|
TCF_EM_OPND_LT
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|