2017-11-01 21:08:43 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
2005-04-17 05:20:36 +07:00
|
|
|
#ifndef __LINUX_IF_PACKET_H
|
|
|
|
#define __LINUX_IF_PACKET_H
|
|
|
|
|
2006-11-08 15:26:29 +07:00
|
|
|
#include <linux/types.h>
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct sockaddr_pkt {
|
2005-04-17 05:20:36 +07:00
|
|
|
unsigned short spkt_family;
|
|
|
|
unsigned char spkt_device[14];
|
2006-11-08 15:26:29 +07:00
|
|
|
__be16 spkt_protocol;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct sockaddr_ll {
|
2005-04-17 05:20:36 +07:00
|
|
|
unsigned short sll_family;
|
2006-11-08 15:26:29 +07:00
|
|
|
__be16 sll_protocol;
|
2005-04-17 05:20:36 +07:00
|
|
|
int sll_ifindex;
|
|
|
|
unsigned short sll_hatype;
|
|
|
|
unsigned char sll_pkttype;
|
|
|
|
unsigned char sll_halen;
|
|
|
|
unsigned char sll_addr[8];
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Packet types */

#define PACKET_HOST		0		/* To us		*/
#define PACKET_BROADCAST	1		/* To all		*/
#define PACKET_MULTICAST	2		/* To group		*/
#define PACKET_OTHERHOST	3		/* To someone else 	*/
#define PACKET_OUTGOING		4		/* Outgoing of any type */
#define PACKET_LOOPBACK		5		/* MC/BRD frame looped back */
|
netlink: specify netlink packet direction for nlmon
In order to facilitate development for netlink protocol dissector,
fill the unused field skb->pkt_type of the cloned skb with a hint
of the address space of the new owner (receiver) socket in the
notion of "to kernel" resp. "to user".
At the time we invoke __netlink_deliver_tap_skb(), we already have
set the new skb owner via netlink_skb_set_owner_r(), so we can use
that for netlink_is_kernel() probing.
In normal PF_PACKET network traffic, this field denotes if the
packet is destined for us (PACKET_HOST), if it's broadcast
(PACKET_BROADCAST), etc.
As we only have 3 bit reserved, we can use the value (= 6) of
PACKET_FASTROUTE as it's _not used_ anywhere in the whole kernel
and not supported anywhere, and packets of such type were never
exposed to user space, so there are no overlapping users of such
kind. Thus, as wished, that seems the only way to make both
PACKET_* values non-overlapping and therefore device agnostic.
By using those two flags for netlink skbs on nlmon devices, they
can be made available and picked up via sll_pkttype (previously
unused in netlink context) in struct sockaddr_ll. We now have
these two directions:
- PACKET_USER (= 6) -> to user space
- PACKET_KERNEL (= 7) -> to kernel space
Partial `ip a` example strace for sa_family=AF_NETLINK with
detected nl msg direction:
syscall: direction:
sendto(3, ...) = 40 /* to kernel */
recvmsg(3, ...) = 3404 /* to user */
recvmsg(3, ...) = 1120 /* to user */
recvmsg(3, ...) = 20 /* to user */
sendto(3, ...) = 40 /* to kernel */
recvmsg(3, ...) = 168 /* to user */
recvmsg(3, ...) = 144 /* to user */
recvmsg(3, ...) = 20 /* to user */
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Jakub Zawadzki <darkjames-ws@darkjames.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-23 20:35:56 +07:00
|
|
|
/*
 * Direction hints for netlink packets; they are reported through
 * sll_pkttype in struct sockaddr_ll.
 */
#define PACKET_USER		6		/* To user space	*/
#define PACKET_KERNEL		7		/* To kernel space	*/
/* Unused, PACKET_FASTROUTE and PACKET_LOOPBACK are invisible to user space */
/* PACKET_FASTROUTE deliberately shares the value 6 with PACKET_USER. */
#define PACKET_FASTROUTE	6		/* Fastrouted frame	*/
|
|
|
|
|
|
|
|
/* Packet socket options */

#define PACKET_ADD_MEMBERSHIP		1
#define PACKET_DROP_MEMBERSHIP		2
#define PACKET_RECV_OUTPUT		3
/* Value 4 is still used by obsolete turbo-packet. */
#define PACKET_RX_RING			5
#define PACKET_STATISTICS		6
#define PACKET_COPY_THRESH		7
#define PACKET_AUXDATA			8
#define PACKET_ORIGDEV			9
#define PACKET_VERSION			10
#define PACKET_HDRLEN			11
#define PACKET_RESERVE			12
#define PACKET_TX_RING			13
#define PACKET_LOSS			14
#define PACKET_VNET_HDR			15
#define PACKET_TX_TIMESTAMP		16
#define PACKET_TIMESTAMP		17
#define PACKET_FANOUT			18
#define PACKET_TX_HAS_OFF		19
|
packet: introduce PACKET_QDISC_BYPASS socket option
This patch introduces a PACKET_QDISC_BYPASS socket option, that
allows for using a similar xmit() function as in pktgen instead
of taking the dev_queue_xmit() path. This can be very useful when
PF_PACKET applications are required to be used in a similar
scenario as pktgen, but with full, flexible packet payload that
needs to be provided, for example.
On default, nothing changes in behaviour for normal PF_PACKET
TX users, so everything stays as is for applications. New users,
however, can now set PACKET_QDISC_BYPASS if needed to prevent
own packets from i) reentering packet_rcv() and ii) to directly
push the frame to the driver.
In doing so we can increase pps (here 64 byte packets) for
PF_PACKET a bit:
# CPUs -- QDISC_BYPASS -- qdisc path -- qdisc path[**]
1 CPU == 1,509,628 pps -- 1,208,708 -- 1,247,436
2 CPUs == 3,198,659 pps -- 2,536,012 -- 1,605,779
3 CPUs == 4,787,992 pps -- 3,788,740 -- 1,735,610
4 CPUs == 6,173,956 pps -- 4,907,799 -- 1,909,114
5 CPUs == 7,495,676 pps -- 5,956,499 -- 2,014,422
6 CPUs == 9,001,496 pps -- 7,145,064 -- 2,155,261
7 CPUs == 10,229,776 pps -- 8,190,596 -- 2,220,619
8 CPUs == 11,040,732 pps -- 9,188,544 -- 2,241,879
9 CPUs == 12,009,076 pps -- 10,275,936 -- 2,068,447
10 CPUs == 11,380,052 pps -- 11,265,337 -- 1,578,689
11 CPUs == 11,672,676 pps -- 11,845,344 -- 1,297,412
[...]
20 CPUs == 11,363,192 pps -- 11,014,933 -- 1,245,081
[**]: qdisc path with packet_rcv(), how probably most people
seem to use it (hopefully not anymore if not needed)
The test was done using a modified trafgen, sending a simple
static 64 bytes packet, on all CPUs. The trick in the fast
"qdisc path" case, is to avoid reentering packet_rcv() by
setting the RAW socket protocol to zero, like:
socket(PF_PACKET, SOCK_RAW, 0);
Tradeoffs are documented as well in this patch, clearly, if
queues are busy, we will drop more packets, tc disciplines are
ignored, and these packets are not visible to taps anymore. For
a pktgen like scenario, we argue that this is acceptable.
The pointer to the xmit function has been placed in packet
socket structure hole between cached_dev and prot_hook that
is hot anyway as we're working on cached_dev in each send path.
Done in joint work together with Jesper Dangaard Brouer.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 17:36:17 +07:00
|
|
|
/* Packet socket options (continued) */
#define PACKET_QDISC_BYPASS		20
#define PACKET_ROLLOVER_STATS		21
#define PACKET_FANOUT_DATA		22
#define PACKET_IGNORE_OUTGOING		23
|
2011-07-05 15:45:05 +07:00
|
|
|
|
|
|
|
/* Fanout policies */
#define PACKET_FANOUT_HASH		0
#define PACKET_FANOUT_LB		1
#define PACKET_FANOUT_CPU		2
|
packet: packet fanout rollover during socket overload
Changes:
v3->v2: rebase (no other changes)
passes selftest
v2->v1: read f->num_members only once
fix bug: test rollover mode + flag
Minimize packet drop in a fanout group. If one socket is full,
roll over packets to another from the group. Maintain flow
affinity during normal load using an rxhash fanout policy, while
dispersing unexpected traffic storms that hit a single cpu, such
as spoofed-source DoS flows. Rollover breaks affinity for flows
arriving at saturated sockets during those conditions.
The patch adds a fanout policy ROLLOVER that rotates between sockets,
filling each socket before moving to the next. It also adds a fanout
flag ROLLOVER. If passed along with any other fanout policy, the
primary policy is applied until the chosen socket is full. Then,
rollover selects another socket, to delay packet drop until the
entire system is saturated.
Probing sockets is not free. Selecting the last used socket, as
rollover does, is a greedy approach that maximizes chance of
success, at the cost of extreme load imbalance. In practice, with
sufficiently long queues to absorb bursts, sockets are drained in
parallel and load balance looks uniform in `top`.
To avoid contention, scales counters with number of sockets and
accesses them lockfree. Values are bounds checked to ensure
correctness.
Tested using an application with 9 threads pinned to CPUs, one socket
per thread and sufficient busywork per packet operation to limits each
thread to handling 32 Kpps. When sent 500 Kpps single UDP stream
packets, a FANOUT_CPU setup processes 32 Kpps in total without this
patch, 270 Kpps with the patch. Tested with read() and with a packet
ring (V1).
Also, passes psock_fanout.c unit test added to selftests.
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-03-19 17:18:11 +07:00
|
|
|
/* Fanout policies (continued) */
#define PACKET_FANOUT_ROLLOVER		3
#define PACKET_FANOUT_RND		4
#define PACKET_FANOUT_QM		5
#define PACKET_FANOUT_CBPF		6
#define PACKET_FANOUT_EBPF		7
|
packet: packet fanout rollover during socket overload
Changes:
v3->v2: rebase (no other changes)
passes selftest
v2->v1: read f->num_members only once
fix bug: test rollover mode + flag
Minimize packet drop in a fanout group. If one socket is full,
roll over packets to another from the group. Maintain flow
affinity during normal load using an rxhash fanout policy, while
dispersing unexpected traffic storms that hit a single cpu, such
as spoofed-source DoS flows. Rollover breaks affinity for flows
arriving at saturated sockets during those conditions.
The patch adds a fanout policy ROLLOVER that rotates between sockets,
filling each socket before moving to the next. It also adds a fanout
flag ROLLOVER. If passed along with any other fanout policy, the
primary policy is applied until the chosen socket is full. Then,
rollover selects another socket, to delay packet drop until the
entire system is saturated.
Probing sockets is not free. Selecting the last used socket, as
rollover does, is a greedy approach that maximizes chance of
success, at the cost of extreme load imbalance. In practice, with
sufficiently long queues to absorb bursts, sockets are drained in
parallel and load balance looks uniform in `top`.
To avoid contention, scales counters with number of sockets and
accesses them lockfree. Values are bounds checked to ensure
correctness.
Tested using an application with 9 threads pinned to CPUs, one socket
per thread and sufficient busywork per packet operation to limits each
thread to handling 32 Kpps. When sent 500 Kpps single UDP stream
packets, a FANOUT_CPU setup processes 32 Kpps in total without this
patch, 270 Kpps with the patch. Tested with read() and with a packet
ring (V1).
Also, passes psock_fanout.c unit test added to selftests.
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-03-19 17:18:11 +07:00
|
|
|
/* Fanout flags; these bits sit above the range used by the fanout policies. */
#define PACKET_FANOUT_FLAG_ROLLOVER	0x1000
#define PACKET_FANOUT_FLAG_UNIQUEID	0x2000
#define PACKET_FANOUT_FLAG_DEFRAG	0x8000
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
/* Packet socket counters: a packet count and a drop count. */
struct tpacket_stats {
	unsigned int	tp_packets;
	unsigned int	tp_drops;
};
|
|
|
|
|
2011-08-19 17:18:15 +07:00
|
|
|
/*
 * Version 3 counters: the same two leading fields as struct tpacket_stats,
 * followed by a queue-freeze counter.
 */
struct tpacket_stats_v3 {
	unsigned int	tp_packets;
	unsigned int	tp_drops;
	unsigned int	tp_freeze_q_cnt;
};
|
|
|
|
|
2015-05-12 22:56:50 +07:00
|
|
|
struct tpacket_rollover_stats {
|
|
|
|
__aligned_u64 tp_all;
|
|
|
|
__aligned_u64 tp_huge;
|
|
|
|
__aligned_u64 tp_failed;
|
|
|
|
};
|
|
|
|
|
2011-08-19 17:18:15 +07:00
|
|
|
union tpacket_stats_u {
|
|
|
|
struct tpacket_stats stats1;
|
|
|
|
struct tpacket_stats_v3 stats3;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tpacket_auxdata {
|
2007-02-05 14:31:32 +07:00
|
|
|
__u32 tp_status;
|
|
|
|
__u32 tp_len;
|
|
|
|
__u32 tp_snaplen;
|
|
|
|
__u16 tp_mac;
|
|
|
|
__u16 tp_net;
|
2008-07-15 12:50:39 +07:00
|
|
|
__u16 tp_vlan_tci;
|
2013-12-17 20:53:40 +07:00
|
|
|
__u16 tp_vlan_tpid;
|
2007-02-05 14:31:32 +07:00
|
|
|
};
|
|
|
|
|
2009-05-19 12:11:22 +07:00
|
|
|
/* Rx ring - header status */
#define TP_STATUS_KERNEL		      0
#define TP_STATUS_USER			(1 << 0)
#define TP_STATUS_COPY			(1 << 1)
#define TP_STATUS_LOSING		(1 << 2)
#define TP_STATUS_CSUMNOTREADY		(1 << 3)
#define TP_STATUS_VLAN_VALID		(1 << 4) /* auxdata has valid tp_vlan_tci */
#define TP_STATUS_BLK_TMO		(1 << 5)
#define TP_STATUS_VLAN_TPID_VALID	(1 << 6) /* auxdata has valid tp_vlan_tpid */
#define TP_STATUS_CSUM_VALID		(1 << 7)
|
2009-05-19 12:11:22 +07:00
|
|
|
|
|
|
|
/* Tx ring - header status */
#define TP_STATUS_AVAILABLE	      0
#define TP_STATUS_SEND_REQUEST	(1 << 0)
#define TP_STATUS_SENDING	(1 << 1)
#define TP_STATUS_WRONG_FORMAT	(1 << 2)
|
2009-05-19 12:11:22 +07:00
|
|
|
|
packet: if hw/sw ts enabled in rx/tx ring, report which ts we got
Currently, there is no way to find out which timestamp is reported in
tpacket{,2,3}_hdr's tp_sec, tp_{n,u}sec members. It can be one of
SOF_TIMESTAMPING_SYS_HARDWARE, SOF_TIMESTAMPING_RAW_HARDWARE,
SOF_TIMESTAMPING_SOFTWARE, or a fallback variant late call from the
PF_PACKET code in software.
Therefore, report in the tp_status member of the ring buffer which
timestamp has been reported for RX and TX path. This should not break
anything for the following reasons: i) in RX ring path, the user needs
to test for tp_status & TP_STATUS_USER, and later for other flags as
well such as TP_STATUS_VLAN_VALID et al, so adding other flags will
do no harm; ii) in TX ring path, time stamps with PACKET_TIMESTAMP
socketoption are not available resp. had no effect except that the
application setting this is buggy. Next to TP_STATUS_AVAILABLE, the
user also should check for other flags such as TP_STATUS_WRONG_FORMAT
to reclaim frames to the application. Thus, in case TX ts are turned
off (default case), nothing happens to the application logic, and in
case we want to use this new feature, we now can also check which of
the ts source is reported in the status field as provided in the docs.
Reported-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-04-23 07:39:31 +07:00
|
|
|
/*
 * Rx and Tx ring - header status
 *
 * These bits report which timestamp source filled in the header's
 * timestamp fields. Bit 31 is written with an unsigned constant because
 * shifting a signed 1 into the sign bit is undefined behaviour.
 */
#define TP_STATUS_TS_SOFTWARE		(1 << 29)
#define TP_STATUS_TS_SYS_HARDWARE	(1 << 30) /* deprecated, never set */
#define TP_STATUS_TS_RAW_HARDWARE	(1U << 31)
|
packet: if hw/sw ts enabled in rx/tx ring, report which ts we got
Currently, there is no way to find out which timestamp is reported in
tpacket{,2,3}_hdr's tp_sec, tp_{n,u}sec members. It can be one of
SOF_TIMESTAMPING_SYS_HARDWARE, SOF_TIMESTAMPING_RAW_HARDWARE,
SOF_TIMESTAMPING_SOFTWARE, or a fallback variant late call from the
PF_PACKET code in software.
Therefore, report in the tp_status member of the ring buffer which
timestamp has been reported for RX and TX path. This should not break
anything for the following reasons: i) in RX ring path, the user needs
to test for tp_status & TP_STATUS_USER, and later for other flags as
well such as TP_STATUS_VLAN_VALID et al, so adding other flags will
do no harm; ii) in TX ring path, time stamps with PACKET_TIMESTAMP
socketoption are not available resp. had no effect except that the
application setting this is buggy. Next to TP_STATUS_AVAILABLE, the
user also should check for other flags such as TP_STATUS_WRONG_FORMAT
to reclaim frames to the application. Thus, in case TX ts are turned
off (default case), nothing happens to the application logic, and in
case we want to use this new feature, we now can also check which of
the ts source is reported in the status field as provided in the docs.
Reported-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-04-23 07:39:31 +07:00
|
|
|
|
2011-08-19 17:18:15 +07:00
|
|
|
/* Rx ring - feature request bits */
#define TP_FT_REQ_FILL_RXHASH	0x1
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
/*
 * Header placed at the start of each ring frame.
 *
 * tp_status holds TP_STATUS_* bits. tp_mac and tp_net are offsets from the
 * start of the frame to the optional MAC header and to the packet data.
 * tp_sec/tp_usec carry the packet timestamp.
 *
 * tp_status is an unsigned long, so the size of this structure depends on
 * the width of long.
 */
struct tpacket_hdr {
	unsigned long	tp_status;
	unsigned int	tp_len;
	unsigned int	tp_snaplen;
	unsigned short	tp_mac;
	unsigned short	tp_net;
	unsigned int	tp_sec;
	unsigned int	tp_usec;
};
|
|
|
|
|
|
|
|
/*
 * TPACKET_ALIGN(x) rounds x up to the next multiple of TPACKET_ALIGNMENT.
 * The mask arithmetic relies on TPACKET_ALIGNMENT being a power of two.
 *
 * TPACKET_HDRLEN is the aligned size of struct tpacket_hdr plus the size
 * of the struct sockaddr_ll that follows it in a frame.
 */
#define TPACKET_ALIGNMENT	16
#define TPACKET_ALIGN(x)	(((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
#define TPACKET_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
struct tpacket2_hdr {
|
2008-07-15 12:50:15 +07:00
|
|
|
__u32 tp_status;
|
|
|
|
__u32 tp_len;
|
|
|
|
__u32 tp_snaplen;
|
|
|
|
__u16 tp_mac;
|
|
|
|
__u16 tp_net;
|
|
|
|
__u32 tp_sec;
|
|
|
|
__u32 tp_nsec;
|
2008-07-15 12:50:39 +07:00
|
|
|
__u16 tp_vlan_tci;
|
2013-12-17 20:53:40 +07:00
|
|
|
__u16 tp_vlan_tpid;
|
|
|
|
__u8 tp_padding[4];
|
2008-07-15 12:50:15 +07:00
|
|
|
};
|
|
|
|
|
2011-08-25 17:43:30 +07:00
|
|
|
struct tpacket_hdr_variant1 {
|
2011-08-19 17:18:15 +07:00
|
|
|
__u32 tp_rxhash;
|
|
|
|
__u32 tp_vlan_tci;
|
2013-12-17 20:53:40 +07:00
|
|
|
__u16 tp_vlan_tpid;
|
|
|
|
__u16 tp_padding;
|
2011-08-19 17:18:15 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
struct tpacket3_hdr {
|
|
|
|
__u32 tp_next_offset;
|
|
|
|
__u32 tp_sec;
|
|
|
|
__u32 tp_nsec;
|
|
|
|
__u32 tp_snaplen;
|
|
|
|
__u32 tp_len;
|
|
|
|
__u32 tp_status;
|
|
|
|
__u16 tp_mac;
|
|
|
|
__u16 tp_net;
|
|
|
|
/* pkt_hdr variants */
|
|
|
|
union {
|
2011-08-25 17:43:30 +07:00
|
|
|
struct tpacket_hdr_variant1 hv1;
|
2011-08-19 17:18:15 +07:00
|
|
|
};
|
2013-12-17 20:53:40 +07:00
|
|
|
__u8 tp_padding[8];
|
2011-08-19 17:18:15 +07:00
|
|
|
};
|
|
|
|
|
2011-08-25 17:43:30 +07:00
|
|
|
/*
 * Block descriptor timestamp: seconds plus a sub-second part. The
 * sub-second part is one word that can be read as ts_usec or ts_nsec.
 */
struct tpacket_bd_ts {
	unsigned int ts_sec;
	union {
		unsigned int ts_usec;
		unsigned int ts_nsec;
	};
};
|
|
|
|
|
2011-08-25 17:43:30 +07:00
|
|
|
struct tpacket_hdr_v1 {
|
2011-08-19 17:18:15 +07:00
|
|
|
__u32 block_status;
|
|
|
|
__u32 num_pkts;
|
|
|
|
__u32 offset_to_first_pkt;
|
|
|
|
|
|
|
|
/* Number of valid bytes (including padding)
|
|
|
|
* blk_len <= tp_block_size
|
|
|
|
*/
|
|
|
|
__u32 blk_len;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Quite a few uses of sequence number:
|
|
|
|
* 1. Make sure cache flush etc worked.
|
|
|
|
* Well, one can argue - why not use the increasing ts below?
|
|
|
|
* But look at 2. below first.
|
|
|
|
* 2. When you pass around blocks to other user space decoders,
|
|
|
|
* you can see which blk[s] is[are] outstanding etc.
|
|
|
|
* 3. Validate kernel code.
|
|
|
|
*/
|
2011-09-30 09:09:54 +07:00
|
|
|
__aligned_u64 seq_num;
|
2011-08-19 17:18:15 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* ts_last_pkt:
|
|
|
|
*
|
|
|
|
* Case 1. Block has 'N'(N >=1) packets and TMO'd(timed out)
|
|
|
|
* ts_last_pkt == 'time-stamp of last packet' and NOT the
|
|
|
|
* time when the timer fired and the block was closed.
|
|
|
|
* By providing the ts of the last packet we can absolutely
|
|
|
|
* guarantee that time-stamp wise, the first packet in the
|
|
|
|
* next block will never precede the last packet of the
|
|
|
|
* previous block.
|
|
|
|
* Case 2. Block has zero packets and TMO'd
|
|
|
|
* ts_last_pkt = time when the timer fired and the block
|
|
|
|
* was closed.
|
|
|
|
* Case 3. Block has 'N' packets and NO TMO.
|
|
|
|
* ts_last_pkt = time-stamp of the last pkt in the block.
|
|
|
|
*
|
|
|
|
* ts_first_pkt:
|
|
|
|
* Is always the time-stamp when the block was opened.
|
|
|
|
* Case a) ZERO packets
|
|
|
|
* No packets to deal with but atleast you know the
|
|
|
|
* time-interval of this block.
|
|
|
|
* Case b) Non-zero packets
|
|
|
|
* Use the ts of the first packet in the block.
|
|
|
|
*
|
|
|
|
*/
|
2011-08-25 17:43:30 +07:00
|
|
|
struct tpacket_bd_ts ts_first_pkt, ts_last_pkt;
|
2011-08-19 17:18:15 +07:00
|
|
|
};
|
|
|
|
|
2011-08-25 17:43:30 +07:00
|
|
|
union tpacket_bd_header_u {
|
|
|
|
struct tpacket_hdr_v1 bh1;
|
2011-08-19 17:18:15 +07:00
|
|
|
};
|
|
|
|
|
2011-08-25 17:43:30 +07:00
|
|
|
struct tpacket_block_desc {
|
2011-08-19 17:18:15 +07:00
|
|
|
__u32 version;
|
|
|
|
__u32 offset_to_priv;
|
2011-08-25 17:43:30 +07:00
|
|
|
union tpacket_bd_header_u hdr;
|
2011-08-19 17:18:15 +07:00
|
|
|
};
|
|
|
|
|
2008-07-15 12:50:15 +07:00
|
|
|
/*
 * Header lengths for the version 2 and version 3 frame headers: the
 * TPACKET_ALIGN-ed header size plus the size of struct sockaddr_ll.
 */
#define TPACKET2_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll))
#define TPACKET3_HDRLEN		(TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll))
|
2008-07-15 12:50:15 +07:00
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
/*
 * Ring header versions, numbered from 0 (presumably the values taken by
 * the PACKET_VERSION socket option - confirm).
 */
enum tpacket_versions {
	TPACKET_V1,
	TPACKET_V2,
	TPACKET_V3
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
   Frame structure:

   - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
   - struct tpacket_hdr
   - pad to TPACKET_ALIGNMENT=16
   - struct sockaddr_ll
   - Gap, chosen so that packet data (Start+tp_net) aligns to TPACKET_ALIGNMENT=16
   - Start+tp_mac: [ Optional MAC header ]
   - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
   - Pad to align to TPACKET_ALIGNMENT=16
 */
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
/* Ring geometry request: block size and count, frame size and count. */
struct tpacket_req {
	unsigned int	tp_block_size;	/* Minimal size of contiguous block */
	unsigned int	tp_block_nr;	/* Number of blocks */
	unsigned int	tp_frame_size;	/* Size of frame */
	unsigned int	tp_frame_nr;	/* Total number of frames */
};
|
|
|
|
|
2011-08-19 17:18:15 +07:00
|
|
|
/*
 * Version 3 ring request. The first four fields match struct tpacket_req;
 * the rest add a block retire timeout, a private area size and a feature
 * request word (see TP_FT_REQ_*).
 */
struct tpacket_req3 {
	unsigned int	tp_block_size;	/* Minimal size of contiguous block */
	unsigned int	tp_block_nr;	/* Number of blocks */
	unsigned int	tp_frame_size;	/* Size of frame */
	unsigned int	tp_frame_nr;	/* Total number of frames */
	unsigned int	tp_retire_blk_tov; /* timeout in msecs */
	unsigned int	tp_sizeof_priv; /* offset to private data area */
	unsigned int	tp_feature_req_word;
};
|
|
|
|
|
|
|
|
union tpacket_req_u {
|
|
|
|
struct tpacket_req req;
|
|
|
|
struct tpacket_req3 req3;
|
|
|
|
};
|
|
|
|
|
2009-11-05 00:50:58 +07:00
|
|
|
/*
 * Membership request (presumably the argument of PACKET_ADD_MEMBERSHIP and
 * PACKET_DROP_MEMBERSHIP - confirm). mr_type is presumably one of the
 * PACKET_MR_* values, and mr_alen the used length of mr_address.
 */
struct packet_mreq {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[8];
};
|
|
|
|
|
|
|
|
/* Membership types (presumably for mr_type in struct packet_mreq - confirm) */
#define PACKET_MR_MULTICAST	0
#define PACKET_MR_PROMISC	1
#define PACKET_MR_ALLMULTI	2
#define PACKET_MR_UNICAST	3
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#endif
|