linux_dsm_epyc7002/include/uapi/linux/inet_diag.h
Martin KaFai Lau 085c20cacf bpf: inet_diag: Dump bpf_sk_storages in inet_diag_dump()
This patch will dump out the bpf_sk_storages of a sk
if the request has the INET_DIAG_REQ_SK_BPF_STORAGES nlattr.

An array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD can be specified in
INET_DIAG_REQ_SK_BPF_STORAGES to select which bpf_sk_storage to dump.
If no map_fd is specified, all bpf_sk_storages of a sk will be dumped.

bpf_sk_storages can be added to the system at runtime.  It is difficult
to find a proper static value for cb->min_dump_alloc.

This patch learns the nlattr size required to dump the bpf_sk_storages
of a sk.  If it happens to be the very first nlmsg of a dump and it
cannot fit the needed bpf_sk_storages,  it will try to expand the
skb by "pskb_expand_head()".

Instead of expanding it in inet_sk_diag_fill(), it is expanded at a
sleepable context in __inet_diag_dump() so __GFP_DIRECT_RECLAIM can
be used.  In __inet_diag_dump(), it will retry as long as the
skb is empty and the cb->min_dump_alloc becomes larger than before.
cb->min_dump_alloc is bounded by KMALLOC_MAX_SIZE.  The min_dump_alloc
is also changed from 'u16' to 'u32' to accommodate a sk that may have
a few large bpf_sk_storages.

The updated cb->min_dump_alloc will also be used to allocate the skb in
the next dump.  This logic already exists in netlink_dump().

Here is the sample output of a locally modified 'ss' and it could be made
more readable by using BTF later:
[root@arch-fb-vm1 ~]# ss --bpf-map-id 14 --bpf-map-id 13 -t6an 'dst [::1]:8989'
State Recv-Q Send-Q Local Address:Port  Peer Address:PortProcess
ESTAB 0      0              [::1]:51072        [::1]:8989
	 bpf_map_id:14 value:[ 3feb ]
	 bpf_map_id:13 value:[ 3f ]
ESTAB 0      0              [::1]:51070        [::1]:8989
	 bpf_map_id:14 value:[ 3feb ]
	 bpf_map_id:13 value:[ 3f ]

[root@arch-fb-vm1 ~]# ~/devshare/github/iproute2/misc/ss --bpf-maps -t6an 'dst [::1]:8989'
State         Recv-Q         Send-Q                   Local Address:Port                    Peer Address:Port         Process
ESTAB         0              0                                [::1]:51072                          [::1]:8989
	 bpf_map_id:14 value:[ 3feb ]
	 bpf_map_id:13 value:[ 3f ]
	 bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ]
ESTAB         0              0                                [::1]:51070                          [::1]:8989
	 bpf_map_id:14 value:[ 3feb ]
	 bpf_map_id:13 value:[ 3f ]
	 bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ]

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200225230427.1976129-1-kafai@fb.com
2020-02-27 18:50:19 -08:00

218 lines
4.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_INET_DIAG_H_
#define _UAPI_INET_DIAG_H_
#include <linux/types.h>
/* Just some random number */
#define TCPDIAG_GETSOCK 18
#define DCCPDIAG_GETSOCK 19
#define INET_DIAG_GETSOCK_MAX 24
/* Socket identity */
struct inet_diag_sockid {
__be16 idiag_sport;
__be16 idiag_dport;
__be32 idiag_src[4];
__be32 idiag_dst[4];
__u32 idiag_if;
__u32 idiag_cookie[2];
#define INET_DIAG_NOCOOKIE (~0U)
};
/* Request structure */
struct inet_diag_req {
__u8 idiag_family; /* Family of addresses. */
__u8 idiag_src_len;
__u8 idiag_dst_len;
__u8 idiag_ext; /* Query extended information */
struct inet_diag_sockid id;
__u32 idiag_states; /* States to dump */
__u32 idiag_dbs; /* Tables to dump (NI) */
};
struct inet_diag_req_v2 {
__u8 sdiag_family;
__u8 sdiag_protocol;
__u8 idiag_ext;
__u8 pad;
__u32 idiag_states;
struct inet_diag_sockid id;
};
/*
* SOCK_RAW sockets require the underlied protocol to be
* additionally specified so we can use @pad member for
* this, but we can't rename it because userspace programs
* still may depend on this name. Instead lets use another
* structure definition as an alias for struct
* @inet_diag_req_v2.
*/
struct inet_diag_req_raw {
__u8 sdiag_family;
__u8 sdiag_protocol;
__u8 idiag_ext;
__u8 sdiag_raw_protocol;
__u32 idiag_states;
struct inet_diag_sockid id;
};
enum {
INET_DIAG_REQ_NONE,
INET_DIAG_REQ_BYTECODE,
INET_DIAG_REQ_SK_BPF_STORAGES,
__INET_DIAG_REQ_MAX,
};
#define INET_DIAG_REQ_MAX (__INET_DIAG_REQ_MAX - 1)
/* Bytecode is sequence of 4 byte commands followed by variable arguments.
* All the commands identified by "code" are conditional jumps forward:
* to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be
* length of the command and its arguments.
*/
struct inet_diag_bc_op {
unsigned char code;
unsigned char yes;
unsigned short no;
};
enum {
INET_DIAG_BC_NOP,
INET_DIAG_BC_JMP,
INET_DIAG_BC_S_GE,
INET_DIAG_BC_S_LE,
INET_DIAG_BC_D_GE,
INET_DIAG_BC_D_LE,
INET_DIAG_BC_AUTO,
INET_DIAG_BC_S_COND,
INET_DIAG_BC_D_COND,
INET_DIAG_BC_DEV_COND, /* u32 ifindex */
INET_DIAG_BC_MARK_COND,
INET_DIAG_BC_S_EQ,
INET_DIAG_BC_D_EQ,
};
struct inet_diag_hostcond {
__u8 family;
__u8 prefix_len;
int port;
__be32 addr[0];
};
struct inet_diag_markcond {
__u32 mark;
__u32 mask;
};
/* Base info structure. It contains socket identity (addrs/ports/cookie)
* and, alas, the information shown by netstat. */
struct inet_diag_msg {
__u8 idiag_family;
__u8 idiag_state;
__u8 idiag_timer;
__u8 idiag_retrans;
struct inet_diag_sockid id;
__u32 idiag_expires;
__u32 idiag_rqueue;
__u32 idiag_wqueue;
__u32 idiag_uid;
__u32 idiag_inode;
};
/* Extensions */
enum {
INET_DIAG_NONE,
INET_DIAG_MEMINFO,
INET_DIAG_INFO,
INET_DIAG_VEGASINFO,
INET_DIAG_CONG,
INET_DIAG_TOS,
INET_DIAG_TCLASS,
INET_DIAG_SKMEMINFO,
INET_DIAG_SHUTDOWN,
/*
* Next extenstions cannot be requested in struct inet_diag_req_v2:
* its field idiag_ext has only 8 bits.
*/
INET_DIAG_DCTCPINFO, /* request as INET_DIAG_VEGASINFO */
INET_DIAG_PROTOCOL, /* response attribute only */
INET_DIAG_SKV6ONLY,
INET_DIAG_LOCALS,
INET_DIAG_PEERS,
INET_DIAG_PAD,
INET_DIAG_MARK, /* only with CAP_NET_ADMIN */
INET_DIAG_BBRINFO, /* request as INET_DIAG_VEGASINFO */
INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */
INET_DIAG_MD5SIG,
INET_DIAG_ULP_INFO,
INET_DIAG_SK_BPF_STORAGES,
__INET_DIAG_MAX,
};
#define INET_DIAG_MAX (__INET_DIAG_MAX - 1)
enum {
INET_ULP_INFO_UNSPEC,
INET_ULP_INFO_NAME,
INET_ULP_INFO_TLS,
__INET_ULP_INFO_MAX,
};
#define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1)
/* INET_DIAG_MEM */
struct inet_diag_meminfo {
__u32 idiag_rmem;
__u32 idiag_wmem;
__u32 idiag_fmem;
__u32 idiag_tmem;
};
/* INET_DIAG_VEGASINFO */
struct tcpvegas_info {
__u32 tcpv_enabled;
__u32 tcpv_rttcnt;
__u32 tcpv_rtt;
__u32 tcpv_minrtt;
};
/* INET_DIAG_DCTCPINFO */
struct tcp_dctcp_info {
__u16 dctcp_enabled;
__u16 dctcp_ce_state;
__u32 dctcp_alpha;
__u32 dctcp_ab_ecn;
__u32 dctcp_ab_tot;
};
/* INET_DIAG_BBRINFO */
struct tcp_bbr_info {
/* u64 bw: max-filtered BW (app throughput) estimate in Byte per sec: */
__u32 bbr_bw_lo; /* lower 32 bits of bw */
__u32 bbr_bw_hi; /* upper 32 bits of bw */
__u32 bbr_min_rtt; /* min-filtered RTT in uSec */
__u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */
__u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
};
union tcp_cc_info {
struct tcpvegas_info vegas;
struct tcp_dctcp_info dctcp;
struct tcp_bbr_info bbr;
};
#endif /* _UAPI_INET_DIAG_H_ */