mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-18 15:46:42 +07:00
d74bad4e74
== The problem == See description of the problem in the initial patch of this patch set. == The solution == The patch provides much more reliable in-kernel solution for the 2nd part of the problem: making outgoing connecttion from desired IP. It adds new attach types `BPF_CGROUP_INET4_CONNECT` and `BPF_CGROUP_INET6_CONNECT` for program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both source and destination of a connection at connect(2) time. Local end of connection can be bound to desired IP using newly introduced BPF-helper `bpf_bind()`. It allows to bind to only IP though, and doesn't support binding to port, i.e. leverages `IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this: * looking for a free port is expensive and can affect performance significantly; * there is no use-case for port. As for remote end (`struct sockaddr *` passed by user), both parts of it can be overridden, remote IP and remote port. It's useful if an application inside cgroup wants to connect to another application inside same cgroup or to itself, but knows nothing about IP assigned to the cgroup. Support is added for IPv4 and IPv6, for TCP and UDP. IPv4 and IPv6 have separate attach types for same reason as sys_bind hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when user passes sockaddr_in since it'd be out-of-bound. == Implementation notes == The patch introduces new field in `struct proto`: `pre_connect` that is a pointer to a function with same signature as `connect` but is called before it. The reason is in some cases BPF hooks should be called way before control is passed to `sk->sk_prot->connect`. Specifically `inet_dgram_connect` autobinds socket before calling `sk->sk_prot->connect` and there is no way to call `bpf_bind()` from hooks from e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since it'd cause double-bind. On the other hand `proto.pre_connect` provides a flexible way to add BPF hooks for connect only for necessary `proto` and call them at desired time before `connect`. Since `bpf_bind()` is allowed to bind only to IP and autobind in `inet_dgram_connect` binds only port there is no chance of double-bind. bpf_bind() sets `force_bind_address_no_port` to bind to only IP despite of value of `bind_address_no_port` socket field. bpf_bind() sets `with_lock` to `false` when calling to __inet_bind() and __inet6_bind() since all call-sites, where bpf_bind() is called, already hold socket lock. Signed-off-by: Andrey Ignatov <rdna@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
196 lines
6.5 KiB
C
196 lines
6.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _BPF_CGROUP_H
|
|
#define _BPF_CGROUP_H
|
|
|
|
#include <linux/jump_label.h>
|
|
#include <uapi/linux/bpf.h>
|
|
|
|
struct sock;
|
|
struct sockaddr;
|
|
struct cgroup;
|
|
struct sk_buff;
|
|
struct bpf_sock_ops_kern;
|
|
|
|
#ifdef CONFIG_CGROUP_BPF
|
|
|
|
extern struct static_key_false cgroup_bpf_enabled_key;
|
|
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
|
|
|
|
struct bpf_prog_list {
|
|
struct list_head node;
|
|
struct bpf_prog *prog;
|
|
};
|
|
|
|
struct bpf_prog_array;
|
|
|
|
struct cgroup_bpf {
|
|
/* array of effective progs in this cgroup */
|
|
struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
|
|
|
|
/* attached progs to this cgroup and attach flags
|
|
* when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
|
|
* have either zero or one element
|
|
* when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
|
|
*/
|
|
struct list_head progs[MAX_BPF_ATTACH_TYPE];
|
|
u32 flags[MAX_BPF_ATTACH_TYPE];
|
|
|
|
/* temp storage for effective prog array used by prog_attach/detach */
|
|
struct bpf_prog_array __rcu *inactive;
|
|
};
|
|
|
|
void cgroup_bpf_put(struct cgroup *cgrp);
|
|
int cgroup_bpf_inherit(struct cgroup *cgrp);
|
|
|
|
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
|
enum bpf_attach_type type, u32 flags);
|
|
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
|
|
enum bpf_attach_type type, u32 flags);
|
|
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr);
|
|
|
|
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
|
|
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
|
|
enum bpf_attach_type type, u32 flags);
|
|
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
|
|
enum bpf_attach_type type, u32 flags);
|
|
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr);
|
|
|
|
int __cgroup_bpf_run_filter_skb(struct sock *sk,
|
|
struct sk_buff *skb,
|
|
enum bpf_attach_type type);
|
|
|
|
int __cgroup_bpf_run_filter_sk(struct sock *sk,
|
|
enum bpf_attach_type type);
|
|
|
|
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
|
|
struct sockaddr *uaddr,
|
|
enum bpf_attach_type type);
|
|
|
|
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
|
|
struct bpf_sock_ops_kern *sock_ops,
|
|
enum bpf_attach_type type);
|
|
|
|
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
|
|
short access, enum bpf_attach_type type);
|
|
|
|
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
|
|
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
|
|
({ \
|
|
int __ret = 0; \
|
|
if (cgroup_bpf_enabled) \
|
|
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
|
|
BPF_CGROUP_INET_INGRESS); \
|
|
\
|
|
__ret; \
|
|
})
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
|
|
({ \
|
|
int __ret = 0; \
|
|
if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
|
|
typeof(sk) __sk = sk_to_full_sk(sk); \
|
|
if (sk_fullsock(__sk)) \
|
|
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
|
|
BPF_CGROUP_INET_EGRESS); \
|
|
} \
|
|
__ret; \
|
|
})
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
|
|
({ \
|
|
int __ret = 0; \
|
|
if (cgroup_bpf_enabled) { \
|
|
__ret = __cgroup_bpf_run_filter_sk(sk, \
|
|
BPF_CGROUP_INET_SOCK_CREATE); \
|
|
} \
|
|
__ret; \
|
|
})
|
|
|
|
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
|
|
({ \
|
|
int __ret = 0; \
|
|
if (cgroup_bpf_enabled) \
|
|
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
|
|
__ret; \
|
|
})
|
|
|
|
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
|
|
({ \
|
|
int __ret = 0; \
|
|
if (cgroup_bpf_enabled) { \
|
|
lock_sock(sk); \
|
|
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
|
|
release_sock(sk); \
|
|
} \
|
|
__ret; \
|
|
})
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \
|
|
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \
|
|
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
|
|
|
|
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
|
|
sk->sk_prot->pre_connect)
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
|
|
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
|
|
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
|
|
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
|
|
|
|
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
|
|
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
|
|
|
|
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
|
|
({ \
|
|
int __ret = 0; \
|
|
if (cgroup_bpf_enabled && (sock_ops)->sk) { \
|
|
typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
|
|
if (__sk && sk_fullsock(__sk)) \
|
|
__ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
|
|
sock_ops, \
|
|
BPF_CGROUP_SOCK_OPS); \
|
|
} \
|
|
__ret; \
|
|
})
|
|
|
|
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
|
|
({ \
|
|
int __ret = 0; \
|
|
if (cgroup_bpf_enabled) \
|
|
__ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
|
|
access, \
|
|
BPF_CGROUP_DEVICE); \
|
|
\
|
|
__ret; \
|
|
})
|
|
#else
|
|
|
|
struct cgroup_bpf {};
|
|
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
|
|
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
|
|
|
|
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
|
|
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
|
|
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
|
|
|
|
#endif /* CONFIG_CGROUP_BPF */
|
|
|
|
#endif /* _BPF_CGROUP_H */
|