mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-13 19:56:07 +07:00
d83525ca62
Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let bpf program serialize access to other variables. Example: struct hash_elem { int cnt; struct bpf_spin_lock lock; }; struct hash_elem * val = bpf_map_lookup_elem(&hash_map, &key); if (val) { bpf_spin_lock(&val->lock); val->cnt++; bpf_spin_unlock(&val->lock); } Restrictions and safety checks: - bpf_spin_lock is only allowed inside HASH and ARRAY maps. - BTF description of the map is mandatory for safety analysis. - bpf program can take one bpf_spin_lock at a time, since two or more can cause deadlocks. - only one 'struct bpf_spin_lock' is allowed per map element. It drastically simplifies implementation yet allows bpf program to use any number of bpf_spin_locks. - while bpf_spin_lock is held, calls (either bpf2bpf or helpers) are not allowed. - bpf program must bpf_spin_unlock() before return. - bpf program can access 'struct bpf_spin_lock' only via bpf_spin_lock()/bpf_spin_unlock() helpers. - load/store into 'struct bpf_spin_lock lock;' field is not allowed. - to use bpf_spin_lock() helper the BTF description of map value must be a struct and have 'struct bpf_spin_lock anyname;' field at the top level. Nested lock inside another struct is not allowed. - syscall map_lookup doesn't copy bpf_spin_lock field to user space. - syscall map_update and program map_update do not update bpf_spin_lock field. - bpf_spin_lock cannot be on the stack or inside networking packet. bpf_spin_lock can only be inside HASH or ARRAY map value. - bpf_spin_lock is available to root only and to all program types. - bpf_spin_lock is not allowed in inner maps of map-in-map. - ld_abs is not allowed inside spin_lock-ed region. - tracing progs and socket filter progs cannot use bpf_spin_lock due to insufficient preemption checks. Implementation details: - cgroup-bpf class of programs can nest with xdp/tc programs. Hence bpf_spin_lock is equivalent to spin_lock_irqsave. 
Other solutions to avoid nested bpf_spin_lock are possible, such as making sure that all networking progs run with softirq disabled. spin_lock_irqsave is the simplest and doesn't add overhead to the programs that don't use it. - arch_spinlock_t is used when it's implemented as queued_spin_lock - archs can force their own arch_spinlock_t - on architectures where queued_spin_lock is not available and sizeof(arch_spinlock_t) != sizeof(__u32) a trivial lock is used. - presence of bpf_spin_lock inside map value could have been indicated via extra flag during map_create, but specifying it via BTF is cleaner. It provides introspection for map key/value and reduces user mistakes. Next steps: - allow bpf_spin_lock in other map types (like cgroup local storage) - introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper to request kernel to grab bpf_spin_lock before rewriting the value. That will serialize access to map elements. Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
72 lines
2.3 KiB
C
72 lines
2.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/* Copyright (c) 2018 Facebook */
|
|
|
|
#ifndef _LINUX_BTF_H
|
|
#define _LINUX_BTF_H 1
|
|
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Opaque forward declarations: this header only refers to these types
 * through pointers, so their full definitions are not required here.
 *
 * struct seq_file is named in the btf_type_seq_show() prototype further
 * down.  Without a file-scope declaration its tag would be scoped to that
 * prototype alone whenever <linux/seq_file.h> has not been included first,
 * which triggers a "declared inside parameter list" warning and makes the
 * parameter type incompatible with the real struct seq_file.
 */
struct btf;
struct btf_member;
struct btf_type;
struct seq_file;
union bpf_attr;

/* Only declared here; the definition lives outside this header. */
extern const struct file_operations btf_fops;
|
|
|
|
void btf_put(struct btf *btf);
|
|
int btf_new_fd(const union bpf_attr *attr);
|
|
struct btf *btf_get_by_fd(int fd);
|
|
int btf_get_info_by_fd(const struct btf *btf,
|
|
const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr);
|
|
/* Figure out the size of a type_id. If type_id is a modifier
|
|
* (e.g. const), it will be resolved to find out the type with size.
|
|
*
|
|
* For example:
|
|
* In describing "const void *", type_id is "const" and "const"
|
|
* refers to "void *". The return type will be "void *".
|
|
*
|
|
* If type_id is a simple "int", then return type will be "int".
|
|
*
|
|
* @btf: struct btf object
|
|
* @type_id: Find out the size of type_id. The type_id of the return
|
|
* type is set to *type_id.
|
|
* @ret_size: It can be NULL. If not NULL, the size of the return
|
|
* type is set to *ret_size.
|
|
* Return: The btf_type (resolved to another type with size info if needed).
|
|
* NULL is returned if type_id itself does not have size info
|
|
* (e.g. void) or it cannot be resolved to another type that
|
|
* has size info.
|
|
* *type_id and *ret_size will not be changed in the
|
|
* NULL return case.
|
|
*/
|
|
const struct btf_type *btf_type_id_size(const struct btf *btf,
|
|
u32 *type_id,
|
|
u32 *ret_size);
|
|
/*
 * Display and identification entry points.
 *
 * NOTE(review): semantics are inferred from the prototypes only.
 * btf_type_seq_show() takes an object (@obj), the ID of its type
 * (@type_id) and a seq_file (@m) -- presumably the output sink; confirm
 * against the implementation.
 */
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
		       struct seq_file *m);
int btf_get_fd_by_id(u32 id);
u32 btf_id(const struct btf *btf);
|
|
/*
 * Struct-member inspection helpers.
 *
 * NOTE(review): the implementations are not visible from this header.
 * Judging by its name and parameters, btf_member_is_reg_int() checks
 * member @m of struct type @s against @expected_offset and
 * @expected_size, and btf_find_spin_lock() searches type @t for a
 * 'struct bpf_spin_lock' member; the meaning of the int return value
 * must be confirmed against the implementation.
 */
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
			   const struct btf_member *m, u32 expected_offset,
			   u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
|
|
|
|
#ifdef CONFIG_BPF_SYSCALL
|
|
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
|
|
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
|
|
#else
|
|
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
|
|
u32 type_id)
|
|
{
|
|
return NULL;
|
|
}
|
|
static inline const char *btf_name_by_offset(const struct btf *btf,
|
|
u32 offset)
|
|
{
|
|
return NULL;
|
|
}
|
|
#endif
|
|
|
|
#endif
|