Merge branch 'bpf_get_ns_current_pid_tgid'

Carlos Neira says:

====================
Currently bpf_get_current_pid_tgid() is used to do pid filtering in bcc's
scripts, but this helper returns the pid as seen by the root namespace, which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used.
The new helper addresses this limitation by returning the pid as it is seen by the
current namespace where the script is executing.

In the future, different pid_ns files may belong to different devices, according to the
discussion between Eric Biederman and Yonghong at the 2017 Linux Plumbers Conference.
To address that situation, the helper requires the inum and dev_t from /proc/self/ns/pid.
This helper has the same use cases as bpf_get_current_pid_tgid(), as it can be
used to do pid filtering even inside a container.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2020-03-12 17:33:12 -07:00
commit 4823b7210b
14 changed files with 391 additions and 3 deletions

View File

@ -247,6 +247,20 @@ struct file *proc_ns_fget(int fd)
return ERR_PTR(-EINVAL);
}
/**
 * ns_match() - Check whether a namespace is identified by a dev/ino pair.
 * @ns: namespace to test
 * @dev: dev_t expected for the nsfs superblock
 * @ino: inode number expected for @ns
 *
 * Return: true if @ino equals the inum of @ns and @dev equals the s_dev of
 * the nsfs mount's superblock, false otherwise.
 */
bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino)
{
	/* Inode number first: a mismatch makes the device comparison moot. */
	if (ns->inum != ino)
		return false;

	return nsfs_mnt->mnt_sb->s_dev == dev;
}
static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);

View File

@ -1497,6 +1497,7 @@ extern const struct bpf_func_proto bpf_strtol_proto;
extern const struct bpf_func_proto bpf_strtoul_proto;
extern const struct bpf_func_proto bpf_tcp_sock_proto;
extern const struct bpf_func_proto bpf_jiffies64_proto;
extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);

View File

@ -85,6 +85,8 @@ typedef struct ns_common *ns_get_path_helper_t(void *);
extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
void *private_data);
extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
extern void nsfs_init(void);

View File

@ -2914,6 +2914,19 @@ union bpf_attr {
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
*
* int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
* Description
* Returns 0 on success; values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
*
* **-ENOENT** if pidns does not exist for the current task.
*
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -3035,7 +3048,8 @@ union bpf_attr {
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64), \
FN(read_branch_records),
FN(read_branch_records), \
FN(get_ns_current_pid_tgid),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -3829,4 +3843,8 @@ struct bpf_sockopt {
__s32 retval;
};
/*
 * Output buffer of bpf_get_ns_current_pid_tgid(): the ids of the current
 * task as seen from its pid namespace. The helper zeroes the buffer when it
 * fails.
 */
struct bpf_pidns_info {
/* filled from task_pid_nr_ns() */
__u32 pid;
/* filled from task_tgid_nr_ns() */
__u32 tgid;
};
#endif /* _UAPI__LINUX_BPF_H__ */

View File

@ -2149,6 +2149,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{

View File

@ -12,6 +12,8 @@
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include "../../lib/kstrtox.h"
@ -499,3 +501,46 @@ const struct bpf_func_proto bpf_strtoul_proto = {
.arg4_type = ARG_PTR_TO_LONG,
};
#endif
/*
 * bpf_get_ns_current_pid_tgid() - report pid/tgid of current as seen from its
 * active pid namespace, provided that namespace is the one identified by the
 * dev/ino pair the caller supplied.
 *
 * Returns 0 and fills *nsdata on success. On failure the first @size bytes
 * of *nsdata are zeroed and the result is:
 *   -EINVAL  @size is not sizeof(struct bpf_pidns_info), @dev does not
 *            survive the round trip through dev_t, there is no current
 *            task, or the namespace does not match dev/ino;
 *   -ENOENT  the current task has no active pid namespace.
 */
BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
	   struct bpf_pidns_info *, nsdata, u32, size)
{
	struct task_struct *tsk = current;
	struct pid_namespace *active_ns;
	int ret = -EINVAL;

	/* Reject malformed arguments before touching any namespace state. */
	if (unlikely(size != sizeof(struct bpf_pidns_info)) ||
	    unlikely((u64)(dev_t)dev != dev) ||
	    unlikely(!tsk))
		goto clear;

	active_ns = task_active_pid_ns(tsk);
	if (unlikely(!active_ns)) {
		ret = -ENOENT;
		goto clear;
	}

	if (ns_match(&active_ns->ns, (dev_t)dev, ino)) {
		nsdata->pid = task_pid_nr_ns(tsk, active_ns);
		nsdata->tgid = task_tgid_nr_ns(tsk, active_ns);
		return 0;
	}

clear:
	/* Never hand stale bytes back to the program on error. */
	memset((void *)nsdata, 0, (size_t) size);
	return ret;
}
/*
 * Prototype descriptor for bpf_get_ns_current_pid_tgid(). The argument types
 * follow the helper's parameter order: dev, ino, nsdata, size.
 */
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
.func = bpf_get_ns_current_pid_tgid,
.gpl_only = false,
.ret_type = RET_INTEGER,
/* dev and ino are plain scalars; the helper validates them itself */
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_ANYTHING,
/* nsdata is written by the helper on every path, so it may be uninitialized */
.arg3_type = ARG_PTR_TO_UNINIT_MEM,
/* size of the nsdata buffer */
.arg4_type = ARG_CONST_SIZE,
};

View File

@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
case BPF_FUNC_get_ns_current_pid_tgid:
return &bpf_get_ns_current_pid_tgid_proto;
default:
return NULL;
}

View File

@ -435,6 +435,7 @@ class PrinterHelpers(Printer):
'struct bpf_fib_lookup',
'struct bpf_perf_event_data',
'struct bpf_perf_event_value',
'struct bpf_pidns_info',
'struct bpf_sock',
'struct bpf_sock_addr',
'struct bpf_sock_ops',

View File

@ -2914,6 +2914,19 @@ union bpf_attr {
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
*
* int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
* Description
* Returns 0 on success; values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
*
* On failure, the returned value is one of the following:
*
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
*
* **-ENOENT** if pidns does not exist for the current task.
*
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -3035,7 +3048,8 @@ union bpf_attr {
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64), \
FN(read_branch_records),
FN(read_branch_records), \
FN(get_ns_current_pid_tgid),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -3829,4 +3843,8 @@ struct bpf_sockopt {
__s32 retval;
};
/*
 * Output buffer of bpf_get_ns_current_pid_tgid(): the ids of the current
 * task as seen from its pid namespace. The helper zeroes the buffer when it
 * fails.
 */
struct bpf_pidns_info {
/* filled from task_pid_nr_ns() */
__u32 pid;
/* filled from task_tgid_nr_ns() */
__u32 tgid;
};
#endif /* _UAPI__LINUX_BPF_H__ */

View File

@ -31,6 +31,7 @@ test_tcp_check_syncookie_user
test_sysctl
test_hashmap
test_btf_dump
test_current_pid_tgid_new_ns
xdping
test_cpp
*.skel.h

View File

@ -32,7 +32,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
test_progs-no_alu32
test_progs-no_alu32 \
test_current_pid_tgid_new_ns
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)

View File

@ -0,0 +1,88 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
#include <test_progs.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/syscall.h>
/*
 * Userspace view of the value stored in the "test_ns_.bss" map; the test
 * writes and reads it as a whole through key 0.
 */
struct bss {
/* st_dev of /proc/self/ns/pid, input for the BPF program */
__u64 dev;
/* st_ino of /proc/self/ns/pid, input for the BPF program */
__u64 ino;
/* result: compared against the expected id after the program ran */
__u64 pid_tgid;
/* expected id computed in userspace, input for the BPF program */
__u64 user_pid_tgid;
};
void test_ns_current_pid_tgid(void)
{
const char *probe_name = "raw_tracepoint/sys_enter";
const char *file = "test_ns_current_pid_tgid.o";
int err, key = 0, duration = 0;
struct bpf_link *link = NULL;
struct bpf_program *prog;
struct bpf_map *bss_map;
struct bpf_object *obj;
struct bss bss;
struct stat st;
__u64 id;
obj = bpf_object__open_file(file, NULL);
if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
return;
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
goto cleanup;
bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
if (CHECK(!bss_map, "find_bss_map", "failed\n"))
goto cleanup;
prog = bpf_object__find_program_by_title(obj, probe_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
probe_name))
goto cleanup;
memset(&bss, 0, sizeof(bss));
pid_t tid = syscall(SYS_gettid);
pid_t pid = getpid();
id = (__u64) tid << 32 | pid;
bss.user_pid_tgid = id;
if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) {
perror("Failed to stat /proc/self/ns/pid");
goto cleanup;
}
bss.dev = st.st_dev;
bss.ino = st.st_ino;
err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err))
goto cleanup;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
PTR_ERR(link))) {
link = NULL;
goto cleanup;
}
/* trigger some syscalls */
usleep(1);
err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
if (CHECK(err, "set_bss", "failed to get bss : %d\n", err))
goto cleanup;
if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
"User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
goto cleanup;
cleanup:
if (!link) {
bpf_link__destroy(link);
link = NULL;
}
bpf_object__close(obj);
}

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
/*
 * Global data shared with userspace. dev, ino and user_pid_tgid are read by
 * the program below; pid_tgid is the only field it writes.
 */
static volatile struct {
__u64 dev;
__u64 ino;
__u64 pid_tgid;
__u64 user_pid_tgid;
} res;
/*
 * Ask the kernel for the pid/tgid of the current task as seen from the pid
 * namespace identified by res.dev/res.ino. The ids are packed as
 * tgid << 32 | pid and published in res.pid_tgid only when they equal the
 * value userspace stored in res.user_pid_tgid, so invocations for any other
 * task, or for a non-matching namespace, leave the result untouched.
 *
 * Always returns 0.
 */
SEC("raw_tracepoint/sys_enter")
int trace(void *ctx)
{
	__u64 ns_pid_tgid, expected_pid;
	struct bpf_pidns_info nsdata;

	/* A non-zero return means the namespace did not match or args were bad. */
	if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata,
					sizeof(struct bpf_pidns_info)))
		return 0;

	ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid;
	expected_pid = res.user_pid_tgid;

	if (expected_pid != ns_pid_tgid)
		return 0;

	res.pid_tgid = ns_pid_tgid;

	return 0;
}
/* License string emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,159 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
#define _GNU_SOURCE
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include "test_progs.h"
/*
 * CHECK_NEWNS(condition, tag, format...) - report one test step on stdout.
 *
 * A non-zero @condition means the step failed: "<func>:FAIL:<tag> " is
 * printed, followed by the printf-style message given by @format. Otherwise
 * "<func>:PASS:<tag>" is printed. The expression evaluates to 1 on failure
 * and 0 on success, so it can be used directly inside an if ().
 */
#define CHECK_NEWNS(condition, tag, format...) ({			\
	int __failed = !!(condition);					\
	if (!__failed) {						\
		printf("%s:PASS:%s\n", __func__, tag);			\
	} else {							\
		printf("%s:FAIL:%s ", __func__, tag);			\
		printf(format);						\
	}								\
	__failed;							\
})
/*
 * Userspace view of the value stored in the "test_ns_.bss" map; the test
 * writes and reads it as a whole through key 0.
 */
struct bss {
/* st_dev of /proc/self/ns/pid, input for the BPF program */
__u64 dev;
/* st_ino of /proc/self/ns/pid, input for the BPF program */
__u64 ino;
/* result: compared against the expected id after the program ran */
__u64 pid_tgid;
/* expected id computed in userspace, input for the BPF program */
__u64 user_pid_tgid;
};
/* Reap @pid and turn its termination status into this process's exit code. */
static int wait_for_child(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) != pid) {
		perror("waitpid() failed");
		return 1;
	}

	/* Anything other than a normal exit counts as a failure. */
	return WIFEXITED(status) ? WEXITSTATUS(status) : 1;
}

/*
 * Exercise bpf_get_ns_current_pid_tgid() from inside a freshly created pid
 * namespace.
 *
 * The process unshares its pid and mount namespaces and forks twice. The
 * grandchild remounts /proc so that /proc/self/ns/pid describes the new
 * namespace, then runs the same load/attach/compare sequence as the
 * in-namespace selftest. Each parent waits for its child and propagates the
 * child's exit status, so the top-level exit code is 0 only when the
 * grandchild passed.
 */
int main(int argc, char **argv)
{
	const char *probe_name = "raw_tracepoint/sys_enter";
	const char *file = "test_ns_current_pid_tgid.o";
	struct bpf_link *link = NULL;
	struct bpf_program *prog;
	struct bpf_map *bss_map;
	struct bpf_object *obj;
	int exit_code = 1;
	int err, key = 0;
	struct bss bss;
	struct stat st;
	pid_t pid, tid;
	__u64 id;

	printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n");

	if (stat("/proc/self/ns/pid", &st)) {
		perror("stat failed on /proc/self/ns/pid ns\n");
		printf("%s:FAILED\n", argv[0]);
		return exit_code;
	}

	if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS),
			"unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno))
		return exit_code;

	pid = fork();
	if (pid == -1) {
		perror("Fork() failed\n");
		printf("%s:FAILED\n", argv[0]);
		return exit_code;
	}

	if (pid > 0) {
		usleep(5);
		return wait_for_child(pid);
	}

	pid = fork();
	if (pid == -1) {
		perror("Fork() failed\n");
		printf("%s:FAILED\n", argv[0]);
		return exit_code;
	}

	if (pid > 0)
		return wait_for_child(pid);

	/* Grandchild: the actual test runs here. */
	if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL),
			"Unmounting proc", "Cannot umount proc! errno=%d\n", errno))
		return exit_code;

	if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL),
			"Mounting proc", "Cannot mount proc! errno=%d\n", errno))
		return exit_code;

	obj = bpf_object__open_file(file, NULL);
	if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
		return exit_code;

	err = bpf_object__load(obj);
	if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno))
		goto cleanup;

	bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
	if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n"))
		goto cleanup;

	prog = bpf_object__find_program_by_title(obj, probe_name);
	if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n",
			probe_name))
		goto cleanup;

	memset(&bss, 0, sizeof(bss));

	/*
	 * The BPF program composes its value as tgid << 32 | pid, where the
	 * kernel's tgid is what getpid() returns and the kernel's pid is what
	 * gettid() returns. Build the expected id in the same order.
	 */
	tid = syscall(SYS_gettid);
	pid = getpid();
	id = (__u64) pid << 32 | tid;
	bss.user_pid_tgid = id;

	if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st),
			"stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno))
		goto cleanup;

	bss.dev = st.st_dev;
	bss.ino = st.st_ino;

	err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
	if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err))
		goto cleanup;

	link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
	if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n",
			PTR_ERR(link))) {
		/* Do not hand an error pointer to the cleanup path. */
		link = NULL;
		goto cleanup;
	}

	/* trigger some syscalls */
	usleep(1);

	err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
	if (CHECK_NEWNS(err, "get_bss", "failed to get bss : %d\n", err))
		goto cleanup;

	if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
			"User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
		goto cleanup;

	exit_code = 0;
	printf("%s:PASS\n", argv[0]);

cleanup:
	/*
	 * link is either NULL or a live link at this point. Destroy it
	 * unconditionally so a successful attachment is not leaked.
	 */
	bpf_link__destroy(link);
	bpf_object__close(obj);

	/* Report the real outcome so the waiting parents can propagate it. */
	return exit_code;
}