Merge branch 'bpf_get_current_cgroup_id'

Yonghong Song says:

====================
bpf has been used extensively for tracing. For example, bcc
contains an almost full set of bpf-based tools to trace kernel
and user functions/events. Most tracing tools are currently
either filtered based on pid or system-wide.

Containers have been used quite extensively in industry and
cgroup is often used together to provide resource isolation
and protection. Several processes may run inside the same
container. It is often desirable to get container-level tracing
results as well, e.g. syscall count, function count, I/O
activity, etc.

This patch implements a new helper, bpf_get_current_cgroup_id(),
which will return cgroup id based on the cgroup within which
the current task is running.

Patch #1 implements the new helper in the kernel.
Patch #2 syncs the uapi bpf.h header and helper between tools
and kernel.
Patch #3 shows how to get the same cgroup id in user space,
so a filter or policy could be configured in the bpf program
based on current task cgroup.

Changelog:
  v1 -> v2:
     . rebase to resolve merge conflict with latest bpf-next.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2018-06-03 18:22:42 -07:00
commit 432bdb581e
13 changed files with 267 additions and 4 deletions

View File

@ -746,6 +746,7 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);

View File

@ -2070,6 +2070,11 @@ union bpf_attr {
* **CONFIG_SOCK_CGROUP_DATA** configuration option.
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2151,7 +2156,8 @@ union bpf_attr {
FN(lwt_seg6_action), \
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id),
FN(skb_cgroup_id), \
FN(get_current_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call

View File

@ -1765,6 +1765,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{

View File

@ -179,3 +179,18 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE,
};
#ifdef CONFIG_CGROUPS
/*
 * bpf_get_current_cgroup_id() - BPF helper that takes no arguments.
 *
 * Fetches the cgroup that task_dfl_cgroup() reports for the current task
 * and returns that cgroup's kn->id.id value.
 */
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	return task_dfl_cgroup(current)->kn->id.id;
}
/*
 * Descriptor for the bpf_get_current_cgroup_id() helper: it points at the
 * helper implementation, declares no argument types and reports an integer
 * return value. gpl_only is left false.
 */
const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
.func = bpf_get_current_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
};
#endif

View File

@ -564,6 +564,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_probe_read_str:
return &bpf_probe_read_str_proto;
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
default:
return NULL;
}

View File

@ -2070,6 +2070,11 @@ union bpf_attr {
* **CONFIG_SOCK_CGROUP_DATA** configuration option.
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -2151,7 +2156,8 @@ union bpf_attr {
FN(lwt_seg6_action), \
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id),
FN(skb_cgroup_id), \
FN(get_current_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call

View File

@ -18,3 +18,4 @@ urandom_read
test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user

View File

@ -24,7 +24,7 @@ urandom_read: urandom_read.c
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@ -34,7 +34,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@ -63,6 +64,7 @@ $(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
$(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_progs: trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
.PHONY: force

View File

@ -131,6 +131,8 @@ static int (*bpf_rc_repeat)(void *ctx) =
static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
unsigned long long scancode, unsigned int toggle) =
(void *) BPF_FUNC_rc_keydown;
/* Call stub for the helper selected by BPF_FUNC_get_current_cgroup_id:
 * takes no arguments and yields an unsigned 64-bit value.
 */
static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions

View File

@ -6,6 +6,7 @@
#include <sys/types.h>
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/sched.h>
#include <fcntl.h>
#include <unistd.h>
@ -176,3 +177,59 @@ int create_and_get_cgroup(char *path)
return fd;
}
/**
 * get_cgroup_id() - Get cgroup id for a particular cgroup path
 * @path: The cgroup path, relative to the workdir, to look up
 *
 * The id is taken from the 8-byte file handle that name_to_handle_at()
 * reports for the cgroup directory.
 *
 * On success, it returns the cgroup id. On failure it returns 0,
 * which is an invalid cgroup id.
 * If there is a failure, it prints the error to stderr.
 */
unsigned long long get_cgroup_id(char *path)
{
	char cgroup_workdir[PATH_MAX + 1];
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mount_id, rc;

	format_cgroup_path(cgroup_workdir, path);

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/* calloc() left handle_bytes at 0, so this first call is only a size
	 * probe: it is expected to fail while reporting the handle size, and
	 * anything other than an 8-byte handle is rejected.
	 */
	rc = name_to_handle_at(AT_FDCWD, cgroup_workdir, handle, &mount_id, 0);
	if (rc >= 0 || handle->handle_bytes != 8) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* Grow the buffer so the handle payload fits behind the header. */
	grown = realloc(handle,
			sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		log_err("realloc");
		goto out;
	}
	handle = grown;

	rc = name_to_handle_at(AT_FDCWD, cgroup_workdir, handle, &mount_id, 0);
	if (rc < 0) {
		log_err("name_to_handle_at");
		goto out;
	}

	/* The 8 handle bytes are the cgroup id. */
	memcpy(&cgid, handle->f_handle, 8);
out:
	free(handle);
	return cgid;
}

View File

@ -13,5 +13,6 @@ int create_and_get_cgroup(char *path);
int join_cgroup(char *path);
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
unsigned long long get_cgroup_id(char *path);
#endif

View File

@ -0,0 +1,28 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <linux/bpf.h>
#include "bpf_helpers.h"
/* Single-entry array map (32-bit key, 64-bit value) that the trace()
 * program fills with the cgroup id it observes.
 */
struct bpf_map_def SEC("maps") cg_ids = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.max_entries = 1,
};
/* Placed in the sys_enter_nanosleep tracepoint section: stores the id
 * returned by bpf_get_current_cgroup_id() into slot 0 of cg_ids.
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_nanosleep")
int trace(void *ctx)
{
	__u32 slot = 0;
	__u64 *cgid = bpf_map_lookup_elem(&cg_ids, &slot);

	/* Nothing to record if the lookup did not yield a slot. */
	if (!cgid)
		return 0;

	*cgid = bpf_get_current_cgroup_id();
	return 0;
}
/* License string emitted into the "license" section of the object. */
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */

View File

@ -0,0 +1,141 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <syscall.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#include "bpf_rlimit.h"
/*
 * CHECK() - report the outcome of one test step on stdout.
 * @condition: failure condition; any non-zero value means the step failed
 * @tag:       short name of the step, printed in the report line
 * @format:    printf-style format (plus arguments) printed only on failure
 *
 * Prints "<function>:PASS:<tag>" when @condition is false, otherwise
 * "<function>:FAIL:<tag> " followed by the formatted message.
 * Evaluates to 1 if the step failed and 0 if it passed, so it can be used
 * directly inside an if ().
 */
#define CHECK(condition, tag, format...) ({				\
	int __failed = !!(condition);					\
	if (!__failed) {						\
		printf("%s:PASS:%s\n", __func__, tag);			\
	} else {							\
		printf("%s:FAIL:%s ", __func__, tag);			\
		printf(format);						\
	}								\
	__failed;							\
})
/*
 * bpf_find_map() - look up a map in a loaded BPF object by name.
 * @test: name of the calling test (not used by this function)
 * @obj:  BPF object to search
 * @name: map name to look for
 *
 * Returns the value of bpf_map__fd() for the map, or -1 if no map with
 * that name is found in @obj.
 */
static int bpf_find_map(const char *test, struct bpf_object *obj,
			const char *name)
{
	struct bpf_map *found = bpf_object__find_map_by_name(obj, name);

	return found ? bpf_map__fd(found) : -1;
}
#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
/*
 * Test entry point: check that the cgroup id a BPF program obtains from
 * bpf_get_current_cgroup_id() equals the id user space reads for the same
 * cgroup via get_cgroup_id().
 *
 * The test creates and joins TEST_CGROUP, loads get_cgroup_id_kern.o,
 * attaches its program to the sys_enter_nanosleep tracepoint for this pid,
 * triggers the tracepoint with sleep(), then compares the id stored in the
 * "cg_ids" map with the id looked up from user space.
 *
 * Returns 0 when the two ids match, 1 on any failure.
 */
int main(int argc, char **argv)
{
	const char *probe_name = "syscalls/sys_enter_nanosleep";
	const char *file = "get_cgroup_id_kern.o";
	int err, bytes, efd, prog_fd, pmu_fd;
	struct perf_event_attr attr = {};
	int cgroup_fd, cgidmap_fd;
	struct bpf_object *obj;
	__u64 kcgid = 0, ucgid;
	int exit_code = 1;
	char buf[256];
	__u32 key = 0;

	err = setup_cgroup_environment();
	if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
		  errno))
		return 1;

	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
	if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
		  cgroup_fd, errno))
		goto cleanup_cgroup_env;

	err = join_cgroup(TEST_CGROUP);
	if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
		goto close_cgroup;

	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
		goto close_cgroup;

	cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
	if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
		  cgidmap_fd, errno))
		goto close_prog;

	/* The tracepoint id is exported as text under debugfs. */
	snprintf(buf, sizeof(buf),
		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
	efd = open(buf, O_RDONLY, 0);
	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
		goto close_prog;
	bytes = read(efd, buf, sizeof(buf));
	close(efd);
	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
		  "bytes %d errno %d\n", bytes, errno))
		goto close_prog;
	/* read() does not NUL-terminate; the check above guarantees that
	 * bytes < sizeof(buf), so there is room for the terminator that
	 * strtol() relies on.
	 */
	buf[bytes] = '\0';

	attr.config = strtol(buf, NULL, 0);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.sample_type = PERF_SAMPLE_RAW;
	attr.sample_period = 1;
	attr.wakeup_events = 1;

	/* attach to this pid so that all bpf invocations will be in the
	 * cgroup associated with this pid.
	 */
	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
		  errno))
		goto close_prog;

	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
		  errno))
		goto close_pmu;

	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
		  errno))
		goto close_pmu;

	/* trigger some syscalls */
	sleep(1);

	err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
	if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
		goto close_pmu;

	/* get_cgroup_id() returns 0 on failure and kcgid also starts at 0,
	 * so a failed lookup must not be allowed to compare equal.
	 */
	ucgid = get_cgroup_id(TEST_CGROUP);
	if (CHECK(!ucgid, "get_cgroup_id", "no cgroup id for %s\n",
		  TEST_CGROUP))
		goto close_pmu;

	if (CHECK(kcgid != ucgid, "compare_cgroup_id",
		  "kern cgid %llx user cgid %llx\n",
		  (unsigned long long)kcgid, (unsigned long long)ucgid))
		goto close_pmu;

	exit_code = 0;
	printf("%s:PASS\n", argv[0]);

close_pmu:
	close(pmu_fd);
close_prog:
	bpf_object__close(obj);
close_cgroup:
	close(cgroup_fd);
cleanup_cgroup_env:
	cleanup_cgroup_environment();
	return exit_code;
}