commit 95ff141e52
$ map_perf_test 128
speed of HASH bpf_map_lookup_elem() in lookups per second
        w/o JIT         w/JIT
before    46M             58M
after     42M             74M

perf report
before:
    54.23%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    14.24%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     8.84%  map_perf_test  [kernel.kallsyms]  [k] htab_map_lookup_elem
     5.93%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
     2.30%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.49%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

after:
    60.03%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    18.07%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     2.91%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.94%  map_perf_test  [kernel.kallsyms]  [k] _einittext
     1.90%  map_perf_test  [kernel.kallsyms]  [k] __audit_syscall_exit
     1.72%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

Notice that bpf_map_lookup_elem() and htab_map_lookup_elem() are trivial
functions, yet they take a sizeable amount of cpu time.
htab_map_gen_lookup() removes bpf_map_lookup_elem() and converts
htab_map_lookup_elem() into three BPF insns, which causes the cpu time of
bpf_prog_da4fc6a3f41761a2() to increase slightly.

$ map_perf_test 256
speed of ARRAY bpf_map_lookup_elem() in lookups per second
        w/o JIT         w/JIT
before    97M            174M
after     64M            280M

before:
    37.33%  map_perf_test  [kernel.kallsyms]  [k] array_map_lookup_elem
    13.95%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
     6.54%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     4.57%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

after:
    32.86%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     6.54%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

array_map_gen_lookup() removes the calls to array_map_lookup_elem() and
bpf_map_lookup_elem() and replaces them with 7 bpf insns. The performance
without JIT is slower, since executing extra insns in the interpreter is
slower than running native C code, but with JIT the performance gains are
obvious, since native C->x86 code is replaced with fewer bpf->x86
instructions.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
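For reference, the inlining described above happens during verifier fixup:
the map's ->map_gen_lookup() callback emits a short BPF instruction
sequence directly into the program in place of the helper call. Below is a
sketch of what the hash map case can look like; it is illustrative only,
modeled on htab_map_gen_lookup() in kernel/bpf/hashtab.c rather than the
verbatim kernel code (struct htab_elem is internal to that file, and the
BPF_* insn macros come from linux/filter.h):

	/* sketch: emit three insns replacing the bpf_map_lookup_elem() call */
	static u32 htab_map_gen_lookup_sketch(struct bpf_map *map,
					      struct bpf_insn *insn_buf)
	{
		struct bpf_insn *insn = insn_buf;

		/* insn 1: call the internal lookup directly, skipping the
		 * bpf_map_lookup_elem() wrapper entirely
		 */
		*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
		/* insn 2: if nothing was found, leave R0 == NULL */
		*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1);
		/* insn 3: otherwise advance R0 past the element header and
		 * rounded-up key so it points at the value, exactly as the
		 * helper would have returned
		 */
		*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
					offsetof(struct htab_elem, key) +
					round_up(map->key_size, 8));

		return insn - insn_buf;
	}

array_map_gen_lookup() follows the same pattern, except its 7 insns can
compute the value address with pure arithmetic (a bounds check on the
index, then base + index * rounded value size), so no call remains at all.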
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

#define MAX_ENTRIES 1000

struct bpf_map_def SEC("maps") hash_map = {
|
|
.type = BPF_MAP_TYPE_HASH,
|
|
.key_size = sizeof(u32),
|
|
.value_size = sizeof(long),
|
|
.max_entries = MAX_ENTRIES,
|
|
};
|
|
|
|
struct bpf_map_def SEC("maps") lru_hash_map = {
|
|
.type = BPF_MAP_TYPE_LRU_HASH,
|
|
.key_size = sizeof(u32),
|
|
.value_size = sizeof(long),
|
|
.max_entries = 10000,
|
|
};
|
|
|
|
struct bpf_map_def SEC("maps") percpu_lru_hash_map = {
|
|
.type = BPF_MAP_TYPE_LRU_HASH,
|
|
.key_size = sizeof(u32),
|
|
.value_size = sizeof(long),
|
|
.max_entries = 10000,
|
|
.map_flags = BPF_F_NO_COMMON_LRU,
|
|
};
|
|
|
|
struct bpf_map_def SEC("maps") percpu_hash_map = {
|
|
.type = BPF_MAP_TYPE_PERCPU_HASH,
|
|
.key_size = sizeof(u32),
|
|
.value_size = sizeof(long),
|
|
.max_entries = MAX_ENTRIES,
|
|
};
|
|
|
|
struct bpf_map_def SEC("maps") hash_map_alloc = {
|
|
.type = BPF_MAP_TYPE_HASH,
|
|
.key_size = sizeof(u32),
|
|
.value_size = sizeof(long),
|
|
.max_entries = MAX_ENTRIES,
|
|
.map_flags = BPF_F_NO_PREALLOC,
|
|
};
|
|
|
|
struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
|
|
.type = BPF_MAP_TYPE_PERCPU_HASH,
|
|
.key_size = sizeof(u32),
|
|
.value_size = sizeof(long),
|
|
.max_entries = MAX_ENTRIES,
|
|
.map_flags = BPF_F_NO_PREALLOC,
|
|
};
|
|
|
|
struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
|
|
.type = BPF_MAP_TYPE_LPM_TRIE,
|
|
.key_size = 8,
|
|
.value_size = sizeof(long),
|
|
.max_entries = 10000,
|
|
.map_flags = BPF_F_NO_PREALLOC,
|
|
};
|
|
|
|
struct bpf_map_def SEC("maps") array_map = {
|
|
.type = BPF_MAP_TYPE_ARRAY,
|
|
.key_size = sizeof(u32),
|
|
.value_size = sizeof(long),
|
|
.max_entries = MAX_ENTRIES,
|
|
};
|
|
|
|
SEC("kprobe/sys_getuid")
|
|
int stress_hmap(struct pt_regs *ctx)
|
|
{
|
|
u32 key = bpf_get_current_pid_tgid();
|
|
long init_val = 1;
|
|
long *value;
|
|
|
|
bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
|
|
value = bpf_map_lookup_elem(&hash_map, &key);
|
|
if (value)
|
|
bpf_map_delete_elem(&hash_map, &key);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("kprobe/sys_geteuid")
|
|
int stress_percpu_hmap(struct pt_regs *ctx)
|
|
{
|
|
u32 key = bpf_get_current_pid_tgid();
|
|
long init_val = 1;
|
|
long *value;
|
|
|
|
bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
|
|
value = bpf_map_lookup_elem(&percpu_hash_map, &key);
|
|
if (value)
|
|
bpf_map_delete_elem(&percpu_hash_map, &key);
|
|
return 0;
|
|
}
|
|
SEC("kprobe/sys_getgid")
|
|
int stress_hmap_alloc(struct pt_regs *ctx)
|
|
{
|
|
u32 key = bpf_get_current_pid_tgid();
|
|
long init_val = 1;
|
|
long *value;
|
|
|
|
bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
|
|
value = bpf_map_lookup_elem(&hash_map_alloc, &key);
|
|
if (value)
|
|
bpf_map_delete_elem(&hash_map_alloc, &key);
|
|
return 0;
|
|
}
|
|
|
|
SEC("kprobe/sys_getegid")
|
|
int stress_percpu_hmap_alloc(struct pt_regs *ctx)
|
|
{
|
|
u32 key = bpf_get_current_pid_tgid();
|
|
long init_val = 1;
|
|
long *value;
|
|
|
|
bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
|
|
value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
|
|
if (value)
|
|
bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
|
|
return 0;
|
|
}
|
|
|
|
SEC("kprobe/sys_getpid")
|
|
int stress_lru_hmap_alloc(struct pt_regs *ctx)
|
|
{
|
|
u32 key = bpf_get_prandom_u32();
|
|
long val = 1;
|
|
|
|
bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("kprobe/sys_getppid")
|
|
int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx)
|
|
{
|
|
u32 key = bpf_get_prandom_u32();
|
|
long val = 1;
|
|
|
|
bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("kprobe/sys_gettid")
|
|
int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
|
|
{
|
|
union {
|
|
u32 b32[2];
|
|
u8 b8[8];
|
|
} key;
|
|
unsigned int i;
|
|
|
|
key.b32[0] = 32;
|
|
key.b8[4] = 192;
|
|
key.b8[5] = 168;
|
|
key.b8[6] = 0;
|
|
key.b8[7] = 1;
|
|
|
|
#pragma clang loop unroll(full)
|
|
for (i = 0; i < 32; ++i)
|
|
bpf_map_lookup_elem(&lpm_trie_map_alloc, &key);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("kprobe/sys_getpgid")
|
|
int stress_hash_map_lookup(struct pt_regs *ctx)
|
|
{
|
|
u32 key = 1, i;
|
|
long *value;
|
|
|
|
#pragma clang loop unroll(full)
|
|
for (i = 0; i < 64; ++i)
|
|
value = bpf_map_lookup_elem(&hash_map, &key);
|
|
|
|
return 0;
|
|
}
|
|
|
|
SEC("kprobe/sys_getpgrp")
|
|
int stress_array_map_lookup(struct pt_regs *ctx)
|
|
{
|
|
u32 key = 1, i;
|
|
long *value;
|
|
|
|
#pragma clang loop unroll(full)
|
|
for (i = 0; i < 64; ++i)
|
|
value = bpf_map_lookup_elem(&array_map, &key);
|
|
|
|
return 0;
|
|
}
|
|
|
|
char _license[] SEC("license") = "GPL";
|
|
u32 _version SEC("version") = LINUX_VERSION_CODE;
|
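
These kprobe programs are driven by the user-space half of the sample
(samples/bpf/map_perf_test_user.c), which attaches them and then issues the
hooked syscalls in a tight loop; lookups per second falls out of the
iteration count and the elapsed time. A minimal sketch of that measurement
loop follows, with illustrative names rather than the real driver's code;
the factor of 64 matches the unrolled loop in stress_hash_map_lookup()
above:

	#include <stdio.h>
	#include <time.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#define MAX_CNT 1000000ull

	static unsigned long long time_get_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ts.tv_sec * 1000000000ull + ts.tv_nsec;
	}

	/* each getpgid() fires stress_hash_map_lookup(), which performs 64
	 * bpf_map_lookup_elem() calls on hash_map
	 */
	static void test_hash_lookup(void)
	{
		unsigned long long start;
		unsigned long long i;

		start = time_get_ns();
		for (i = 0; i < MAX_CNT; i++)
			syscall(__NR_getpgid, 0);
		printf("hash_lookup: %llu lookups per sec\n",
		       MAX_CNT * 64 * 1000000000ull / (time_get_ns() - start));
	}

Because the syscall and kprobe overhead is identical before and after the
kernel change, the ratio of the reported numbers isolates the cost of the
lookup path itself.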