linux_dsm_epyc7002/samples/bpf/map_perf_test_kern.c
Alexei Starovoitov 95ff141e52 samples/bpf: add map_lookup microbenchmark
$ map_perf_test 128
speed of HASH bpf_map_lookup_elem() in lookups per second
	w/o JIT		w/JIT
before	46M		58M
after	42M		74M

perf report
before:
    54.23%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    14.24%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     8.84%  map_perf_test  [kernel.kallsyms]  [k] htab_map_lookup_elem
     5.93%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
     2.30%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.49%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

after:
    60.03%  map_perf_test  [kernel.kallsyms]  [k] __htab_map_lookup_elem
    18.07%  map_perf_test  [kernel.kallsyms]  [k] lookup_elem_raw
     2.91%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     1.94%  map_perf_test  [kernel.kallsyms]  [k] _einittext
     1.90%  map_perf_test  [kernel.kallsyms]  [k] __audit_syscall_exit
     1.72%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

Notice that bpf_map_lookup_elem() and htab_map_lookup_elem() are trivial
functions, yet they take sizeable amount of cpu time.
htab_map_gen_lookup() removes bpf_map_lookup_elem() and converts
htab_map_lookup_elem() into three BPF insns which causing cpu time
for bpf_prog_da4fc6a3f41761a2() slightly increase.

$ map_perf_test 256
speed of ARRAY bpf_map_lookup_elem() in lookups per second
	w/o JIT		w/JIT
before	97M		174M
after	64M		280M

before:
    37.33%  map_perf_test  [kernel.kallsyms]  [k] array_map_lookup_elem
    13.95%  map_perf_test  [kernel.kallsyms]  [k] bpf_map_lookup_elem
     6.54%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     4.57%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

after:
    32.86%  map_perf_test  [kernel.kallsyms]  [k] bpf_prog_da4fc6a3f41761a2
     6.54%  map_perf_test  [kernel.kallsyms]  [k] kprobe_ftrace_handler

array_map_gen_lookup() removes calls to array_map_lookup_elem()
and bpf_map_lookup_elem() and replaces them with 7 bpf insns.

The performance without JIT is slower, since executing extra insns
in the interpreter is slower than running native C code,
but with JIT the performance gains are obvious,
since native C->x86 code is replaced with fewer bpf->x86 instructions.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-16 20:44:12 -07:00

203 lines
4.4 KiB
C

/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#define MAX_ENTRIES 1000
struct bpf_map_def SEC("maps") hash_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
};
struct bpf_map_def SEC("maps") lru_hash_map = {
.type = BPF_MAP_TYPE_LRU_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = 10000,
};
struct bpf_map_def SEC("maps") percpu_lru_hash_map = {
.type = BPF_MAP_TYPE_LRU_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = 10000,
.map_flags = BPF_F_NO_COMMON_LRU,
};
struct bpf_map_def SEC("maps") percpu_hash_map = {
.type = BPF_MAP_TYPE_PERCPU_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
};
struct bpf_map_def SEC("maps") hash_map_alloc = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
.map_flags = BPF_F_NO_PREALLOC,
};
struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
.type = BPF_MAP_TYPE_PERCPU_HASH,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
.map_flags = BPF_F_NO_PREALLOC,
};
struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
.type = BPF_MAP_TYPE_LPM_TRIE,
.key_size = 8,
.value_size = sizeof(long),
.max_entries = 10000,
.map_flags = BPF_F_NO_PREALLOC,
};
struct bpf_map_def SEC("maps") array_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
};
SEC("kprobe/sys_getuid")
int stress_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
value = bpf_map_lookup_elem(&hash_map, &key);
if (value)
bpf_map_delete_elem(&hash_map, &key);
return 0;
}
SEC("kprobe/sys_geteuid")
int stress_percpu_hmap(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
value = bpf_map_lookup_elem(&percpu_hash_map, &key);
if (value)
bpf_map_delete_elem(&percpu_hash_map, &key);
return 0;
}
SEC("kprobe/sys_getgid")
int stress_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
value = bpf_map_lookup_elem(&hash_map_alloc, &key);
if (value)
bpf_map_delete_elem(&hash_map_alloc, &key);
return 0;
}
SEC("kprobe/sys_getegid")
int stress_percpu_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_current_pid_tgid();
long init_val = 1;
long *value;
bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
if (value)
bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
return 0;
}
SEC("kprobe/sys_getpid")
int stress_lru_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_prandom_u32();
long val = 1;
bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
return 0;
}
SEC("kprobe/sys_getppid")
int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx)
{
u32 key = bpf_get_prandom_u32();
long val = 1;
bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY);
return 0;
}
SEC("kprobe/sys_gettid")
int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
{
union {
u32 b32[2];
u8 b8[8];
} key;
unsigned int i;
key.b32[0] = 32;
key.b8[4] = 192;
key.b8[5] = 168;
key.b8[6] = 0;
key.b8[7] = 1;
#pragma clang loop unroll(full)
for (i = 0; i < 32; ++i)
bpf_map_lookup_elem(&lpm_trie_map_alloc, &key);
return 0;
}
SEC("kprobe/sys_getpgid")
int stress_hash_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
long *value;
#pragma clang loop unroll(full)
for (i = 0; i < 64; ++i)
value = bpf_map_lookup_elem(&hash_map, &key);
return 0;
}
SEC("kprobe/sys_getpgrp")
int stress_array_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
long *value;
#pragma clang loop unroll(full)
for (i = 0; i < 64; ++i)
value = bpf_map_lookup_elem(&array_map, &key);
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;