linux_dsm_epyc7002/samples/bpf/xdp_monitor_user.c
Jesper Dangaard Brouer 417f1d9f21 samples/bpf: xdp_monitor include cpumap tracepoints in monitoring
The xdp_redirect_cpu sample have some "builtin" monitoring of the
tracepoints for xdp_cpumap_*, but it is practical to have an external
tool that can monitor these transpoint as an easy way to troubleshoot
an application using XDP + cpumap.

Specifically I need such external tool when working on Suricata and
XDP cpumap redirect. Extend the xdp_monitor tool sample with
monitoring of these xdp_cpumap_* tracepoints.  Model the output format
like xdp_redirect_cpu.

Given I needed to handle per CPU decoding for cpumap, this patch also
add per CPU info on the existing monitor events.  This resembles part
of the builtin monitoring output from sample xdp_rxq_info.  Thus, also
covering part of that sample in an external monitoring tool.

Performance wise, the cpumap tracepoints uses bulking, which cause
them to have very little overhead.  Thus, they are enabled by default.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-01-20 02:10:55 +01:00

649 lines
15 KiB
C

/* SPDX-License-Identifier: GPL-2.0
* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__=
"XDP monitor tool, based on tracepoints\n"
;
static const char *__doc_err_only__=
" NOTICE: Only tracking XDP redirect errors\n"
" Enable TX success stats via '--stats'\n"
" (which comes with a per packet processing overhead)\n"
;
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <locale.h>
#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
#include "libbpf.h"
#include "bpf_load.h"
#include "bpf_util.h"
static int verbose = 1;
static bool debug = false;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"debug", no_argument, NULL, 'D' },
{"stats", no_argument, NULL, 'S' },
{"sec", required_argument, NULL, 's' },
{0, 0, NULL, 0 }
};
/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
#define EXIT_FAIL_MEM 5
static void usage(char *argv[])
{
int i;
printf("\nDOCUMENTATION:\n%s\n", __doc__);
printf("\n");
printf(" Usage: %s (options-see-below)\n",
argv[0]);
printf(" Listing options:\n");
for (i = 0; long_options[i].name != 0; i++) {
printf(" --%-15s", long_options[i].name);
if (long_options[i].flag != NULL)
printf(" flag (internal value:%d)",
*long_options[i].flag);
else
printf("(internal short-option: -%c)",
long_options[i].val);
printf("\n");
}
printf("\n");
}
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
static __u64 gettime(void)
{
struct timespec t;
int res;
res = clock_gettime(CLOCK_MONOTONIC, &t);
if (res < 0) {
fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
exit(EXIT_FAILURE);
}
return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}
enum {
REDIR_SUCCESS = 0,
REDIR_ERROR = 1,
};
#define REDIR_RES_MAX 2
static const char *redir_names[REDIR_RES_MAX] = {
[REDIR_SUCCESS] = "Success",
[REDIR_ERROR] = "Error",
};
static const char *err2str(int err)
{
if (err < REDIR_RES_MAX)
return redir_names[err];
return NULL;
}
/* enum xdp_action */
#define XDP_UNKNOWN XDP_REDIRECT + 1
#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
static const char *xdp_action_names[XDP_ACTION_MAX] = {
[XDP_ABORTED] = "XDP_ABORTED",
[XDP_DROP] = "XDP_DROP",
[XDP_PASS] = "XDP_PASS",
[XDP_TX] = "XDP_TX",
[XDP_REDIRECT] = "XDP_REDIRECT",
[XDP_UNKNOWN] = "XDP_UNKNOWN",
};
static const char *action2str(int action)
{
if (action < XDP_ACTION_MAX)
return xdp_action_names[action];
return NULL;
}
/* Common stats data record shared with _kern.c */
struct datarec {
__u64 processed;
__u64 dropped;
__u64 info;
};
#define MAX_CPUS 64
/* Userspace structs for collection of stats from maps */
struct record {
__u64 timestamp;
struct datarec total;
struct datarec *cpu;
};
struct u64rec {
__u64 processed;
};
struct record_u64 {
/* record for _kern side __u64 values */
__u64 timestamp;
struct u64rec total;
struct u64rec *cpu;
};
struct stats_record {
struct record_u64 xdp_redirect[REDIR_RES_MAX];
struct record_u64 xdp_exception[XDP_ACTION_MAX];
struct record xdp_cpumap_kthread;
struct record xdp_cpumap_enqueue[MAX_CPUS];
};
static bool map_collect_record(int fd, __u32 key, struct record *rec)
{
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec values[nr_cpus];
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_info = 0;
int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
fprintf(stderr,
"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
return false;
}
/* Get time as close as possible to reading map contents */
rec->timestamp = gettime();
/* Record and sum values from each CPU */
for (i = 0; i < nr_cpus; i++) {
rec->cpu[i].processed = values[i].processed;
sum_processed += values[i].processed;
rec->cpu[i].dropped = values[i].dropped;
sum_dropped += values[i].dropped;
rec->cpu[i].info = values[i].info;
sum_info += values[i].info;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.info = sum_info;
return true;
}
static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
{
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct u64rec values[nr_cpus];
__u64 sum_total = 0;
int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
fprintf(stderr,
"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
return false;
}
/* Get time as close as possible to reading map contents */
rec->timestamp = gettime();
/* Record and sum values from each CPU */
for (i = 0; i < nr_cpus; i++) {
rec->cpu[i].processed = values[i].processed;
sum_total += values[i].processed;
}
rec->total.processed = sum_total;
return true;
}
static double calc_period(struct record *r, struct record *p)
{
double period_ = 0;
__u64 period = 0;
period = r->timestamp - p->timestamp;
if (period > 0)
period_ = ((double) period / NANOSEC_PER_SEC);
return period_;
}
static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
{
double period_ = 0;
__u64 period = 0;
period = r->timestamp - p->timestamp;
if (period > 0)
period_ = ((double) period / NANOSEC_PER_SEC);
return period_;
}
static double calc_pps(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->processed - p->processed;
pps = packets / period;
}
return pps;
}
static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->processed - p->processed;
pps = packets / period;
}
return pps;
}
static double calc_drop(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->dropped - p->dropped;
pps = packets / period;
}
return pps;
}
static double calc_info(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->info - p->info;
pps = packets / period;
}
return pps;
}
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
bool err_only)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
int rec_i = 0, i, to_cpu;
double t = 0, pps = 0;
/* Header */
printf("%-15s %-7s %-12s %-12s %-9s\n",
"XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
/* tracepoint: xdp:xdp_redirect_* */
if (err_only)
rec_i = REDIR_ERROR;
for (; rec_i < REDIR_RES_MAX; rec_i++) {
struct record_u64 *rec, *prev;
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
rec = &stats_rec->xdp_redirect[rec_i];
prev = &stats_prev->xdp_redirect[rec_i];
t = calc_period_u64(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct u64rec *r = &rec->cpu[i];
struct u64rec *p = &prev->cpu[i];
pps = calc_pps_u64(r, p, t);
if (pps > 0)
printf(fmt1, "XDP_REDIRECT", i,
rec_i ? 0.0: pps, rec_i ? pps : 0.0,
err2str(rec_i));
}
pps = calc_pps_u64(&rec->total, &prev->total, t);
printf(fmt2, "XDP_REDIRECT", "total",
rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
}
/* tracepoint: xdp:xdp_exception */
for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
struct record_u64 *rec, *prev;
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
rec = &stats_rec->xdp_exception[rec_i];
prev = &stats_prev->xdp_exception[rec_i];
t = calc_period_u64(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct u64rec *r = &rec->cpu[i];
struct u64rec *p = &prev->cpu[i];
pps = calc_pps_u64(r, p, t);
if (pps > 0)
printf(fmt1, "Exception", i,
0.0, pps, err2str(rec_i));
}
pps = calc_pps_u64(&rec->total, &prev->total, t);
if (pps > 0)
printf(fmt2, "Exception", "total",
0.0, pps, action2str(rec_i));
}
/* cpumap enqueue stats */
for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
struct record *rec, *prev;
char *info_str = "";
double drop, info;
rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
pps = calc_pps(r, p, t);
drop = calc_drop(r, p, t);
info = calc_info(r, p, t);
if (info > 0) {
info_str = "bulk-average";
info = pps / info; /* calc average bulk size */
}
if (pps > 0)
printf(fmt1, "cpumap-enqueue",
i, to_cpu, pps, drop, info, info_str);
}
pps = calc_pps(&rec->total, &prev->total, t);
if (pps > 0) {
drop = calc_drop(&rec->total, &prev->total, t);
info = calc_info(&rec->total, &prev->total, t);
if (info > 0) {
info_str = "bulk-average";
info = pps / info; /* calc average bulk size */
}
printf(fmt2, "cpumap-enqueue",
"sum", to_cpu, pps, drop, info, info_str);
}
}
/* cpumap kthread stats */
{
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
struct record *rec, *prev;
double drop, info;
char *i_str = "";
rec = &stats_rec->xdp_cpumap_kthread;
prev = &stats_prev->xdp_cpumap_kthread;
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
pps = calc_pps(r, p, t);
drop = calc_drop(r, p, t);
info = calc_info(r, p, t);
if (info > 0)
i_str = "sched";
if (pps > 0)
printf(fmt1, "cpumap-kthread",
i, pps, drop, info, i_str);
}
pps = calc_pps(&rec->total, &prev->total, t);
drop = calc_drop(&rec->total, &prev->total, t);
info = calc_info(&rec->total, &prev->total, t);
if (info > 0)
i_str = "sched-sum";
printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
printf("\n");
}
static bool stats_collect(struct stats_record *rec)
{
int fd;
int i;
/* TODO: Detect if someone unloaded the perf event_fd's, as
* this can happen by someone running perf-record -e
*/
fd = map_data[0].fd; /* map0: redirect_err_cnt */
for (i = 0; i < REDIR_RES_MAX; i++)
map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
fd = map_data[1].fd; /* map1: exception_cnt */
for (i = 0; i < XDP_ACTION_MAX; i++) {
map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
}
fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
for (i = 0; i < MAX_CPUS; i++)
map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
return true;
}
static void *alloc_rec_per_cpu(int record_size)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
void *array;
size_t size;
size = record_size * nr_cpus;
array = malloc(size);
memset(array, 0, size);
if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM);
}
return array;
}
static struct stats_record *alloc_stats_record(void)
{
struct stats_record *rec;
int rec_sz;
int i;
/* Alloc main stats_record structure */
rec = malloc(sizeof(*rec));
memset(rec, 0, sizeof(*rec));
if (!rec) {
fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM);
}
/* Alloc stats stored per CPU for each record */
rec_sz = sizeof(struct u64rec);
for (i = 0; i < REDIR_RES_MAX; i++)
rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < XDP_ACTION_MAX; i++)
rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
rec_sz = sizeof(struct datarec);
rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < MAX_CPUS; i++)
rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
return rec;
}
static void free_stats_record(struct stats_record *r)
{
int i;
for (i = 0; i < REDIR_RES_MAX; i++)
free(r->xdp_redirect[i].cpu);
for (i = 0; i < XDP_ACTION_MAX; i++)
free(r->xdp_exception[i].cpu);
free(r->xdp_cpumap_kthread.cpu);
for (i = 0; i < MAX_CPUS; i++)
free(r->xdp_cpumap_enqueue[i].cpu);
free(r);
}
/* Pointer swap trick */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
struct stats_record *tmp;
tmp = *a;
*a = *b;
*b = tmp;
}
static void stats_poll(int interval, bool err_only)
{
struct stats_record *rec, *prev;
rec = alloc_stats_record();
prev = alloc_stats_record();
stats_collect(rec);
if (err_only)
printf("\n%s\n", __doc_err_only__);
/* Trick to pretty printf with thousands separators use %' */
setlocale(LC_NUMERIC, "en_US");
/* Header */
if (verbose)
printf("\n%s", __doc__);
/* TODO Need more advanced stats on error types */
if (verbose) {
printf(" - Stats map0: %s\n", map_data[0].name);
printf(" - Stats map1: %s\n", map_data[1].name);
printf("\n");
}
fflush(stdout);
while (1) {
swap(&prev, &rec);
stats_collect(rec);
stats_print(rec, prev, err_only);
fflush(stdout);
sleep(interval);
}
free_stats_record(rec);
free_stats_record(prev);
}
static void print_bpf_prog_info(void)
{
int i;
/* Prog info */
printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
for (i = 0; i < prog_cnt; i++) {
printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
}
/* Maps info */
printf("Loaded BPF prog have %d map(s)\n", map_data_count);
for (i = 0; i < map_data_count; i++) {
char *name = map_data[i].name;
int fd = map_data[i].fd;
printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
}
/* Event info */
printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
for (i = 0; i < prog_cnt; i++) {
if (event_fd[i] != -1)
printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
}
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int longindex = 0, opt;
int ret = EXIT_SUCCESS;
char bpf_obj_file[256];
/* Default settings: */
bool errors_only = true;
int interval = 2;
snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
/* Parse commands line args */
while ((opt = getopt_long(argc, argv, "h",
long_options, &longindex)) != -1) {
switch (opt) {
case 'D':
debug = true;
break;
case 'S':
errors_only = false;
break;
case 's':
interval = atoi(optarg);
break;
case 'h':
default:
usage(argv);
return EXIT_FAILURE;
}
}
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return EXIT_FAILURE;
}
if (load_bpf_file(bpf_obj_file)) {
printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
return EXIT_FAILURE;
}
if (!prog_fd[0]) {
printf("ERROR - load_bpf_file: %s\n", strerror(errno));
return EXIT_FAILURE;
}
if (debug) {
print_bpf_prog_info();
}
/* Unload/stop tracepoint event by closing fd's */
if (errors_only) {
/* The prog_fd[i] and event_fd[i] depend on the
* order the functions was defined in _kern.c
*/
close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
close(prog_fd[2]); /* func: trace_xdp_redirect */
close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
close(prog_fd[3]); /* func: trace_xdp_redirect_map */
}
stats_poll(interval, errors_only);
return ret;
}