mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-23 22:21:59 +07:00
0fb1170ee6
BPF offers another way to generate latency histograms. We attach kprobes at trace_preempt_off and trace_preempt_on and calculate the time elapsed between seeing the off and on transitions. The first array is used to store the start time stamp. The key is the CPU id. The second array stores the log2(time diff). We need to use static allocation here (array and not hash tables). The kprobes hooking into trace_preempt_on|off should not call any dynamic memory allocation or free path. We need to avoid getting called recursively. Besides that, it reduces jitter in the measurement. CPU 0 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 166723 |*************************************** | 4096 -> 8191 : 19870 |*** | 8192 -> 16383 : 6324 | | 16384 -> 32767 : 1098 | | 32768 -> 65535 : 190 | | 65536 -> 131071 : 179 | | 131072 -> 262143 : 18 | | 262144 -> 524287 : 4 | | 524288 -> 1048575 : 1363 | | CPU 1 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 114042 |*************************************** | 4096 -> 8191 : 9587 |** | 8192 -> 16383 : 4140 | | 16384 -> 32767 : 673 | | 32768 -> 65535 : 179 | | 65536 -> 131071 : 29 | | 131072 -> 262143 : 4 | | 262144 -> 524287 : 1 | | 524288 -> 1048575 : 364 | | CPU 2 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 40147 |*************************************** | 4096 -> 8191 : 2300 |* | 8192 -> 16383 : 828 | | 16384 -> 32767 : 178 | | 32768 -> 65535 : 59 | | 65536 -> 131071 : 2 | | 131072 
-> 262143 : 0 | | 262144 -> 524287 : 1 | | 524288 -> 1048575 : 174 | | CPU 3 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 29626 |*************************************** | 4096 -> 8191 : 2704 |** | 8192 -> 16383 : 1090 | | 16384 -> 32767 : 160 | | 32768 -> 65535 : 72 | | 65536 -> 131071 : 32 | | 131072 -> 262143 : 26 | | 262144 -> 524287 : 12 | | 524288 -> 1048575 : 298 | | All this is based on the trace3 examples written by Alexei Starovoitov <ast@plumgrid.com>. Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Ingo Molnar <mingo@kernel.org> Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Acked-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
104 lines
2.0 KiB
C
104 lines
2.0 KiB
C
/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
|
|
* Copyright (c) 2015 BMW Car IT GmbH
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
* License as published by the Free Software Foundation.
|
|
*/
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <signal.h>
|
|
#include <linux/bpf.h>
|
|
#include "libbpf.h"
|
|
#include "bpf_load.h"
|
|
|
|
/* Number of histogram buckets kept per CPU. */
#define MAX_ENTRIES	20
/* Number of CPUs whose histograms are read and printed. */
#define MAX_CPU		4
/* Size of the bar buffer, and the width of the printed bar column. */
#define MAX_STARS	40

/*
 * User-space snapshot of one CPU's histogram.
 *
 * data[] holds the per-bucket counts and max caches the largest of them,
 * which is used to scale the printed bars.
 */
struct cpu_hist {
	long data[MAX_ENTRIES];
	long max;
};

/* One snapshot per CPU, refreshed on every polling round. */
static struct cpu_hist cpu_hist[MAX_CPU];
|
|
|
|
/*
 * Render a histogram bar for @val into @str.
 *
 * @str:   output buffer; must have room for at least @width bytes
 *         (up to @width - 1 bar characters plus the terminating NUL).
 * @val:   value to draw.
 * @max:   value that corresponds to a full-length bar.
 * @width: capacity of @str in bytes.
 *
 * The bar consists of (width * val / max) - 1 '*' characters, capped at
 * width - 1. When @val exceeds @max the last character is replaced by
 * '+' to flag the overflow.
 *
 * If @max is not positive there is no scale to draw against, so an empty
 * string is produced instead of dividing by zero. If @width is not
 * positive the buffer is left untouched because it has no room at all.
 */
static void stars(char *str, long val, long max, int width)
{
	long count;
	int i;

	if (width <= 0)
		return;

	if (max <= 0) {
		str[0] = '\0';
		return;
	}

	/* Bar length, never exceeding what fits before the NUL. */
	count = width * val / max - 1;
	if (count > width - 1)
		count = width - 1;

	for (i = 0; i < count; i++)
		str[i] = '*';

	/* Only mark overflow if there is a character to replace. */
	if (val > max && i > 0)
		str[i - 1] = '+';
	str[i] = '\0';
}
|
|
|
|
static void print_hist(void)
|
|
{
|
|
char starstr[MAX_STARS];
|
|
struct cpu_hist *hist;
|
|
int i, j;
|
|
|
|
/* clear screen */
|
|
printf("\033[2J");
|
|
|
|
for (j = 0; j < MAX_CPU; j++) {
|
|
hist = &cpu_hist[j];
|
|
|
|
/* ignore CPUs without data (maybe offline?) */
|
|
if (hist->max == 0)
|
|
continue;
|
|
|
|
printf("CPU %d\n", j);
|
|
printf(" latency : count distribution\n");
|
|
for (i = 1; i <= MAX_ENTRIES; i++) {
|
|
stars(starstr, hist->data[i - 1], hist->max, MAX_STARS);
|
|
printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
|
|
(1l << i) >> 1, (1l << i) - 1,
|
|
hist->data[i - 1], MAX_STARS, starstr);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void get_data(int fd)
|
|
{
|
|
long key, value;
|
|
int c, i;
|
|
|
|
for (i = 0; i < MAX_CPU; i++)
|
|
cpu_hist[i].max = 0;
|
|
|
|
for (c = 0; c < MAX_CPU; c++) {
|
|
for (i = 0; i < MAX_ENTRIES; i++) {
|
|
key = c * MAX_ENTRIES + i;
|
|
bpf_lookup_elem(fd, &key, &value);
|
|
|
|
cpu_hist[c].data[i] = value;
|
|
if (value > cpu_hist[c].max)
|
|
cpu_hist[c].max = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
char filename[256];
|
|
|
|
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
|
|
|
if (load_bpf_file(filename)) {
|
|
printf("%s", bpf_log_buf);
|
|
return 1;
|
|
}
|
|
|
|
while (1) {
|
|
get_data(map_fd[1]);
|
|
print_hist();
|
|
sleep(5);
|
|
}
|
|
|
|
return 0;
|
|
}
|