mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-27 17:23:18 +07:00
perf/core improvements and fixes:
- Support direct --user-regs arguments in 'perf record', previously the only way to sample PERF_SAMPLE_REGS_USER was implicitly selecting it when recording callchains (Andi Kleen) - Support showing sampled user regs in 'perf script' (Andi Kleen) - Introduce the concept of weak groups in 'perf stat': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. E.g: (Andi Kleen) % perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}:W' -a sleep 1 125,366,055 branches (80.02%) 9,208,402 branch-misses # 7.35% of all branches (80.01%) 24,560,249 l1d.replacement (80.00%) 43,174,971 l2_lines_in.all (80.05%) 31,891,457 l2_rqsts.all_code_rd (79.92%) - Support metrics in 'stat' and 'list'. A metric is a formula that uses multiple events to compute a higher level result (e.g. IPC). (Andi Kleen) - Add Intel processors vendor event metrics JSON files (Andi Kleen) - Add 'pid' and 'tid' options to 'perf sched timehist' (David Ahern) - Generate 'behavior' string table from kernel headers, helps getting new parameters when synchronizing kernel headers, like MADV_WIPEONFORK and MADV_KEEPONFORK, that are now beautied (Arnaldo Carvalho de Melo) - Improve TUI progress bar by showing how many bytes from a total were processed (Jiri Olsa) - Use scandir() to replace readdir(), prep work to have the synthesizing of PERF_RECORD_ entries for existing threads be multithreaded, making 'perf top' bearable on high core count systems such as Intel's Knights Landing/Mill (Kan Liang) - Allow creating a ~/.perfconfig file when setting a variable to its default value, previously it would bail out and not write such a file (Taeung Song) - Introduce wrapper for allowing purely single threaded apps to avoid the costs of locking (Arnaldo Carvalho de Melo) - Introduce hashtable to reduce the cost of thread lookup - Fix build C++ build wrt 
poison.h using void pointer arithmetic, affects only the embedded clang/llvm case, that is disabled by default (Arnaldo Carvalho de Melo) - Fix leaking rec_argv in error cases (Martin Kepplinger) - Remove Intel CQM perf test, that infrastructure was nuked (Xiaochen Shen) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEELb9bqkb7Te0zijNb1lAW81NSqkAFAlnFH1MACgkQ1lAW81NS qkCPxQ/+LsWTQVdPoLjodivSxELn19zAkf8z6j1frLiFniHq2WxY+galhoLHAGlh F4j0g3P61+7Pspa0RlC1kSEqrk0yHmzCMbodZS+8I4K8qfCA3D1lXGUnJjmMBVkj kYIMxcvotvN0r5Bwzv4Bd8niZHKp4APQyQN6vXZZY3zGwJSNbV88L4qgQhTBvyLV hJ5PhfUkxVpSlJ2Muf0jbp97DhIH2owUFTO51ZV39t40eOeTmp/fJxq2tppbYrKm puWmfMM2KLm01gTcHTw9s5IrHqWq7FAB8lMIXxJN/HPQwR5cO8KJ9Ddo+BOaRbwY OelU6W4VgTX/Wx3oSBd6SSpicNuTyipASQKOSa711ck6EKhd5QnjvrHF4A781v20 zpLYMbk04vdOXRdjOAmnV73INAgC7+3C1L6gfIgT9uAfUpJQRQJu0wfTA4734Rh8 DcrIc6SkQX8s6E5lOW2mzla4yyQxlzm42tFGr1N0ASzgHu623IKkXP/UdRxNo2ep vFNH4DPqZr5hbQkNL2md7u8KL2i/4UQhG+1Uf0jfNYg6O5HcJToLZKc462G4LmVP ASOTyUAGyDFYseAUTLtcM+2W+iTCjFNN/LnHnsOXF8ESpyHJCEXcAOy8v04RMXrP 4z3xP8OrNubBL/WkTuMGRmanFe8ZrASFTddVH/XZXsSDoC13KCk= =oNFx -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo-4.15-20170922' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Support direct --user-regs arguments in 'perf record', previously the only way to sample PERF_SAMPLE_REGS_USER was implicitly selecting it when recording callchains (Andi Kleen) - Support showing sampled user regs in 'perf script' (Andi Kleen) - Introduce the concept of weak groups in 'perf stat': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. 
E.g: (Andi Kleen) % perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}:W' -a sleep 1 125,366,055 branches (80.02%) 9,208,402 branch-misses # 7.35% of all branches (80.01%) 24,560,249 l1d.replacement (80.00%) 43,174,971 l2_lines_in.all (80.05%) 31,891,457 l2_rqsts.all_code_rd (79.92%) - Support metrics in 'stat' and 'list'. A metric is a formula that uses multiple events to compute a higher level result (e.g. IPC). (Andi Kleen) - Add Intel processors vendor event metrics JSON files (Andi Kleen) - Add 'pid' and 'tid' options to 'perf sched timehist' (David Ahern) - Generate 'behavior' string table from kernel headers, helps getting new parameters when synchronizing kernel headers, like MADV_WIPEONFORK and MADV_KEEPONFORK, that are now beautied (Arnaldo Carvalho de Melo) - Improve TUI progress bar by showing how many bytes from a total were processed (Jiri Olsa) - Use scandir() to replace readdir(), prep work to have the synthesizing of PERF_RECORD_ entries for existing threads be multithreaded, making 'perf top' bearable on high core count systems such as Intel's Knights Landing/Mill (Kan Liang) - Allow creating a ~/.perfconfig file when setting a variable to its default value, previously it would bail out and not write such a file (Taeung Song) - Introduce wrapper for allowing purely single threaded apps to avoid the costs of locking (Arnaldo Carvalho de Melo) - Introduce hashtable to reduce the cost of thread lookup - Fix build C++ build wrt poison.h using void pointer arithmetic, affects only the embedded clang/llvm case, that is disabled by default (Arnaldo Carvalho de Melo) - Fix leaking rec_argv in error cases (Martin Kepplinger) - Remove Intel CQM perf test, that infrastructure was nuked (Xiaochen Shen) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
aa469aafdd
@ -14,6 +14,10 @@
|
||||
# define POISON_POINTER_DELTA 0
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define LIST_POISON1 NULL
|
||||
#define LIST_POISON2 NULL
|
||||
#else
|
||||
/*
|
||||
* These are non-NULL pointers that will result in page faults
|
||||
* under normal circumstances, used to verify that nobody uses
|
||||
@ -21,6 +25,7 @@
|
||||
*/
|
||||
#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
|
||||
#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
|
||||
#endif
|
||||
|
||||
/********** include/linux/timer.h **********/
|
||||
/*
|
||||
|
@ -58,20 +58,12 @@
|
||||
overrides the coredump filter bits */
|
||||
#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
|
||||
|
||||
#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
|
||||
#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
/*
|
||||
* When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
|
||||
* This gives us 6 bits, which is enough until someone invents 128 bit address
|
||||
* spaces.
|
||||
*
|
||||
* Assume these are all power of twos.
|
||||
* When 0 use the default page size.
|
||||
*/
|
||||
#define MAP_HUGE_SHIFT 26
|
||||
#define MAP_HUGE_MASK 0x3f
|
||||
|
||||
#define PKEY_DISABLE_ACCESS 0x1
|
||||
#define PKEY_DISABLE_WRITE 0x2
|
||||
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
||||
|
@ -8,7 +8,8 @@ perf-list - List all symbolic event types
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
|
||||
'perf list' [--no-desc] [--long-desc]
|
||||
[hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
@ -47,6 +48,8 @@ counted. The following modifiers exist:
|
||||
P - use maximum detected precise level
|
||||
S - read sample value (PERF_SAMPLE_READ)
|
||||
D - pin the event to the PMU
|
||||
W - group is weak and will fall back to non-group if not schedulable,
|
||||
only supported in 'perf stat' for now.
|
||||
|
||||
The 'p' modifier can be used for specifying how precise the instruction
|
||||
address should be. The 'p' modifier can be specified multiple times:
|
||||
@ -246,6 +249,10 @@ To limit the list use:
|
||||
|
||||
. 'sdt' to list all Statically Defined Tracepoint events.
|
||||
|
||||
. 'metric' to list metrics
|
||||
|
||||
. 'metricgroup' to list metricgroups with metrics.
|
||||
|
||||
. If none of the above is matched, it will apply the supplied glob to all
|
||||
events, printing the ones that match.
|
||||
|
||||
|
@ -377,6 +377,8 @@ symbolic names, e.g. on x86, ax, si. To list the available registers use
|
||||
--intr-regs=\?. To name registers, pass a comma separated list such as
|
||||
--intr-regs=ax,bx. The list of registers is architecture dependent.
|
||||
|
||||
--user-regs::
|
||||
Capture user registers at sample time. Same arguments as -I.
|
||||
|
||||
--running-time::
|
||||
Record running and enabled time for read events (:S)
|
||||
|
@ -106,6 +106,14 @@ OPTIONS for 'perf sched timehist'
|
||||
--max-stack::
|
||||
Maximum number of functions to display in backtrace, default 5.
|
||||
|
||||
-p=::
|
||||
--pid=::
|
||||
Only show events for given process ID (comma separated list).
|
||||
|
||||
-t=::
|
||||
--tid=::
|
||||
Only show events for given thread ID (comma separated list).
|
||||
|
||||
-s::
|
||||
--summary::
|
||||
Show only a summary of scheduling by thread with min, max, and average
|
||||
|
@ -116,8 +116,8 @@ OPTIONS
|
||||
--fields::
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
|
||||
callindent, insn, insnlen, synth, phys_addr.
|
||||
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
|
||||
brstackoff, callindent, insn, insnlen, synth, phys_addr.
|
||||
Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
|
||||
|
@ -199,6 +199,13 @@ Aggregate counts per processor socket for system-wide mode measurements.
|
||||
--per-core::
|
||||
Aggregate counts per physical processor for system-wide mode measurements.
|
||||
|
||||
-M::
|
||||
--metrics::
|
||||
Print metrics or metricgroups specified in a comma separated list.
|
||||
For a group all metrics from the group are added.
|
||||
The events from the metrics are automatically measured.
|
||||
See perf list output for the possible metrics and metricgroups.
|
||||
|
||||
-A::
|
||||
--no-aggr::
|
||||
Do not aggregate counts across all monitored CPUs.
|
||||
|
@ -1,34 +1,8 @@
|
||||
tools/perf
|
||||
tools/arch/alpha/include/asm/barrier.h
|
||||
tools/arch/arm/include/asm/barrier.h
|
||||
tools/arch/arm64/include/asm/barrier.h
|
||||
tools/arch/ia64/include/asm/barrier.h
|
||||
tools/arch/mips/include/asm/barrier.h
|
||||
tools/arch/powerpc/include/asm/barrier.h
|
||||
tools/arch/s390/include/asm/barrier.h
|
||||
tools/arch/sh/include/asm/barrier.h
|
||||
tools/arch/sparc/include/asm/barrier.h
|
||||
tools/arch/sparc/include/asm/barrier_32.h
|
||||
tools/arch/sparc/include/asm/barrier_64.h
|
||||
tools/arch/tile/include/asm/barrier.h
|
||||
tools/arch/x86/include/asm/barrier.h
|
||||
tools/arch/x86/include/asm/cmpxchg.h
|
||||
tools/arch/x86/include/asm/cpufeatures.h
|
||||
tools/arch/x86/include/asm/disabled-features.h
|
||||
tools/arch/x86/include/asm/required-features.h
|
||||
tools/arch/x86/include/uapi/asm/svm.h
|
||||
tools/arch/x86/include/uapi/asm/vmx.h
|
||||
tools/arch/x86/include/uapi/asm/kvm.h
|
||||
tools/arch/x86/include/uapi/asm/kvm_perf.h
|
||||
tools/arch/x86/lib/memcpy_64.S
|
||||
tools/arch/x86/lib/memset_64.S
|
||||
tools/arch/s390/include/uapi/asm/kvm_perf.h
|
||||
tools/arch/s390/include/uapi/asm/sie.h
|
||||
tools/arch/xtensa/include/asm/barrier.h
|
||||
tools/arch
|
||||
tools/scripts
|
||||
tools/build
|
||||
tools/arch/x86/include/asm/atomic.h
|
||||
tools/arch/x86/include/asm/rmwcc.h
|
||||
tools/include
|
||||
tools/lib/traceevent
|
||||
tools/lib/api
|
||||
tools/lib/bpf
|
||||
@ -42,60 +16,3 @@ tools/lib/find_bit.c
|
||||
tools/lib/bitmap.c
|
||||
tools/lib/str_error_r.c
|
||||
tools/lib/vsprintf.c
|
||||
tools/include/asm/alternative-asm.h
|
||||
tools/include/asm/atomic.h
|
||||
tools/include/asm/barrier.h
|
||||
tools/include/asm/bug.h
|
||||
tools/include/asm-generic/atomic-gcc.h
|
||||
tools/include/asm-generic/barrier.h
|
||||
tools/include/asm-generic/bitops/arch_hweight.h
|
||||
tools/include/asm-generic/bitops/atomic.h
|
||||
tools/include/asm-generic/bitops/const_hweight.h
|
||||
tools/include/asm-generic/bitops/__ffs.h
|
||||
tools/include/asm-generic/bitops/__ffz.h
|
||||
tools/include/asm-generic/bitops/__fls.h
|
||||
tools/include/asm-generic/bitops/find.h
|
||||
tools/include/asm-generic/bitops/fls64.h
|
||||
tools/include/asm-generic/bitops/fls.h
|
||||
tools/include/asm-generic/bitops/hweight.h
|
||||
tools/include/asm-generic/bitops.h
|
||||
tools/include/linux/atomic.h
|
||||
tools/include/linux/bitops.h
|
||||
tools/include/linux/compiler.h
|
||||
tools/include/linux/compiler-gcc.h
|
||||
tools/include/linux/coresight-pmu.h
|
||||
tools/include/linux/bug.h
|
||||
tools/include/linux/filter.h
|
||||
tools/include/linux/hash.h
|
||||
tools/include/linux/kernel.h
|
||||
tools/include/linux/list.h
|
||||
tools/include/linux/log2.h
|
||||
tools/include/uapi/asm-generic/fcntl.h
|
||||
tools/include/uapi/asm-generic/ioctls.h
|
||||
tools/include/uapi/asm-generic/mman-common.h
|
||||
tools/include/uapi/asm-generic/mman.h
|
||||
tools/include/uapi/drm/drm.h
|
||||
tools/include/uapi/drm/i915_drm.h
|
||||
tools/include/uapi/linux/bpf.h
|
||||
tools/include/uapi/linux/bpf_common.h
|
||||
tools/include/uapi/linux/fcntl.h
|
||||
tools/include/uapi/linux/hw_breakpoint.h
|
||||
tools/include/uapi/linux/kvm.h
|
||||
tools/include/uapi/linux/mman.h
|
||||
tools/include/uapi/linux/perf_event.h
|
||||
tools/include/uapi/linux/sched.h
|
||||
tools/include/uapi/linux/stat.h
|
||||
tools/include/uapi/linux/vhost.h
|
||||
tools/include/uapi/sound/asound.h
|
||||
tools/include/linux/poison.h
|
||||
tools/include/linux/rbtree.h
|
||||
tools/include/linux/rbtree_augmented.h
|
||||
tools/include/linux/refcount.h
|
||||
tools/include/linux/string.h
|
||||
tools/include/linux/stringify.h
|
||||
tools/include/linux/types.h
|
||||
tools/include/linux/err.h
|
||||
tools/include/linux/bitmap.h
|
||||
tools/include/linux/time64.h
|
||||
tools/arch/*/include/uapi/asm/mman.h
|
||||
tools/arch/*/include/uapi/asm/perf_regs.h
|
||||
|
@ -173,7 +173,7 @@ AWK = awk
|
||||
# non-config cases
|
||||
config := 1
|
||||
|
||||
NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
|
||||
NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
|
||||
|
||||
ifdef MAKECMDGOALS
|
||||
ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
|
||||
@ -441,6 +441,13 @@ perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh
|
||||
$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl)
|
||||
$(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@
|
||||
|
||||
madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c
|
||||
madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
|
||||
madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
|
||||
|
||||
$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
|
||||
$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
|
||||
|
||||
all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
|
||||
|
||||
$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
|
||||
@ -541,6 +548,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
|
||||
$(sndrv_ctl_ioctl_array) \
|
||||
$(kvm_ioctl_array) \
|
||||
$(vhost_virtio_ioctl_array) \
|
||||
$(madvise_behavior_array) \
|
||||
$(perf_ioctl_array)
|
||||
|
||||
$(OUTPUT)%.o: %.c prepare FORCE
|
||||
@ -802,7 +810,10 @@ config-clean:
|
||||
$(call QUIET_CLEAN, config)
|
||||
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
|
||||
|
||||
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean
|
||||
python-clean:
|
||||
$(python-clean)
|
||||
|
||||
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
|
||||
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
|
||||
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
|
||||
$(Q)$(RM) $(OUTPUT).config-detected
|
||||
@ -811,6 +822,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
|
||||
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
|
||||
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
|
||||
$(OUTPUT)pmu-events/pmu-events.c \
|
||||
$(OUTPUT)$(madvise_behavior_array) \
|
||||
$(OUTPUT)$(drm_ioctl_array) \
|
||||
$(OUTPUT)$(pkey_alloc_access_rights_array) \
|
||||
$(OUTPUT)$(sndrv_ctl_ioctl_array) \
|
||||
@ -819,7 +831,6 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
|
||||
$(OUTPUT)$(vhost_virtio_ioctl_array) \
|
||||
$(OUTPUT)$(perf_ioctl_array)
|
||||
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
|
||||
$(python-clean)
|
||||
|
||||
#
|
||||
# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
|
||||
|
@ -8,7 +8,6 @@ struct test;
|
||||
int test__rdpmc(struct test *test __maybe_unused, int subtest);
|
||||
int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
|
||||
int test__insn_x86(struct test *test __maybe_unused, int subtest);
|
||||
int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest);
|
||||
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
struct thread;
|
||||
|
@ -5,4 +5,3 @@ libperf-y += arch-tests.o
|
||||
libperf-y += rdpmc.o
|
||||
libperf-y += perf-time-to-tsc.o
|
||||
libperf-$(CONFIG_AUXTRACE) += insn-x86.o
|
||||
libperf-y += intel-cqm.o
|
||||
|
@ -23,10 +23,6 @@ struct test arch_tests[] = {
|
||||
.func = test__insn_x86,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.desc = "Intel cqm nmi context read",
|
||||
.func = test__intel_cqm_count_nmi_context,
|
||||
},
|
||||
{
|
||||
.func = NULL,
|
||||
},
|
||||
|
@ -1,127 +0,0 @@
|
||||
#include "tests/tests.h"
|
||||
#include "perf.h"
|
||||
#include "cloexec.h"
|
||||
#include "debug.h"
|
||||
#include "evlist.h"
|
||||
#include "evsel.h"
|
||||
#include "arch-tests.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Fork a child that does nothing but sleep, so the caller has a live pid
 * to attach events to.
 *
 * In the parent this returns whatever fork() returned (any non-zero value
 * is passed straight back). The child never leaves the sleep loop.
 */
static pid_t spawn(void)
{
	pid_t child = fork();

	if (child != 0)
		return child;

	/* Child side: idle forever in 5 second naps. */
	for (;;)
		sleep(5);

	return 0;
}
|
||||
|
||||
/*
|
||||
* Create an event group that contains both a sampled hardware
|
||||
* (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then
|
||||
* wait for the hardware perf counter to overflow and generate a PMI,
|
||||
* which triggers an event read for both of the events in the group.
|
||||
*
|
||||
* Since reading Intel CQM event counters requires sending SMP IPIs, the
|
||||
* CQM pmu needs to handle the above situation gracefully, and return
|
||||
* the last read counter value to avoid triggering a WARN_ON_ONCE() in
|
||||
* smp_call_function_many() caused by sending IPIs from NMI context.
|
||||
*/
|
||||
/*
 * Open a sampled cpu-cycles event as group leader and the
 * intel_cqm/llc_occupancy/ event as its sibling on a sleeping child
 * process, mmap the leader's ring buffer and let it run for one second.
 *
 * Returns TEST_OK when the group could be opened and mapped, TEST_SKIP
 * when the intel_cqm/llc_occupancy/ event cannot be parsed, and TEST_FAIL
 * on any other error.
 *
 * All resources acquired along the way (child process, event fds, evlist)
 * are released on every exit path through the single 'out' label.
 */
int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused)
{
	struct perf_evlist *evlist = NULL;
	struct perf_evsel *evsel = NULL;
	struct perf_event_attr pe;
	/* -1 marks "not opened yet" so the cleanup code can tell what to close. */
	int i, fd[2] = { -1, -1 }, flag, ret;
	size_t mmap_len;
	void *event;
	/* -1 marks "no child forked yet". */
	pid_t pid = -1;
	int err = TEST_FAIL;

	flag = perf_event_open_cloexec_flag();

	evlist = perf_evlist__new();
	if (!evlist) {
		pr_debug("perf_evlist__new failed\n");
		return TEST_FAIL;
	}

	ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
	if (ret) {
		pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
		err = TEST_SKIP;
		goto out;
	}

	evsel = perf_evlist__first(evlist);
	if (!evsel) {
		pr_debug("perf_evlist__first failed\n");
		goto out;
	}

	/* Group leader: sampled hardware cycles that also reads the group. */
	memset(&pe, 0, sizeof(pe));
	pe.size = sizeof(pe);

	pe.type = PERF_TYPE_HARDWARE;
	pe.config = PERF_COUNT_HW_CPU_CYCLES;
	pe.read_format = PERF_FORMAT_GROUP;

	pe.sample_period = 128;
	pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;

	pid = spawn();
	if (pid < 0) {
		pr_debug("failed to fork workload\n");
		goto out;
	}

	fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag);
	if (fd[0] < 0) {
		pr_debug("failed to open event\n");
		goto out;
	}

	/* Sibling: the parsed CQM event, attached to the leader's group. */
	memset(&pe, 0, sizeof(pe));
	pe.size = sizeof(pe);

	pe.type = evsel->attr.type;
	pe.config = evsel->attr.config;

	fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag);
	if (fd[1] < 0) {
		pr_debug("failed to open event\n");
		goto out;
	}

	/*
	 * Pick a power-of-two number of pages + 1 for the meta-data
	 * page (struct perf_event_mmap_page). See tools/perf/design.txt.
	 */
	mmap_len = page_size * 65;

	event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0);
	if (event == MAP_FAILED) {
		pr_debug("failed to mmap %d\n", errno);
		goto out;
	}

	/* Give the leader time to overflow and sample the group. */
	sleep(1);

	err = TEST_OK;

	munmap(event, mmap_len);

out:
	for (i = 0; i < 2; i++) {
		if (fd[i] >= 0)
			close(fd[i]);
	}

	/* Never leave the sleeping child behind, whatever went wrong above. */
	if (pid > 0) {
		kill(pid, SIGKILL);
		wait(NULL);
	}

	perf_evlist__delete(evlist);
	return err;
}
|
@ -2732,6 +2732,7 @@ static int perf_c2c__record(int argc, const char **argv)
|
||||
if (!perf_mem_events[j].supported) {
|
||||
pr_err("failed: event '%s' not supported\n",
|
||||
perf_mem_events[j].name);
|
||||
free(rec_argv);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -34,8 +34,7 @@ static struct option config_options[] = {
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
static int set_config(struct perf_config_set *set, const char *file_name,
|
||||
const char *var, const char *value)
|
||||
static int set_config(struct perf_config_set *set, const char *file_name)
|
||||
{
|
||||
struct perf_config_section *section = NULL;
|
||||
struct perf_config_item *item = NULL;
|
||||
@ -49,7 +48,6 @@ static int set_config(struct perf_config_set *set, const char *file_name,
|
||||
if (!fp)
|
||||
return -1;
|
||||
|
||||
perf_config_set__collect(set, file_name, var, value);
|
||||
fprintf(fp, "%s\n", first_line);
|
||||
|
||||
/* overwrite config variables */
|
||||
@ -161,6 +159,7 @@ int cmd_config(int argc, const char **argv)
|
||||
struct perf_config_set *set;
|
||||
char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
|
||||
const char *config_filename;
|
||||
bool changed = false;
|
||||
|
||||
argc = parse_options(argc, argv, config_options, config_usage,
|
||||
PARSE_OPT_STOP_AT_NON_OPTION);
|
||||
@ -231,15 +230,26 @@ int cmd_config(int argc, const char **argv)
|
||||
goto out_err;
|
||||
}
|
||||
} else {
|
||||
if (set_config(set, config_filename, var, value) < 0) {
|
||||
pr_err("Failed to set '%s=%s' on %s\n",
|
||||
var, value, config_filename);
|
||||
if (perf_config_set__collect(set, config_filename,
|
||||
var, value) < 0) {
|
||||
pr_err("Failed to add '%s=%s'\n",
|
||||
var, value);
|
||||
free(arg);
|
||||
goto out_err;
|
||||
}
|
||||
changed = true;
|
||||
}
|
||||
free(arg);
|
||||
}
|
||||
|
||||
if (!changed)
|
||||
break;
|
||||
|
||||
if (set_config(set, config_filename) < 0) {
|
||||
pr_err("Failed to set the configs on %s\n",
|
||||
config_filename);
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
@ -34,7 +34,6 @@
|
||||
#include <termios.h>
|
||||
#include <semaphore.h>
|
||||
#include <signal.h>
|
||||
#include <pthread.h>
|
||||
#include <math.h>
|
||||
|
||||
static const char *get_filename_for_perf_kvm(void)
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "util/cache.h"
|
||||
#include "util/pmu.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/metricgroup.h"
|
||||
#include <subcmd/parse-options.h>
|
||||
|
||||
static bool desc_flag = true;
|
||||
@ -79,6 +80,10 @@ int cmd_list(int argc, const char **argv)
|
||||
long_desc_flag, details_flag);
|
||||
else if (strcmp(argv[i], "sdt") == 0)
|
||||
print_sdt_events(NULL, NULL, raw_dump);
|
||||
else if (strcmp(argv[i], "metric") == 0)
|
||||
metricgroup__print(true, false, NULL, raw_dump);
|
||||
else if (strcmp(argv[i], "metricgroup") == 0)
|
||||
metricgroup__print(false, true, NULL, raw_dump);
|
||||
else if ((sep = strchr(argv[i], ':')) != NULL) {
|
||||
int sep_idx;
|
||||
|
||||
@ -96,6 +101,7 @@ int cmd_list(int argc, const char **argv)
|
||||
s[sep_idx] = '\0';
|
||||
print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
|
||||
print_sdt_events(s, s + sep_idx + 1, raw_dump);
|
||||
metricgroup__print(true, true, s, raw_dump);
|
||||
free(s);
|
||||
} else {
|
||||
if (asprintf(&s, "*%s*", argv[i]) < 0) {
|
||||
@ -112,6 +118,7 @@ int cmd_list(int argc, const char **argv)
|
||||
details_flag);
|
||||
print_tracepoint_events(NULL, s, raw_dump);
|
||||
print_sdt_events(NULL, s, raw_dump);
|
||||
metricgroup__print(true, true, NULL, raw_dump);
|
||||
free(s);
|
||||
}
|
||||
}
|
||||
|
@ -112,6 +112,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
|
||||
if (!perf_mem_events[j].supported) {
|
||||
pr_err("failed: event '%s' not supported\n",
|
||||
perf_mem_events__name(j));
|
||||
free(rec_argv);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1643,6 +1643,9 @@ static struct option __record_options[] = {
|
||||
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
|
||||
"sample selected machine registers on interrupt,"
|
||||
" use -I ? to list register names", parse_regs),
|
||||
OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
|
||||
"sample selected machine registers on interrupt,"
|
||||
" use -I ? to list register names", parse_regs),
|
||||
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
|
||||
"Record running/enabled time of read (:S) events"),
|
||||
OPT_CALLBACK('k', "clockid", &record.opts,
|
||||
|
@ -3363,6 +3363,10 @@ int cmd_sched(int argc, const char **argv)
|
||||
OPT_STRING(0, "time", &sched.time_str, "str",
|
||||
"Time span for analysis (start,stop)"),
|
||||
OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
|
||||
OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
|
||||
"analyze events only for given process id(s)"),
|
||||
OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
|
||||
"analyze events only for given thread id(s)"),
|
||||
OPT_PARENT(sched_options)
|
||||
};
|
||||
|
||||
|
@ -88,6 +88,7 @@ enum perf_output_field {
|
||||
PERF_OUTPUT_BRSTACKOFF = 1U << 24,
|
||||
PERF_OUTPUT_SYNTH = 1U << 25,
|
||||
PERF_OUTPUT_PHYS_ADDR = 1U << 26,
|
||||
PERF_OUTPUT_UREGS = 1U << 27,
|
||||
};
|
||||
|
||||
struct output_option {
|
||||
@ -109,6 +110,7 @@ struct output_option {
|
||||
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
|
||||
{.str = "period", .field = PERF_OUTPUT_PERIOD},
|
||||
{.str = "iregs", .field = PERF_OUTPUT_IREGS},
|
||||
{.str = "uregs", .field = PERF_OUTPUT_UREGS},
|
||||
{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
|
||||
{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
|
||||
{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
|
||||
@ -385,6 +387,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
|
||||
PERF_OUTPUT_IREGS))
|
||||
return -EINVAL;
|
||||
|
||||
if (PRINT_FIELD(UREGS) &&
|
||||
perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
|
||||
PERF_OUTPUT_UREGS))
|
||||
return -EINVAL;
|
||||
|
||||
if (PRINT_FIELD(PHYS_ADDR) &&
|
||||
perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
|
||||
PERF_OUTPUT_PHYS_ADDR))
|
||||
@ -509,6 +516,24 @@ static void print_sample_iregs(struct perf_sample *sample,
|
||||
}
|
||||
}
|
||||
|
||||
static void print_sample_uregs(struct perf_sample *sample,
|
||||
struct perf_event_attr *attr)
|
||||
{
|
||||
struct regs_dump *regs = &sample->user_regs;
|
||||
uint64_t mask = attr->sample_regs_user;
|
||||
unsigned i = 0, r;
|
||||
|
||||
if (!regs || !regs->regs)
|
||||
return;
|
||||
|
||||
printf(" ABI:%" PRIu64 " ", regs->abi);
|
||||
|
||||
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
|
||||
u64 val = regs->regs[i++];
|
||||
printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val);
|
||||
}
|
||||
}
|
||||
|
||||
static void print_sample_start(struct perf_sample *sample,
|
||||
struct thread *thread,
|
||||
struct perf_evsel *evsel)
|
||||
@ -1444,6 +1469,9 @@ static void process_event(struct perf_script *script,
|
||||
if (PRINT_FIELD(IREGS))
|
||||
print_sample_iregs(sample, attr);
|
||||
|
||||
if (PRINT_FIELD(UREGS))
|
||||
print_sample_uregs(sample, attr);
|
||||
|
||||
if (PRINT_FIELD(BRSTACK))
|
||||
print_sample_brstack(sample, thread, attr);
|
||||
else if (PRINT_FIELD(BRSTACKSYM))
|
||||
@ -2739,7 +2767,7 @@ int cmd_script(int argc, const char **argv)
|
||||
"+field to add and -field to remove."
|
||||
"Valid types: hw,sw,trace,raw,synth. "
|
||||
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
|
||||
"addr,symoff,period,iregs,brstack,brstacksym,flags,"
|
||||
"addr,symoff,period,iregs,uregs,brstack,brstacksym,flags,"
|
||||
"bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
|
||||
parse_output_fields),
|
||||
OPT_BOOLEAN('a', "all-cpus", &system_wide,
|
||||
@ -2801,6 +2829,8 @@ int cmd_script(int argc, const char **argv)
|
||||
NULL
|
||||
};
|
||||
|
||||
perf_set_singlethreaded();
|
||||
|
||||
setup_scripting();
|
||||
|
||||
argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
|
||||
|
@ -65,6 +65,7 @@
|
||||
#include "util/tool.h"
|
||||
#include "util/group.h"
|
||||
#include "util/string2.h"
|
||||
#include "util/metricgroup.h"
|
||||
#include "asm/bug.h"
|
||||
|
||||
#include <linux/time64.h>
|
||||
@ -133,6 +134,8 @@ static const char *smi_cost_attrs = {
|
||||
|
||||
static struct perf_evlist *evsel_list;
|
||||
|
||||
static struct rblist metric_events;
|
||||
|
||||
static struct target target = {
|
||||
.uid = UINT_MAX,
|
||||
};
|
||||
@ -192,6 +195,11 @@ static struct perf_stat_config stat_config = {
|
||||
.scale = true,
|
||||
};
|
||||
|
||||
static bool is_duration_time(struct perf_evsel *evsel)
|
||||
{
|
||||
return !strcmp(evsel->name, "duration_time");
|
||||
}
|
||||
|
||||
static inline void diff_timespec(struct timespec *r, struct timespec *a,
|
||||
struct timespec *b)
|
||||
{
|
||||
@ -407,6 +415,8 @@ static void process_interval(void)
|
||||
pr_err("failed to write stat round event\n");
|
||||
}
|
||||
|
||||
init_stats(&walltime_nsecs_stats);
|
||||
update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
|
||||
print_counters(&rs, 0, NULL);
|
||||
}
|
||||
|
||||
@ -582,6 +592,32 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter)
|
||||
return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
|
||||
}
|
||||
|
||||
static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
|
||||
{
|
||||
struct perf_evsel *c2, *leader;
|
||||
bool is_open = true;
|
||||
|
||||
leader = evsel->leader;
|
||||
pr_debug("Weak group for %s/%d failed\n",
|
||||
leader->name, leader->nr_members);
|
||||
|
||||
/*
|
||||
* for_each_group_member doesn't work here because it doesn't
|
||||
* include the first entry.
|
||||
*/
|
||||
evlist__for_each_entry(evsel_list, c2) {
|
||||
if (c2 == evsel)
|
||||
is_open = false;
|
||||
if (c2->leader == leader) {
|
||||
if (is_open)
|
||||
perf_evsel__close(c2);
|
||||
c2->leader = c2;
|
||||
c2->nr_members = 0;
|
||||
}
|
||||
}
|
||||
return leader;
|
||||
}
|
||||
|
||||
static int __run_perf_stat(int argc, const char **argv)
|
||||
{
|
||||
int interval = stat_config.interval;
|
||||
@ -618,6 +654,15 @@ static int __run_perf_stat(int argc, const char **argv)
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
try_again:
|
||||
if (create_perf_stat_counter(counter) < 0) {
|
||||
|
||||
/* Weak group failed. Reset the group. */
|
||||
if ((errno == EINVAL || errno == EBADF) &&
|
||||
counter->leader != counter &&
|
||||
counter->weak_group) {
|
||||
counter = perf_evsel__reset_weak_group(counter);
|
||||
goto try_again;
|
||||
}
|
||||
|
||||
/*
|
||||
* PPC returns ENXIO for HW counters until 2.6.37
|
||||
* (behavior changed with commit b0a873e).
|
||||
@ -1199,7 +1244,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
|
||||
|
||||
perf_stat__print_shadow_stats(counter, uval,
|
||||
first_shadow_cpu(counter, id),
|
||||
&out);
|
||||
&out, &metric_events);
|
||||
if (!csv_output && !metric_only) {
|
||||
print_noise(counter, noise);
|
||||
print_running(run, ena);
|
||||
@ -1325,6 +1370,9 @@ static void print_aggr(char *prefix)
|
||||
ad.id = id = aggr_map->map[s];
|
||||
first = true;
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
if (is_duration_time(counter))
|
||||
continue;
|
||||
|
||||
ad.val = ad.ena = ad.run = 0;
|
||||
ad.nr = 0;
|
||||
if (!collect_data(counter, aggr_cb, &ad))
|
||||
@ -1468,6 +1516,8 @@ static void print_no_aggr_metric(char *prefix)
|
||||
if (prefix)
|
||||
fputs(prefix, stat_config.output);
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
if (is_duration_time(counter))
|
||||
continue;
|
||||
if (first) {
|
||||
aggr_printout(counter, cpu, 0);
|
||||
first = false;
|
||||
@ -1522,6 +1572,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
|
||||
|
||||
/* Print metrics headers only */
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
if (is_duration_time(counter))
|
||||
continue;
|
||||
os.evsel = counter;
|
||||
out.ctx = &os;
|
||||
out.print_metric = print_metric_header;
|
||||
@ -1530,7 +1582,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
|
||||
os.evsel = counter;
|
||||
perf_stat__print_shadow_stats(counter, 0,
|
||||
0,
|
||||
&out);
|
||||
&out,
|
||||
&metric_events);
|
||||
}
|
||||
fputc('\n', stat_config.output);
|
||||
}
|
||||
@ -1668,12 +1721,18 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
|
||||
print_aggr(prefix);
|
||||
break;
|
||||
case AGGR_THREAD:
|
||||
evlist__for_each_entry(evsel_list, counter)
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
if (is_duration_time(counter))
|
||||
continue;
|
||||
print_aggr_thread(counter, prefix);
|
||||
}
|
||||
break;
|
||||
case AGGR_GLOBAL:
|
||||
evlist__for_each_entry(evsel_list, counter)
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
if (is_duration_time(counter))
|
||||
continue;
|
||||
print_counter_aggr(counter, prefix);
|
||||
}
|
||||
if (metric_only)
|
||||
fputc('\n', stat_config.output);
|
||||
break;
|
||||
@ -1681,8 +1740,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
|
||||
if (metric_only)
|
||||
print_no_aggr_metric(prefix);
|
||||
else {
|
||||
evlist__for_each_entry(evsel_list, counter)
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
if (is_duration_time(counter))
|
||||
continue;
|
||||
print_counter(counter, prefix);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case AGGR_UNSET:
|
||||
@ -1754,6 +1816,13 @@ static int enable_metric_only(const struct option *opt __maybe_unused,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int parse_metric_groups(const struct option *opt,
|
||||
const char *str,
|
||||
int unset __maybe_unused)
|
||||
{
|
||||
return metricgroup__parse_groups(opt, str, &metric_events);
|
||||
}
|
||||
|
||||
static const struct option stat_options[] = {
|
||||
OPT_BOOLEAN('T', "transaction", &transaction_run,
|
||||
"hardware transaction statistics"),
|
||||
@ -1819,6 +1888,9 @@ static const struct option stat_options[] = {
|
||||
"measure topdown level 1 statistics"),
|
||||
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
|
||||
"measure SMI cost"),
|
||||
OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
|
||||
"monitor specified metrics or metric groups (separated by ,)",
|
||||
parse_metric_groups),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
|
@ -1732,8 +1732,10 @@ static int timechart__io_record(int argc, const char **argv)
|
||||
if (rec_argv == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
if (asprintf(&filter, "common_pid != %d", getpid()) < 0)
|
||||
if (asprintf(&filter, "common_pid != %d", getpid()) < 0) {
|
||||
free(rec_argv);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
p = rec_argv;
|
||||
for (i = 0; i < common_args_nr; i++)
|
||||
|
@ -2078,6 +2078,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv)
|
||||
rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
|
||||
else {
|
||||
pr_err("Neither raw_syscalls nor syscalls events exist.\n");
|
||||
free(rec_argv);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@ -2730,20 +2731,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event
|
||||
|
||||
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
|
||||
{
|
||||
DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
|
||||
size_t printed = trace__fprintf_threads_header(fp);
|
||||
struct rb_node *nd;
|
||||
int i;
|
||||
|
||||
if (threads == NULL) {
|
||||
fprintf(fp, "%s", "Error sorting output by nr_events!\n");
|
||||
return 0;
|
||||
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
|
||||
DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
|
||||
|
||||
if (threads == NULL) {
|
||||
fprintf(fp, "%s", "Error sorting output by nr_events!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
resort_rb__for_each_entry(nd, threads)
|
||||
printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
|
||||
|
||||
resort_rb__delete(threads);
|
||||
}
|
||||
|
||||
resort_rb__for_each_entry(nd, threads)
|
||||
printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
|
||||
|
||||
resort_rb__delete(threads);
|
||||
|
||||
return printed;
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,7 @@ struct record_opts {
|
||||
unsigned int user_freq;
|
||||
u64 branch_stack;
|
||||
u64 sample_intr_regs;
|
||||
u64 sample_user_regs;
|
||||
u64 default_interval;
|
||||
u64 user_interval;
|
||||
size_t auxtrace_snapshot_size;
|
||||
|
164
tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
Normal file
164
tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
Normal file
@ -0,0 +1,164 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
164
tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
Normal file
164
tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
Normal file
@ -0,0 +1,164 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
164
tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
Normal file
164
tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
Normal file
@ -0,0 +1,164 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
158
tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
Normal file
158
tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
Normal file
@ -0,0 +1,158 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough estimation of the fraction of fetched line bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else (UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@)",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative to nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
158
tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
Normal file
158
tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
Normal file
@ -0,0 +1,158 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough estimation of the fraction of fetched line bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative to nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
164
tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
Normal file
164
tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
Normal file
@ -0,0 +1,164 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough estimation of the fraction of fetched line bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative to nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
164
tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
Normal file
164
tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
Normal file
@ -0,0 +1,164 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough estimation of the fraction of fetched line bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative to nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
140
tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
Normal file
140
tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
Normal file
@ -0,0 +1,140 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / INST_RETIRED.ANY / cycles",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
140
tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
Normal file
140
tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
Normal file
@ -0,0 +1,140 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / INST_RETIRED.ANY / cycles",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
164
tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
Normal file
164
tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
Normal file
@ -0,0 +1,164 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / INST_RETIRED.ANY / cycles",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots",
|
||||
"MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles )",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
182
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
Normal file
182
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
Normal file
@ -0,0 +1,182 @@
|
||||
[
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per logical thread)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "IPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Uops Per Instruction",
|
||||
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Pipeline",
|
||||
"MetricName": "UPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
|
||||
"MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
|
||||
"MetricGroup": "Frontend",
|
||||
"MetricName": "IFetch_Line_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
|
||||
"MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
|
||||
"MetricGroup": "DSB; Frontend_Bandwidth",
|
||||
"MetricName": "DSB_Coverage"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Cycles Per Instruction (threaded)",
|
||||
"MetricExpr": "1 / INST_RETIRED.ANY / cycles",
|
||||
"MetricGroup": "Pipeline;Summary",
|
||||
"MetricName": "CPI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total issue-pipeline slots (per-core)",
|
||||
"MetricExpr": "4*cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )",
|
||||
"MetricGroup": "TopDownL1",
|
||||
"MetricName": "SLOTS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Total number of retired Instructions",
|
||||
"MetricExpr": "INST_RETIRED.ANY",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Instructions"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instructions Per Cycle (per physical core)",
|
||||
"MetricExpr": "INST_RETIRED.ANY / cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CoreIPC"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
|
||||
"MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1",
|
||||
"MetricGroup": "Pipeline;Ports_Utilization",
|
||||
"MetricName": "ILP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
|
||||
"MetricExpr": "( RS_EVENTS.EMPTY_CYCLES - (ICACHE_16B.IFDATA_STALL +2* ICACHE_16B.IFDATA_STALL:c1:e1) - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END",
|
||||
"MetricGroup": "Unknown_Branches",
|
||||
"MetricName": "BAClear_Cost"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Core actual clocks when any thread is active on the physical core",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if 1 else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )",
|
||||
"MetricGroup": "SMT",
|
||||
"MetricName": "CORE_CLKS"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )",
|
||||
"MetricGroup": "Memory_Bound;Memory_Lat",
|
||||
"MetricName": "Load_Miss_Real_Latency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
|
||||
"MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
|
||||
"MetricGroup": "Memory_Bound;Memory_BW",
|
||||
"MetricName": "MLP"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
|
||||
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) )",
|
||||
"MetricGroup": "TLB",
|
||||
"MetricName": "Page_Walks_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "L1 cache miss per kilo instruction for demand loads",
|
||||
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS_PS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Cache_Misses;",
|
||||
"MetricName": "L1MPKI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "L2 cache miss per kilo instruction for demand loads",
|
||||
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS_PS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Cache_Misses;",
|
||||
"MetricName": "L2MPKI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "L3 cache miss per kilo instruction for demand loads",
|
||||
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS_PS / INST_RETIRED.ANY",
|
||||
"MetricGroup": "Cache_Misses;",
|
||||
"MetricName": "L3MPKI"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average CPU Utilization",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "CPU_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Giga Floating Point Operations Per Second",
|
||||
"MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16* FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1000000000 / duration_time",
|
||||
"MetricGroup": "FLOPS;Summary",
|
||||
"MetricName": "GFLOPs"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average Frequency Utilization relative nominal frequency",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "Turbo_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles where both hardware threads were active",
|
||||
"MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
|
||||
"MetricGroup": "SMT;Summary",
|
||||
"MetricName": "SMT_2T_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Fraction of cycles spent in Kernel mode",
|
||||
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
|
||||
"MetricGroup": "Summary",
|
||||
"MetricName": "Kernel_Utilization"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per core",
|
||||
"MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Core_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C2 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C2_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C3 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C3_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C6 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C6_Pkg_Residency"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "C7 residency percent per package",
|
||||
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
|
||||
"MetricGroup": "Power",
|
||||
"MetricName": "C7_Pkg_Residency"
|
||||
}
|
||||
]
|
@ -292,7 +292,7 @@ static int print_events_table_entry(void *data, char *name, char *event,
|
||||
char *desc, char *long_desc,
|
||||
char *pmu, char *unit, char *perpkg,
|
||||
char *metric_expr,
|
||||
char *metric_name)
|
||||
char *metric_name, char *metric_group)
|
||||
{
|
||||
struct perf_entry_data *pd = data;
|
||||
FILE *outfp = pd->outfp;
|
||||
@ -304,8 +304,10 @@ static int print_events_table_entry(void *data, char *name, char *event,
|
||||
*/
|
||||
fprintf(outfp, "{\n");
|
||||
|
||||
fprintf(outfp, "\t.name = \"%s\",\n", name);
|
||||
fprintf(outfp, "\t.event = \"%s\",\n", event);
|
||||
if (name)
|
||||
fprintf(outfp, "\t.name = \"%s\",\n", name);
|
||||
if (event)
|
||||
fprintf(outfp, "\t.event = \"%s\",\n", event);
|
||||
fprintf(outfp, "\t.desc = \"%s\",\n", desc);
|
||||
fprintf(outfp, "\t.topic = \"%s\",\n", topic);
|
||||
if (long_desc && long_desc[0])
|
||||
@ -320,6 +322,8 @@ static int print_events_table_entry(void *data, char *name, char *event,
|
||||
fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr);
|
||||
if (metric_name)
|
||||
fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name);
|
||||
if (metric_group)
|
||||
fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
|
||||
fprintf(outfp, "},\n");
|
||||
|
||||
return 0;
|
||||
@ -357,6 +361,9 @@ static char *real_event(const char *name, char *event)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!name)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; fixed[i].name; i++)
|
||||
if (!strcasecmp(name, fixed[i].name))
|
||||
return (char *)fixed[i].event;
|
||||
@ -369,7 +376,7 @@ int json_events(const char *fn,
|
||||
char *long_desc,
|
||||
char *pmu, char *unit, char *perpkg,
|
||||
char *metric_expr,
|
||||
char *metric_name),
|
||||
char *metric_name, char *metric_group),
|
||||
void *data)
|
||||
{
|
||||
int err = -EIO;
|
||||
@ -397,6 +404,7 @@ int json_events(const char *fn,
|
||||
char *unit = NULL;
|
||||
char *metric_expr = NULL;
|
||||
char *metric_name = NULL;
|
||||
char *metric_group = NULL;
|
||||
unsigned long long eventcode = 0;
|
||||
struct msrmap *msr = NULL;
|
||||
jsmntok_t *msrval = NULL;
|
||||
@ -476,6 +484,8 @@ int json_events(const char *fn,
|
||||
addfield(map, &perpkg, "", "", val);
|
||||
} else if (json_streq(map, field, "MetricName")) {
|
||||
addfield(map, &metric_name, "", "", val);
|
||||
} else if (json_streq(map, field, "MetricGroup")) {
|
||||
addfield(map, &metric_group, "", "", val);
|
||||
} else if (json_streq(map, field, "MetricExpr")) {
|
||||
addfield(map, &metric_expr, "", "", val);
|
||||
for (s = metric_expr; *s; s++)
|
||||
@ -501,10 +511,11 @@ int json_events(const char *fn,
|
||||
addfield(map, &event, ",", filter, NULL);
|
||||
if (msr != NULL)
|
||||
addfield(map, &event, ",", msr->pname, msrval);
|
||||
fixname(name);
|
||||
if (name)
|
||||
fixname(name);
|
||||
|
||||
err = func(data, name, real_event(name, event), desc, long_desc,
|
||||
pmu, unit, perpkg, metric_expr, metric_name);
|
||||
pmu, unit, perpkg, metric_expr, metric_name, metric_group);
|
||||
free(event);
|
||||
free(desc);
|
||||
free(name);
|
||||
@ -516,6 +527,7 @@ int json_events(const char *fn,
|
||||
free(unit);
|
||||
free(metric_expr);
|
||||
free(metric_name);
|
||||
free(metric_group);
|
||||
if (err)
|
||||
break;
|
||||
tok += j;
|
||||
|
@ -6,7 +6,7 @@ int json_events(const char *fn,
|
||||
char *long_desc,
|
||||
char *pmu,
|
||||
char *unit, char *perpkg, char *metric_expr,
|
||||
char *metric_name),
|
||||
char *metric_name, char *metric_group),
|
||||
void *data);
|
||||
char *get_cpu_str(void);
|
||||
|
||||
|
@ -15,6 +15,7 @@ struct pmu_event {
|
||||
const char *perpkg;
|
||||
const char *metric_expr;
|
||||
const char *metric_name;
|
||||
const char *metric_group;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -3,6 +3,7 @@
|
||||
*
|
||||
* Builtin regression testing command: ever growing number of sanity tests
|
||||
*/
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
10
tools/perf/trace/beauty/madvise_behavior.sh
Executable file
10
tools/perf/trace/beauty/madvise_behavior.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/bin/sh
|
||||
|
||||
header_dir=$1
|
||||
|
||||
printf "static const char *madvise_advices[] = {\n"
|
||||
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
|
||||
egrep $regex ${header_dir}/mman-common.h | \
|
||||
sed -r "s/$regex/\2 \1/g" | \
|
||||
sort -n | xargs printf "\t[%s] = \"%s\",\n"
|
||||
printf "};\n"
|
@ -94,35 +94,21 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
|
||||
|
||||
#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
|
||||
|
||||
static size_t madvise__scnprintf_behavior(int behavior, char *bf, size_t size)
|
||||
{
|
||||
#include "trace/beauty/generated/madvise_behavior_array.c"
|
||||
static DEFINE_STRARRAY(madvise_advices);
|
||||
|
||||
if (behavior < strarray__madvise_advices.nr_entries && strarray__madvise_advices.entries[behavior] != NULL)
|
||||
return scnprintf(bf, size, "MADV_%s", strarray__madvise_advices.entries[behavior]);
|
||||
|
||||
return scnprintf(bf, size, "%#", behavior);
|
||||
}
|
||||
|
||||
static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
|
||||
struct syscall_arg *arg)
|
||||
{
|
||||
int behavior = arg->val;
|
||||
|
||||
switch (behavior) {
|
||||
#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
|
||||
P_MADV_BHV(NORMAL);
|
||||
P_MADV_BHV(RANDOM);
|
||||
P_MADV_BHV(SEQUENTIAL);
|
||||
P_MADV_BHV(WILLNEED);
|
||||
P_MADV_BHV(DONTNEED);
|
||||
P_MADV_BHV(FREE);
|
||||
P_MADV_BHV(REMOVE);
|
||||
P_MADV_BHV(DONTFORK);
|
||||
P_MADV_BHV(DOFORK);
|
||||
P_MADV_BHV(HWPOISON);
|
||||
P_MADV_BHV(SOFT_OFFLINE);
|
||||
P_MADV_BHV(MERGEABLE);
|
||||
P_MADV_BHV(UNMERGEABLE);
|
||||
P_MADV_BHV(HUGEPAGE);
|
||||
P_MADV_BHV(NOHUGEPAGE);
|
||||
P_MADV_BHV(DONTDUMP);
|
||||
P_MADV_BHV(DODUMP);
|
||||
#undef P_MADV_BHV
|
||||
default: break;
|
||||
}
|
||||
|
||||
return scnprintf(bf, size, "%#x", behavior);
|
||||
return madvise__scnprintf_behavior(arg->val, bf, size);
|
||||
}
|
||||
|
||||
#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
|
||||
|
@ -27,13 +27,17 @@ void ui_progress__update(struct ui_progress *p, u64 adv)
|
||||
}
|
||||
}
|
||||
|
||||
void ui_progress__init(struct ui_progress *p, u64 total, const char *title)
|
||||
void __ui_progress__init(struct ui_progress *p, u64 total,
|
||||
const char *title, bool size)
|
||||
{
|
||||
p->curr = 0;
|
||||
p->next = p->step = total / 16 ?: 1;
|
||||
p->total = total;
|
||||
p->title = title;
|
||||
p->size = size;
|
||||
|
||||
if (ui_progress__ops->init)
|
||||
ui_progress__ops->init(p);
|
||||
}
|
||||
|
||||
void ui_progress__finish(void)
|
||||
|
@ -8,12 +8,22 @@ void ui_progress__finish(void);
|
||||
struct ui_progress {
|
||||
const char *title;
|
||||
u64 curr, next, step, total;
|
||||
bool size;
|
||||
};
|
||||
|
||||
void ui_progress__init(struct ui_progress *p, u64 total, const char *title);
|
||||
void __ui_progress__init(struct ui_progress *p, u64 total,
|
||||
const char *title, bool size);
|
||||
|
||||
#define ui_progress__init(p, total, title) \
|
||||
__ui_progress__init(p, total, title, false)
|
||||
|
||||
#define ui_progress__init_size(p, total, title) \
|
||||
__ui_progress__init(p, total, title, true)
|
||||
|
||||
void ui_progress__update(struct ui_progress *p, u64 adv);
|
||||
|
||||
struct ui_progress_ops {
|
||||
void (*init)(struct ui_progress *p);
|
||||
void (*update)(struct ui_progress *p);
|
||||
void (*finish)(void);
|
||||
};
|
||||
|
@ -1,12 +1,33 @@
|
||||
#include <linux/kernel.h>
|
||||
#include "../cache.h"
|
||||
#include "../progress.h"
|
||||
#include "../libslang.h"
|
||||
#include "../ui.h"
|
||||
#include "tui.h"
|
||||
#include "units.h"
|
||||
#include "../browser.h"
|
||||
|
||||
static void __tui_progress__init(struct ui_progress *p)
|
||||
{
|
||||
p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1;
|
||||
}
|
||||
|
||||
static int get_title(struct ui_progress *p, char *buf, size_t size)
|
||||
{
|
||||
char buf_cur[20];
|
||||
char buf_tot[20];
|
||||
int ret;
|
||||
|
||||
ret = unit_number__scnprintf(buf_cur, sizeof(buf_cur), p->curr);
|
||||
ret += unit_number__scnprintf(buf_tot, sizeof(buf_tot), p->total);
|
||||
|
||||
return ret + scnprintf(buf, size, "%s [%s/%s]",
|
||||
p->title, buf_cur, buf_tot);
|
||||
}
|
||||
|
||||
static void tui_progress__update(struct ui_progress *p)
|
||||
{
|
||||
char buf[100], *title = (char *) p->title;
|
||||
int bar, y;
|
||||
/*
|
||||
* FIXME: We should have a per UI backend way of showing progress,
|
||||
@ -18,13 +39,18 @@ static void tui_progress__update(struct ui_progress *p)
|
||||
if (p->total == 0)
|
||||
return;
|
||||
|
||||
if (p->size) {
|
||||
get_title(p, buf, sizeof(buf));
|
||||
title = buf;
|
||||
}
|
||||
|
||||
ui__refresh_dimensions(false);
|
||||
pthread_mutex_lock(&ui__lock);
|
||||
y = SLtt_Screen_Rows / 2 - 2;
|
||||
SLsmg_set_color(0);
|
||||
SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);
|
||||
SLsmg_gotorc(y++, 1);
|
||||
SLsmg_write_string((char *)p->title);
|
||||
SLsmg_write_string(title);
|
||||
SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' ');
|
||||
SLsmg_set_color(HE_COLORSET_SELECTED);
|
||||
bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total;
|
||||
@ -49,8 +75,8 @@ static void tui_progress__finish(void)
|
||||
pthread_mutex_unlock(&ui__lock);
|
||||
}
|
||||
|
||||
static struct ui_progress_ops tui_progress__ops =
|
||||
{
|
||||
static struct ui_progress_ops tui_progress__ops = {
|
||||
.init = __tui_progress__init,
|
||||
.update = tui_progress__update,
|
||||
.finish = tui_progress__finish,
|
||||
};
|
||||
|
@ -34,6 +34,7 @@ libperf-y += dso.o
|
||||
libperf-y += symbol.o
|
||||
libperf-y += symbol_fprintf.o
|
||||
libperf-y += color.o
|
||||
libperf-y += metricgroup.o
|
||||
libperf-y += header.o
|
||||
libperf-y += callchain.o
|
||||
libperf-y += values.o
|
||||
@ -78,6 +79,7 @@ libperf-y += data.o
|
||||
libperf-y += tsc.o
|
||||
libperf-y += cloexec.o
|
||||
libperf-y += call-path.o
|
||||
libperf-y += rwsem.o
|
||||
libperf-y += thread-stack.o
|
||||
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
|
||||
|
@ -700,10 +700,7 @@ struct perf_config_set *perf_config_set__new(void)
|
||||
|
||||
if (set) {
|
||||
INIT_LIST_HEAD(&set->sections);
|
||||
if (perf_config_set__init(set) < 0) {
|
||||
perf_config_set__delete(set);
|
||||
set = NULL;
|
||||
}
|
||||
perf_config_set__init(set);
|
||||
}
|
||||
|
||||
return set;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include "compress.h"
|
||||
#include "path.h"
|
||||
#include "symbol.h"
|
||||
@ -1365,9 +1366,9 @@ void __dsos__add(struct dsos *dsos, struct dso *dso)
|
||||
|
||||
void dsos__add(struct dsos *dsos, struct dso *dso)
|
||||
{
|
||||
pthread_rwlock_wrlock(&dsos->lock);
|
||||
down_write(&dsos->lock);
|
||||
__dsos__add(dsos, dso);
|
||||
pthread_rwlock_unlock(&dsos->lock);
|
||||
up_write(&dsos->lock);
|
||||
}
|
||||
|
||||
struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
|
||||
@ -1386,9 +1387,9 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
|
||||
struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
|
||||
{
|
||||
struct dso *dso;
|
||||
pthread_rwlock_rdlock(&dsos->lock);
|
||||
down_read(&dsos->lock);
|
||||
dso = __dsos__find(dsos, name, cmp_short);
|
||||
pthread_rwlock_unlock(&dsos->lock);
|
||||
up_read(&dsos->lock);
|
||||
return dso;
|
||||
}
|
||||
|
||||
@ -1415,9 +1416,9 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
|
||||
struct dso *dsos__findnew(struct dsos *dsos, const char *name)
|
||||
{
|
||||
struct dso *dso;
|
||||
pthread_rwlock_wrlock(&dsos->lock);
|
||||
down_write(&dsos->lock);
|
||||
dso = dso__get(__dsos__findnew(dsos, name));
|
||||
pthread_rwlock_unlock(&dsos->lock);
|
||||
up_write(&dsos->lock);
|
||||
return dso;
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <linux/rbtree.h>
|
||||
#include <sys/types.h>
|
||||
#include <stdbool.h>
|
||||
#include <pthread.h>
|
||||
#include "rwsem.h"
|
||||
#include <linux/types.h>
|
||||
#include <linux/bitops.h>
|
||||
#include "map.h"
|
||||
@ -129,7 +129,7 @@ struct dso_cache {
|
||||
struct dsos {
|
||||
struct list_head head;
|
||||
struct rb_root root; /* rbtree root sorted by long name */
|
||||
pthread_rwlock_t lock;
|
||||
struct rw_semaphore lock;
|
||||
};
|
||||
|
||||
struct auxtrace_cache;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
@ -683,12 +684,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
|
||||
bool mmap_data,
|
||||
unsigned int proc_map_timeout)
|
||||
{
|
||||
DIR *proc;
|
||||
char proc_path[PATH_MAX];
|
||||
struct dirent *dirent;
|
||||
union perf_event *comm_event, *mmap_event, *fork_event;
|
||||
union perf_event *namespaces_event;
|
||||
char proc_path[PATH_MAX];
|
||||
struct dirent **dirent;
|
||||
int err = -1;
|
||||
char *end;
|
||||
pid_t pid;
|
||||
int n, i;
|
||||
|
||||
if (machine__is_default_guest(machine))
|
||||
return 0;
|
||||
@ -712,29 +715,32 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
|
||||
goto out_free_fork;
|
||||
|
||||
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
|
||||
proc = opendir(proc_path);
|
||||
n = scandir(proc_path, &dirent, 0, alphasort);
|
||||
|
||||
if (proc == NULL)
|
||||
if (n < 0)
|
||||
goto out_free_namespaces;
|
||||
|
||||
while ((dirent = readdir(proc)) != NULL) {
|
||||
char *end;
|
||||
pid_t pid = strtol(dirent->d_name, &end, 10);
|
||||
|
||||
if (*end) /* only interested in proper numerical dirents */
|
||||
for (i = 0; i < n; i++) {
|
||||
if (!isdigit(dirent[i]->d_name[0]))
|
||||
continue;
|
||||
/*
|
||||
* We may race with exiting thread, so don't stop just because
|
||||
* one thread couldn't be synthesized.
|
||||
*/
|
||||
__event__synthesize_thread(comm_event, mmap_event, fork_event,
|
||||
namespaces_event, pid, 1, process,
|
||||
tool, machine, mmap_data,
|
||||
proc_map_timeout);
|
||||
}
|
||||
|
||||
pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
|
||||
/* only interested in proper numerical dirents */
|
||||
if (!*end) {
|
||||
/*
|
||||
* We may race with exiting thread, so don't stop just because
|
||||
* one thread couldn't be synthesized.
|
||||
*/
|
||||
__event__synthesize_thread(comm_event, mmap_event, fork_event,
|
||||
namespaces_event, pid, 1, process,
|
||||
tool, machine, mmap_data,
|
||||
proc_map_timeout);
|
||||
}
|
||||
free(dirent[i]);
|
||||
}
|
||||
free(dirent);
|
||||
err = 0;
|
||||
closedir(proc);
|
||||
|
||||
out_free_namespaces:
|
||||
free(namespaces_event);
|
||||
out_free_fork:
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/list.h>
|
||||
#include <api/fd/array.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include "../perf.h"
|
||||
#include "event.h"
|
||||
|
@ -678,7 +678,7 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel,
|
||||
if (!function) {
|
||||
perf_evsel__set_sample_bit(evsel, REGS_USER);
|
||||
perf_evsel__set_sample_bit(evsel, STACK_USER);
|
||||
attr->sample_regs_user = PERF_REGS_MASK;
|
||||
attr->sample_regs_user |= PERF_REGS_MASK;
|
||||
attr->sample_stack_user = param->dump_size;
|
||||
attr->exclude_callchain_user = 1;
|
||||
} else {
|
||||
@ -931,6 +931,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
|
||||
perf_evsel__set_sample_bit(evsel, REGS_INTR);
|
||||
}
|
||||
|
||||
if (opts->sample_user_regs) {
|
||||
attr->sample_regs_user |= opts->sample_user_regs;
|
||||
perf_evsel__set_sample_bit(evsel, REGS_USER);
|
||||
}
|
||||
|
||||
if (target__has_cpu(&opts->target) || opts->sample_cpu)
|
||||
perf_evsel__set_sample_bit(evsel, CPU);
|
||||
|
||||
|
@ -137,6 +137,7 @@ struct perf_evsel {
|
||||
const char * metric_name;
|
||||
struct perf_evsel **metric_events;
|
||||
bool collect_stat;
|
||||
bool weak_group;
|
||||
};
|
||||
|
||||
union u64_swap {
|
||||
|
@ -30,7 +30,21 @@ static void dsos__init(struct dsos *dsos)
|
||||
{
|
||||
INIT_LIST_HEAD(&dsos->head);
|
||||
dsos->root = RB_ROOT;
|
||||
pthread_rwlock_init(&dsos->lock, NULL);
|
||||
init_rwsem(&dsos->lock);
|
||||
}
|
||||
|
||||
static void machine__threads_init(struct machine *machine)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
|
||||
struct threads *threads = &machine->threads[i];
|
||||
threads->entries = RB_ROOT;
|
||||
init_rwsem(&threads->lock);
|
||||
threads->nr = 0;
|
||||
INIT_LIST_HEAD(&threads->dead);
|
||||
threads->last_match = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
|
||||
@ -40,11 +54,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
|
||||
RB_CLEAR_NODE(&machine->rb_node);
|
||||
dsos__init(&machine->dsos);
|
||||
|
||||
machine->threads = RB_ROOT;
|
||||
pthread_rwlock_init(&machine->threads_lock, NULL);
|
||||
machine->nr_threads = 0;
|
||||
INIT_LIST_HEAD(&machine->dead_threads);
|
||||
machine->last_match = NULL;
|
||||
machine__threads_init(machine);
|
||||
|
||||
machine->vdso_info = NULL;
|
||||
machine->env = NULL;
|
||||
@ -120,7 +130,7 @@ static void dsos__purge(struct dsos *dsos)
|
||||
{
|
||||
struct dso *pos, *n;
|
||||
|
||||
pthread_rwlock_wrlock(&dsos->lock);
|
||||
down_write(&dsos->lock);
|
||||
|
||||
list_for_each_entry_safe(pos, n, &dsos->head, node) {
|
||||
RB_CLEAR_NODE(&pos->rb_node);
|
||||
@ -129,39 +139,49 @@ static void dsos__purge(struct dsos *dsos)
|
||||
dso__put(pos);
|
||||
}
|
||||
|
||||
pthread_rwlock_unlock(&dsos->lock);
|
||||
up_write(&dsos->lock);
|
||||
}
|
||||
|
||||
static void dsos__exit(struct dsos *dsos)
|
||||
{
|
||||
dsos__purge(dsos);
|
||||
pthread_rwlock_destroy(&dsos->lock);
|
||||
exit_rwsem(&dsos->lock);
|
||||
}
|
||||
|
||||
void machine__delete_threads(struct machine *machine)
|
||||
{
|
||||
struct rb_node *nd;
|
||||
int i;
|
||||
|
||||
pthread_rwlock_wrlock(&machine->threads_lock);
|
||||
nd = rb_first(&machine->threads);
|
||||
while (nd) {
|
||||
struct thread *t = rb_entry(nd, struct thread, rb_node);
|
||||
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
|
||||
struct threads *threads = &machine->threads[i];
|
||||
down_write(&threads->lock);
|
||||
nd = rb_first(&threads->entries);
|
||||
while (nd) {
|
||||
struct thread *t = rb_entry(nd, struct thread, rb_node);
|
||||
|
||||
nd = rb_next(nd);
|
||||
__machine__remove_thread(machine, t, false);
|
||||
nd = rb_next(nd);
|
||||
__machine__remove_thread(machine, t, false);
|
||||
}
|
||||
up_write(&threads->lock);
|
||||
}
|
||||
pthread_rwlock_unlock(&machine->threads_lock);
|
||||
}
|
||||
|
||||
void machine__exit(struct machine *machine)
|
||||
{
|
||||
int i;
|
||||
|
||||
machine__destroy_kernel_maps(machine);
|
||||
map_groups__exit(&machine->kmaps);
|
||||
dsos__exit(&machine->dsos);
|
||||
machine__exit_vdso(machine);
|
||||
zfree(&machine->root_dir);
|
||||
zfree(&machine->current_tid);
|
||||
pthread_rwlock_destroy(&machine->threads_lock);
|
||||
|
||||
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
|
||||
struct threads *threads = &machine->threads[i];
|
||||
exit_rwsem(&threads->lock);
|
||||
}
|
||||
}
|
||||
|
||||
void machine__delete(struct machine *machine)
|
||||
@ -379,10 +399,11 @@ static void machine__update_thread_pid(struct machine *machine,
|
||||
* lookup/new thread inserted.
|
||||
*/
|
||||
static struct thread *____machine__findnew_thread(struct machine *machine,
|
||||
struct threads *threads,
|
||||
pid_t pid, pid_t tid,
|
||||
bool create)
|
||||
{
|
||||
struct rb_node **p = &machine->threads.rb_node;
|
||||
struct rb_node **p = &threads->entries.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct thread *th;
|
||||
|
||||
@ -391,14 +412,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
|
||||
* so most of the time we dont have to look up
|
||||
* the full rbtree:
|
||||
*/
|
||||
th = machine->last_match;
|
||||
th = threads->last_match;
|
||||
if (th != NULL) {
|
||||
if (th->tid == tid) {
|
||||
machine__update_thread_pid(machine, th, pid);
|
||||
return thread__get(th);
|
||||
}
|
||||
|
||||
machine->last_match = NULL;
|
||||
threads->last_match = NULL;
|
||||
}
|
||||
|
||||
while (*p != NULL) {
|
||||
@ -406,7 +427,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
|
||||
th = rb_entry(parent, struct thread, rb_node);
|
||||
|
||||
if (th->tid == tid) {
|
||||
machine->last_match = th;
|
||||
threads->last_match = th;
|
||||
machine__update_thread_pid(machine, th, pid);
|
||||
return thread__get(th);
|
||||
}
|
||||
@ -423,7 +444,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
|
||||
th = thread__new(pid, tid);
|
||||
if (th != NULL) {
|
||||
rb_link_node(&th->rb_node, parent, p);
|
||||
rb_insert_color(&th->rb_node, &machine->threads);
|
||||
rb_insert_color(&th->rb_node, &threads->entries);
|
||||
|
||||
/*
|
||||
* We have to initialize map_groups separately
|
||||
@ -434,7 +455,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
|
||||
* leader and that would screwed the rb tree.
|
||||
*/
|
||||
if (thread__init_map_groups(th, machine)) {
|
||||
rb_erase_init(&th->rb_node, &machine->threads);
|
||||
rb_erase_init(&th->rb_node, &threads->entries);
|
||||
RB_CLEAR_NODE(&th->rb_node);
|
||||
thread__put(th);
|
||||
return NULL;
|
||||
@ -443,8 +464,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
|
||||
* It is now in the rbtree, get a ref
|
||||
*/
|
||||
thread__get(th);
|
||||
machine->last_match = th;
|
||||
++machine->nr_threads;
|
||||
threads->last_match = th;
|
||||
++threads->nr;
|
||||
}
|
||||
|
||||
return th;
|
||||
@ -452,27 +473,30 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
|
||||
|
||||
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
|
||||
{
|
||||
return ____machine__findnew_thread(machine, pid, tid, true);
|
||||
return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
|
||||
}
|
||||
|
||||
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
|
||||
pid_t tid)
|
||||
{
|
||||
struct threads *threads = machine__threads(machine, tid);
|
||||
struct thread *th;
|
||||
|
||||
pthread_rwlock_wrlock(&machine->threads_lock);
|
||||
down_write(&threads->lock);
|
||||
th = __machine__findnew_thread(machine, pid, tid);
|
||||
pthread_rwlock_unlock(&machine->threads_lock);
|
||||
up_write(&threads->lock);
|
||||
return th;
|
||||
}
|
||||
|
||||
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
|
||||
pid_t tid)
|
||||
{
|
||||
struct threads *threads = machine__threads(machine, tid);
|
||||
struct thread *th;
|
||||
pthread_rwlock_rdlock(&machine->threads_lock);
|
||||
th = ____machine__findnew_thread(machine, pid, tid, false);
|
||||
pthread_rwlock_unlock(&machine->threads_lock);
|
||||
|
||||
down_read(&threads->lock);
|
||||
th = ____machine__findnew_thread(machine, threads, pid, tid, false);
|
||||
up_read(&threads->lock);
|
||||
return th;
|
||||
}
|
||||
|
||||
@ -564,7 +588,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
|
||||
{
|
||||
struct dso *dso;
|
||||
|
||||
pthread_rwlock_wrlock(&machine->dsos.lock);
|
||||
down_write(&machine->dsos.lock);
|
||||
|
||||
dso = __dsos__find(&machine->dsos, m->name, true);
|
||||
if (!dso) {
|
||||
@ -578,7 +602,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
|
||||
|
||||
dso__get(dso);
|
||||
out_unlock:
|
||||
pthread_rwlock_unlock(&machine->dsos.lock);
|
||||
up_write(&machine->dsos.lock);
|
||||
return dso;
|
||||
}
|
||||
|
||||
@ -719,21 +743,25 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
|
||||
|
||||
size_t machine__fprintf(struct machine *machine, FILE *fp)
|
||||
{
|
||||
size_t ret;
|
||||
struct rb_node *nd;
|
||||
size_t ret;
|
||||
int i;
|
||||
|
||||
pthread_rwlock_rdlock(&machine->threads_lock);
|
||||
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
|
||||
struct threads *threads = &machine->threads[i];
|
||||
|
||||
ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
|
||||
down_read(&threads->lock);
|
||||
|
||||
for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
|
||||
struct thread *pos = rb_entry(nd, struct thread, rb_node);
|
||||
ret = fprintf(fp, "Threads: %u\n", threads->nr);
|
||||
|
||||
ret += thread__fprintf(pos, fp);
|
||||
for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
|
||||
struct thread *pos = rb_entry(nd, struct thread, rb_node);
|
||||
|
||||
ret += thread__fprintf(pos, fp);
|
||||
}
|
||||
|
||||
up_read(&threads->lock);
|
||||
}
|
||||
|
||||
pthread_rwlock_unlock(&machine->threads_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1292,7 +1320,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
|
||||
struct dso *kernel = NULL;
|
||||
struct dso *dso;
|
||||
|
||||
pthread_rwlock_rdlock(&machine->dsos.lock);
|
||||
down_read(&machine->dsos.lock);
|
||||
|
||||
list_for_each_entry(dso, &machine->dsos.head, node) {
|
||||
|
||||
@ -1322,7 +1350,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_rwlock_unlock(&machine->dsos.lock);
|
||||
up_read(&machine->dsos.lock);
|
||||
|
||||
if (kernel == NULL)
|
||||
kernel = machine__findnew_dso(machine, kmmap_prefix);
|
||||
@ -1479,23 +1507,25 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
|
||||
|
||||
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
|
||||
{
|
||||
if (machine->last_match == th)
|
||||
machine->last_match = NULL;
|
||||
struct threads *threads = machine__threads(machine, th->tid);
|
||||
|
||||
if (threads->last_match == th)
|
||||
threads->last_match = NULL;
|
||||
|
||||
BUG_ON(refcount_read(&th->refcnt) == 0);
|
||||
if (lock)
|
||||
pthread_rwlock_wrlock(&machine->threads_lock);
|
||||
rb_erase_init(&th->rb_node, &machine->threads);
|
||||
down_write(&threads->lock);
|
||||
rb_erase_init(&th->rb_node, &threads->entries);
|
||||
RB_CLEAR_NODE(&th->rb_node);
|
||||
--machine->nr_threads;
|
||||
--threads->nr;
|
||||
/*
|
||||
* Move it first to the dead_threads list, then drop the reference,
|
||||
* if this is the last reference, then the thread__delete destructor
|
||||
* will be called and we will remove it from the dead_threads list.
|
||||
*/
|
||||
list_add_tail(&th->node, &machine->dead_threads);
|
||||
list_add_tail(&th->node, &threads->dead);
|
||||
if (lock)
|
||||
pthread_rwlock_unlock(&machine->threads_lock);
|
||||
up_write(&threads->lock);
|
||||
thread__put(th);
|
||||
}
|
||||
|
||||
@ -2140,21 +2170,26 @@ int machine__for_each_thread(struct machine *machine,
|
||||
int (*fn)(struct thread *thread, void *p),
|
||||
void *priv)
|
||||
{
|
||||
struct threads *threads;
|
||||
struct rb_node *nd;
|
||||
struct thread *thread;
|
||||
int rc = 0;
|
||||
int i;
|
||||
|
||||
for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
|
||||
thread = rb_entry(nd, struct thread, rb_node);
|
||||
rc = fn(thread, priv);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
}
|
||||
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
|
||||
threads = &machine->threads[i];
|
||||
for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
|
||||
thread = rb_entry(nd, struct thread, rb_node);
|
||||
rc = fn(thread, priv);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
}
|
||||
|
||||
list_for_each_entry(thread, &machine->dead_threads, node) {
|
||||
rc = fn(thread, priv);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
list_for_each_entry(thread, &threads->dead, node) {
|
||||
rc = fn(thread, priv);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "map.h"
|
||||
#include "dso.h"
|
||||
#include "event.h"
|
||||
#include "rwsem.h"
|
||||
|
||||
struct addr_location;
|
||||
struct branch_stack;
|
||||
@ -23,6 +24,17 @@ extern const char *ref_reloc_sym_names[];
|
||||
|
||||
struct vdso_info;
|
||||
|
||||
#define THREADS__TABLE_BITS 8
|
||||
#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)
|
||||
|
||||
struct threads {
|
||||
struct rb_root entries;
|
||||
struct rw_semaphore lock;
|
||||
unsigned int nr;
|
||||
struct list_head dead;
|
||||
struct thread *last_match;
|
||||
};
|
||||
|
||||
struct machine {
|
||||
struct rb_node rb_node;
|
||||
pid_t pid;
|
||||
@ -30,11 +42,7 @@ struct machine {
|
||||
bool comm_exec;
|
||||
bool kptr_restrict_warned;
|
||||
char *root_dir;
|
||||
struct rb_root threads;
|
||||
pthread_rwlock_t threads_lock;
|
||||
unsigned int nr_threads;
|
||||
struct list_head dead_threads;
|
||||
struct thread *last_match;
|
||||
struct threads threads[THREADS__TABLE_SIZE];
|
||||
struct vdso_info *vdso_info;
|
||||
struct perf_env *env;
|
||||
struct dsos dsos;
|
||||
@ -48,6 +56,12 @@ struct machine {
|
||||
};
|
||||
};
|
||||
|
||||
static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
|
||||
{
|
||||
/* Cast it to handle tid == -1 */
|
||||
return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
|
||||
}
|
||||
|
||||
static inline
|
||||
struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
|
||||
{
|
||||
|
@ -488,7 +488,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
|
||||
static void maps__init(struct maps *maps)
|
||||
{
|
||||
maps->entries = RB_ROOT;
|
||||
pthread_rwlock_init(&maps->lock, NULL);
|
||||
init_rwsem(&maps->lock);
|
||||
}
|
||||
|
||||
void map_groups__init(struct map_groups *mg, struct machine *machine)
|
||||
@ -517,9 +517,9 @@ static void __maps__purge(struct maps *maps)
|
||||
|
||||
static void maps__exit(struct maps *maps)
|
||||
{
|
||||
pthread_rwlock_wrlock(&maps->lock);
|
||||
down_write(&maps->lock);
|
||||
__maps__purge(maps);
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_write(&maps->lock);
|
||||
}
|
||||
|
||||
void map_groups__exit(struct map_groups *mg)
|
||||
@ -586,7 +586,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
|
||||
struct symbol *sym;
|
||||
struct rb_node *nd;
|
||||
|
||||
pthread_rwlock_rdlock(&maps->lock);
|
||||
down_read(&maps->lock);
|
||||
|
||||
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
|
||||
struct map *pos = rb_entry(nd, struct map, rb_node);
|
||||
@ -602,7 +602,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
|
||||
|
||||
sym = NULL;
|
||||
out:
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_read(&maps->lock);
|
||||
return sym;
|
||||
}
|
||||
|
||||
@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
|
||||
size_t printed = 0;
|
||||
struct rb_node *nd;
|
||||
|
||||
pthread_rwlock_rdlock(&maps->lock);
|
||||
down_read(&maps->lock);
|
||||
|
||||
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
|
||||
struct map *pos = rb_entry(nd, struct map, rb_node);
|
||||
@ -650,7 +650,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
|
||||
}
|
||||
}
|
||||
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_read(&maps->lock);
|
||||
|
||||
return printed;
|
||||
}
|
||||
@ -682,7 +682,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
|
||||
struct rb_node *next;
|
||||
int err = 0;
|
||||
|
||||
pthread_rwlock_wrlock(&maps->lock);
|
||||
down_write(&maps->lock);
|
||||
|
||||
root = &maps->entries;
|
||||
next = rb_first(root);
|
||||
@ -750,7 +750,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
|
||||
|
||||
err = 0;
|
||||
out:
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_write(&maps->lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -771,7 +771,7 @@ int map_groups__clone(struct thread *thread,
|
||||
struct map *map;
|
||||
struct maps *maps = &parent->maps[type];
|
||||
|
||||
pthread_rwlock_rdlock(&maps->lock);
|
||||
down_read(&maps->lock);
|
||||
|
||||
for (map = maps__first(maps); map; map = map__next(map)) {
|
||||
struct map *new = map__clone(map);
|
||||
@ -788,7 +788,7 @@ int map_groups__clone(struct thread *thread,
|
||||
|
||||
err = 0;
|
||||
out_unlock:
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_read(&maps->lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -815,9 +815,9 @@ static void __maps__insert(struct maps *maps, struct map *map)
|
||||
|
||||
void maps__insert(struct maps *maps, struct map *map)
|
||||
{
|
||||
pthread_rwlock_wrlock(&maps->lock);
|
||||
down_write(&maps->lock);
|
||||
__maps__insert(maps, map);
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_write(&maps->lock);
|
||||
}
|
||||
|
||||
static void __maps__remove(struct maps *maps, struct map *map)
|
||||
@ -828,9 +828,9 @@ static void __maps__remove(struct maps *maps, struct map *map)
|
||||
|
||||
void maps__remove(struct maps *maps, struct map *map)
|
||||
{
|
||||
pthread_rwlock_wrlock(&maps->lock);
|
||||
down_write(&maps->lock);
|
||||
__maps__remove(maps, map);
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_write(&maps->lock);
|
||||
}
|
||||
|
||||
struct map *maps__find(struct maps *maps, u64 ip)
|
||||
@ -838,7 +838,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
|
||||
struct rb_node **p, *parent = NULL;
|
||||
struct map *m;
|
||||
|
||||
pthread_rwlock_rdlock(&maps->lock);
|
||||
down_read(&maps->lock);
|
||||
|
||||
p = &maps->entries.rb_node;
|
||||
while (*p != NULL) {
|
||||
@ -854,7 +854,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
|
||||
|
||||
m = NULL;
|
||||
out:
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_read(&maps->lock);
|
||||
return m;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <linux/types.h>
|
||||
#include "rwsem.h"
|
||||
|
||||
enum map_type {
|
||||
MAP__FUNCTION = 0,
|
||||
@ -61,7 +62,7 @@ struct kmap {
|
||||
|
||||
struct maps {
|
||||
struct rb_root entries;
|
||||
pthread_rwlock_t lock;
|
||||
struct rw_semaphore lock;
|
||||
};
|
||||
|
||||
struct map_groups {
|
||||
|
490
tools/perf/util/metricgroup.c
Normal file
490
tools/perf/util/metricgroup.c
Normal file
@ -0,0 +1,490 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Manage metrics and groups of metrics from JSON files */
|
||||
|
||||
#include "metricgroup.h"
|
||||
#include "evlist.h"
|
||||
#include "strbuf.h"
|
||||
#include "pmu.h"
|
||||
#include "expr.h"
|
||||
#include "rblist.h"
|
||||
#include "pmu.h"
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
#include "pmu-events/pmu-events.h"
|
||||
#include "strbuf.h"
|
||||
#include "strlist.h"
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
|
||||
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
|
||||
struct perf_evsel *evsel,
|
||||
bool create)
|
||||
{
|
||||
struct rb_node *nd;
|
||||
struct metric_event me = {
|
||||
.evsel = evsel
|
||||
};
|
||||
nd = rblist__find(metric_events, &me);
|
||||
if (nd)
|
||||
return container_of(nd, struct metric_event, nd);
|
||||
if (create) {
|
||||
rblist__add_node(metric_events, &me);
|
||||
nd = rblist__find(metric_events, &me);
|
||||
if (nd)
|
||||
return container_of(nd, struct metric_event, nd);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int metric_event_cmp(struct rb_node *rb_node, const void *entry)
|
||||
{
|
||||
struct metric_event *a = container_of(rb_node,
|
||||
struct metric_event,
|
||||
nd);
|
||||
const struct metric_event *b = entry;
|
||||
|
||||
if (a->evsel == b->evsel)
|
||||
return 0;
|
||||
if ((char *)a->evsel < (char *)b->evsel)
|
||||
return -1;
|
||||
return +1;
|
||||
}
|
||||
|
||||
/*
 * rblist allocation callback: create a heap copy of the lookup key @entry
 * (a struct metric_event) with an empty list of metric expressions.
 * Returns the embedded rb_node, or NULL if the allocation fails.
 */
static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
					const void *entry)
{
	const struct metric_event *tmpl = entry;
	struct metric_event *me = malloc(sizeof(*me));

	if (me == NULL)
		return NULL;

	/* the copy carries ->evsel over; the list head must be re-initialised */
	*me = *tmpl;
	INIT_LIST_HEAD(&me->head);

	return &me->nd;
}
|
||||
|
||||
static void metricgroup__rblist_init(struct rblist *metric_events)
|
||||
{
|
||||
rblist__init(metric_events);
|
||||
metric_events->node_cmp = metric_event_cmp;
|
||||
metric_events->node_new = metric_event_new;
|
||||
}
|
||||
|
||||
/*
 * One metric selected by metricgroup__add_metric(), queued until its events
 * have been parsed and can be resolved by metricgroup__setup_events().
 */
struct egroup {
|
||||
/* link in the group list built by metricgroup__add_metric() */
struct list_head nd;
|
||||
/* number of entries in 'ids' */
int idnum;
|
||||
/* event names filled in by expr__find_other(); the strings and the array are freed by metricgroup__free_egroups() */
const char **ids;
|
||||
/* copied from the pmu_event table entry (pe->metric_name); not freed with the egroup */
const char *metric_name;
|
||||
/* copied from the pmu_event table entry (pe->metric_expr); not freed with the egroup */
const char *metric_expr;
|
||||
};
|
||||
|
||||
static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist,
|
||||
const char **ids,
|
||||
int idnum,
|
||||
struct perf_evsel **metric_events)
|
||||
{
|
||||
struct perf_evsel *ev, *start = NULL;
|
||||
int ind = 0;
|
||||
|
||||
evlist__for_each_entry (perf_evlist, ev) {
|
||||
if (!strcmp(ev->name, ids[ind])) {
|
||||
metric_events[ind] = ev;
|
||||
if (ind == 0)
|
||||
start = ev;
|
||||
if (++ind == idnum) {
|
||||
metric_events[ind] = NULL;
|
||||
return start;
|
||||
}
|
||||
} else {
|
||||
ind = 0;
|
||||
start = NULL;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This can happen when an alias expands to multiple
|
||||
* events, like for uncore events.
|
||||
* We don't support this case for now.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int metricgroup__setup_events(struct list_head *groups,
|
||||
struct perf_evlist *perf_evlist,
|
||||
struct rblist *metric_events_list)
|
||||
{
|
||||
struct metric_event *me;
|
||||
struct metric_expr *expr;
|
||||
int i = 0;
|
||||
int ret = 0;
|
||||
struct egroup *eg;
|
||||
struct perf_evsel *evsel;
|
||||
|
||||
list_for_each_entry (eg, groups, nd) {
|
||||
struct perf_evsel **metric_events;
|
||||
|
||||
metric_events = calloc(sizeof(void *), eg->idnum + 1);
|
||||
if (!metric_events) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
evsel = find_evsel(perf_evlist, eg->ids, eg->idnum,
|
||||
metric_events);
|
||||
if (!evsel) {
|
||||
pr_debug("Cannot resolve %s: %s\n",
|
||||
eg->metric_name, eg->metric_expr);
|
||||
continue;
|
||||
}
|
||||
for (i = 0; i < eg->idnum; i++)
|
||||
metric_events[i]->collect_stat = true;
|
||||
me = metricgroup__lookup(metric_events_list, evsel, true);
|
||||
if (!me) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
expr = malloc(sizeof(struct metric_expr));
|
||||
if (!expr) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
expr->metric_expr = eg->metric_expr;
|
||||
expr->metric_name = eg->metric_name;
|
||||
expr->metric_events = metric_events;
|
||||
list_add(&expr->nd, &me->head);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Check whether the user-supplied name @list selects the metric or metric
 * group string @n.
 *
 * @n is a ';'-separated list of names (or NULL when the metric has no
 * group). The comparison is case-insensitive and only whole names match:
 * a hit must start at the beginning of @n or right after ';' or ' ', and
 * must end at the end of @n or right before ';'.
 *
 * Special cases: a NULL @list never matches, "all" matches everything, and
 * a NULL @n is matched only by "No_group".
 *
 * Every occurrence of @list in @n is examined, so "L1" is found in
 * "TopDownL1;L1" even though its first occurrence is inside another name.
 */
static bool match_metric(const char *n, const char *list)
{
	size_t len;
	const char *m;

	if (!list)
		return false;
	if (!strcmp(list, "all"))
		return true;
	if (!n)
		return !strcasecmp(list, "No_group");

	len = strlen(list);
	/* an empty name can only match an empty leading element */
	if (len == 0)
		return *n == '\0' || *n == ';';

	for (m = strcasestr(n, list); m; m = strcasestr(m + 1, list)) {
		bool starts = m == n || m[-1] == ';' || m[-1] == ' ';
		bool ends = m[len] == '\0' || m[len] == ';';

		if (starts && ends)
			return true;
	}
	return false;
}
|
||||
|
||||
/* One metric group collected by metricgroup__print(), kept in an rblist. */
struct mep {
|
||||
/* node in the rblist of groups */
struct rb_node nd;
|
||||
/* group name; duplicated with strdup() in mep_new() and freed in mep_delete() */
const char *name;
|
||||
/* metrics belonging to this group; created in mep_new(), deleted in mep_delete() */
struct strlist *metrics;
|
||||
};
|
||||
|
||||
static int mep_cmp(struct rb_node *rb_node, const void *entry)
|
||||
{
|
||||
struct mep *a = container_of(rb_node, struct mep, nd);
|
||||
struct mep *b = (struct mep *)entry;
|
||||
|
||||
return strcmp(a->name, b->name);
|
||||
}
|
||||
|
||||
/*
 * rblist allocation callback: build a heap struct mep from the lookup key
 * @entry. The name is duplicated and an empty metric strlist is created.
 * Returns the embedded rb_node, or NULL if any allocation fails (everything
 * allocated so far is released).
 */
static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
			       const void *entry)
{
	struct mep *me = malloc(sizeof(*me));

	if (me == NULL)
		return NULL;

	memcpy(me, entry, sizeof(*me));

	/* take a private copy of the key's name */
	me->name = strdup(me->name);
	if (me->name == NULL) {
		free(me);
		return NULL;
	}

	me->metrics = strlist__new(NULL, NULL);
	if (me->metrics == NULL) {
		free((char *)me->name);
		free(me);
		return NULL;
	}

	return &me->nd;
}
|
||||
|
||||
static struct mep *mep_lookup(struct rblist *groups, const char *name)
|
||||
{
|
||||
struct rb_node *nd;
|
||||
struct mep me = {
|
||||
.name = name
|
||||
};
|
||||
nd = rblist__find(groups, &me);
|
||||
if (nd)
|
||||
return container_of(nd, struct mep, nd);
|
||||
rblist__add_node(groups, &me);
|
||||
nd = rblist__find(groups, &me);
|
||||
if (nd)
|
||||
return container_of(nd, struct mep, nd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * rblist delete callback: release a struct mep together with its metric
 * list and its duplicated name.
 */
static void mep_delete(struct rblist *rl __maybe_unused,
		       struct rb_node *nd)
{
	struct mep *group = container_of(nd, struct mep, nd);
	void *name = (void *)group->name;

	strlist__delete(group->metrics);
	free(name);
	free(group);
}
|
||||
|
||||
static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
|
||||
{
|
||||
struct str_node *sn;
|
||||
int n = 0;
|
||||
|
||||
strlist__for_each_entry (sn, metrics) {
|
||||
if (raw)
|
||||
printf("%s%s", n > 0 ? " " : "", sn->s);
|
||||
else
|
||||
printf(" %s\n", sn->s);
|
||||
n++;
|
||||
}
|
||||
if (raw)
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
void metricgroup__print(bool metrics, bool metricgroups, char *filter,
|
||||
bool raw)
|
||||
{
|
||||
struct pmu_events_map *map = perf_pmu__find_map();
|
||||
struct pmu_event *pe;
|
||||
int i;
|
||||
struct rblist groups;
|
||||
struct rb_node *node, *next;
|
||||
struct strlist *metriclist = NULL;
|
||||
|
||||
if (!map)
|
||||
return;
|
||||
|
||||
if (!metricgroups) {
|
||||
metriclist = strlist__new(NULL, NULL);
|
||||
if (!metriclist)
|
||||
return;
|
||||
}
|
||||
|
||||
rblist__init(&groups);
|
||||
groups.node_new = mep_new;
|
||||
groups.node_cmp = mep_cmp;
|
||||
groups.node_delete = mep_delete;
|
||||
for (i = 0; ; i++) {
|
||||
const char *g;
|
||||
pe = &map->table[i];
|
||||
|
||||
if (!pe->name && !pe->metric_group && !pe->metric_name)
|
||||
break;
|
||||
if (!pe->metric_expr)
|
||||
continue;
|
||||
g = pe->metric_group;
|
||||
if (!g && pe->metric_name) {
|
||||
if (pe->name)
|
||||
continue;
|
||||
g = "No_group";
|
||||
}
|
||||
if (g) {
|
||||
char *omg;
|
||||
char *mg = strdup(g);
|
||||
|
||||
if (!mg)
|
||||
return;
|
||||
omg = mg;
|
||||
while ((g = strsep(&mg, ";")) != NULL) {
|
||||
struct mep *me;
|
||||
char *s;
|
||||
|
||||
if (*g == 0)
|
||||
g = "No_group";
|
||||
while (isspace(*g))
|
||||
g++;
|
||||
if (filter && !strstr(g, filter))
|
||||
continue;
|
||||
if (raw)
|
||||
s = (char *)pe->metric_name;
|
||||
else {
|
||||
if (asprintf(&s, "%s\n\t[%s]",
|
||||
pe->metric_name, pe->desc) < 0)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!s)
|
||||
continue;
|
||||
|
||||
if (!metricgroups) {
|
||||
strlist__add(metriclist, s);
|
||||
} else {
|
||||
me = mep_lookup(&groups, g);
|
||||
if (!me)
|
||||
continue;
|
||||
strlist__add(me->metrics, s);
|
||||
}
|
||||
}
|
||||
free(omg);
|
||||
}
|
||||
}
|
||||
|
||||
if (metricgroups && !raw)
|
||||
printf("\nMetric Groups:\n\n");
|
||||
else if (metrics && !raw)
|
||||
printf("\nMetrics:\n\n");
|
||||
|
||||
for (node = rb_first(&groups.entries); node; node = next) {
|
||||
struct mep *me = container_of(node, struct mep, nd);
|
||||
|
||||
if (metricgroups)
|
||||
printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n");
|
||||
if (metrics)
|
||||
metricgroup__print_strlist(me->metrics, raw);
|
||||
next = rb_next(node);
|
||||
rblist__remove_node(&groups, node);
|
||||
}
|
||||
if (!metricgroups)
|
||||
metricgroup__print_strlist(metriclist, raw);
|
||||
strlist__delete(metriclist);
|
||||
}
|
||||
|
||||
static int metricgroup__add_metric(const char *metric, struct strbuf *events,
|
||||
struct list_head *group_list)
|
||||
{
|
||||
struct pmu_events_map *map = perf_pmu__find_map();
|
||||
struct pmu_event *pe;
|
||||
int ret = -EINVAL;
|
||||
int i, j;
|
||||
|
||||
if (!map)
|
||||
return 0;
|
||||
|
||||
for (i = 0; ; i++) {
|
||||
pe = &map->table[i];
|
||||
|
||||
if (!pe->name && !pe->metric_group && !pe->metric_name)
|
||||
break;
|
||||
if (!pe->metric_expr)
|
||||
continue;
|
||||
if (match_metric(pe->metric_group, metric) ||
|
||||
match_metric(pe->metric_name, metric)) {
|
||||
const char **ids;
|
||||
int idnum;
|
||||
struct egroup *eg;
|
||||
|
||||
pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
|
||||
|
||||
if (expr__find_other(pe->metric_expr,
|
||||
NULL, &ids, &idnum) < 0)
|
||||
continue;
|
||||
if (events->len > 0)
|
||||
strbuf_addf(events, ",");
|
||||
for (j = 0; j < idnum; j++) {
|
||||
pr_debug("found event %s\n", ids[j]);
|
||||
strbuf_addf(events, "%s%s",
|
||||
j == 0 ? "{" : ",",
|
||||
ids[j]);
|
||||
}
|
||||
strbuf_addf(events, "}:W");
|
||||
|
||||
eg = malloc(sizeof(struct egroup));
|
||||
if (!eg) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
eg->ids = ids;
|
||||
eg->idnum = idnum;
|
||||
eg->metric_name = pe->metric_name;
|
||||
eg->metric_expr = pe->metric_expr;
|
||||
list_add_tail(&eg->nd, group_list);
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Process the comma-separated metric/group names in @list.
 *
 * @events is initialised here and receives the event string for all
 * selected metrics; the matching egroups are appended to @group_list.
 *
 * Returns 0 on success. Processing stops at the first failing name so that
 * an error cannot be overwritten by a later, successful name: -EINVAL (with
 * a message on stderr) when a name matches nothing, -ENOMEM on allocation
 * failure. Note that when the initial strdup() fails, @events has not been
 * initialised.
 */
static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
					struct list_head *group_list)
{
	char *llist, *nlist, *p;
	int ret = -EINVAL;

	/* strsep() modifies its input, so work on a private copy */
	nlist = strdup(list);
	if (!nlist)
		return -ENOMEM;
	llist = nlist;

	strbuf_init(events, 100);
	strbuf_addf(events, "%s", "");

	while ((p = strsep(&llist, ",")) != NULL) {
		ret = metricgroup__add_metric(p, events, group_list);
		if (ret == -EINVAL)
			fprintf(stderr, "Cannot find metric or group `%s'\n",
				p);
		if (ret)
			break;
	}
	free(nlist);
	return ret;
}
|
||||
|
||||
static void metricgroup__free_egroups(struct list_head *group_list)
|
||||
{
|
||||
struct egroup *eg, *egtmp;
|
||||
int i;
|
||||
|
||||
list_for_each_entry_safe (eg, egtmp, group_list, nd) {
|
||||
for (i = 0; i < eg->idnum; i++)
|
||||
free((char *)eg->ids[i]);
|
||||
free(eg->ids);
|
||||
free(eg);
|
||||
}
|
||||
}
|
||||
|
||||
int metricgroup__parse_groups(const struct option *opt,
|
||||
const char *str,
|
||||
struct rblist *metric_events)
|
||||
{
|
||||
struct parse_events_error parse_error;
|
||||
struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value;
|
||||
struct strbuf extra_events;
|
||||
LIST_HEAD(group_list);
|
||||
int ret;
|
||||
|
||||
if (metric_events->nr_entries == 0)
|
||||
metricgroup__rblist_init(metric_events);
|
||||
ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
|
||||
if (ret)
|
||||
return ret;
|
||||
pr_debug("adding %s\n", extra_events.buf);
|
||||
memset(&parse_error, 0, sizeof(struct parse_events_error));
|
||||
ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
|
||||
if (ret) {
|
||||
parse_events_print_error(&parse_error, extra_events.buf);
|
||||
goto out;
|
||||
}
|
||||
strbuf_release(&extra_events);
|
||||
ret = metricgroup__setup_events(&group_list, perf_evlist,
|
||||
metric_events);
|
||||
out:
|
||||
metricgroup__free_egroups(&group_list);
|
||||
return ret;
|
||||
}
|
31
tools/perf/util/metricgroup.h
Normal file
31
tools/perf/util/metricgroup.h
Normal file
@ -0,0 +1,31 @@
|
||||
#ifndef METRICGROUP_H
|
||||
#define METRICGROUP_H 1
|
||||
|
||||
#include "linux/list.h"
|
||||
#include "rblist.h"
|
||||
#include <subcmd/parse-options.h>
|
||||
#include "evlist.h"
|
||||
#include "strbuf.h"
|
||||
|
||||
/*
 * Per-event anchor for metrics: one node per evsel, stored in an rblist and
 * found with metricgroup__lookup().
 */
struct metric_event {
|
||||
/* node in the rblist of metric events */
struct rb_node nd;
|
||||
/* lookup key: the event this node belongs to */
struct perf_evsel *evsel;
|
||||
struct list_head head; /* list of metric_expr */
|
||||
};
|
||||
|
||||
/* One metric formula attached to a struct metric_event. */
struct metric_expr {
|
||||
/* link in metric_event::head */
struct list_head nd;
|
||||
/* expression text that computes the metric */
const char *metric_expr;
|
||||
/* name the metric is reported under */
const char *metric_name;
|
||||
/* NULL-terminated array of the events the expression uses */
struct perf_evsel **metric_events;
|
||||
};
|
||||
|
||||
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
|
||||
struct perf_evsel *evsel,
|
||||
bool create);
|
||||
int metricgroup__parse_groups(const struct option *opt,
|
||||
const char *str,
|
||||
struct rblist *metric_events);
|
||||
|
||||
void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
|
||||
#endif
|
@ -11,6 +11,7 @@
|
||||
#include "event.h"
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <sched.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "probe-file.h"
|
||||
#include "asm/bug.h"
|
||||
#include "util/parse-branch-options.h"
|
||||
#include "metricgroup.h"
|
||||
|
||||
#define MAX_NAME_LEN 100
|
||||
|
||||
@ -1218,11 +1219,17 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
|
||||
struct perf_pmu_info info;
|
||||
struct perf_pmu *pmu;
|
||||
struct perf_evsel *evsel;
|
||||
struct parse_events_error *err = parse_state->error;
|
||||
LIST_HEAD(config_terms);
|
||||
|
||||
pmu = perf_pmu__find(name);
|
||||
if (!pmu)
|
||||
if (!pmu) {
|
||||
if (asprintf(&err->str,
|
||||
"Cannot find PMU `%s'. Missing kernel support?",
|
||||
name) < 0)
|
||||
err->str = NULL;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (pmu->default_config) {
|
||||
memcpy(&attr, pmu->default_config,
|
||||
@ -1366,6 +1373,7 @@ struct event_modifier {
|
||||
int exclude_GH;
|
||||
int sample_read;
|
||||
int pinned;
|
||||
int weak;
|
||||
};
|
||||
|
||||
static int get_event_modifier(struct event_modifier *mod, char *str,
|
||||
@ -1384,6 +1392,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
|
||||
|
||||
int exclude = eu | ek | eh;
|
||||
int exclude_GH = evsel ? evsel->exclude_GH : 0;
|
||||
int weak = 0;
|
||||
|
||||
memset(mod, 0, sizeof(*mod));
|
||||
|
||||
@ -1421,6 +1430,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
|
||||
sample_read = 1;
|
||||
} else if (*str == 'D') {
|
||||
pinned = 1;
|
||||
} else if (*str == 'W') {
|
||||
weak = 1;
|
||||
} else
|
||||
break;
|
||||
|
||||
@ -1451,6 +1462,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
|
||||
mod->exclude_GH = exclude_GH;
|
||||
mod->sample_read = sample_read;
|
||||
mod->pinned = pinned;
|
||||
mod->weak = weak;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1464,7 +1476,7 @@ static int check_modifier(char *str)
|
||||
char *p = str;
|
||||
|
||||
/* The sizeof includes 0 byte as well. */
|
||||
if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1))
|
||||
if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1))
|
||||
return -1;
|
||||
|
||||
while (*p) {
|
||||
@ -1504,6 +1516,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
|
||||
evsel->exclude_GH = mod.exclude_GH;
|
||||
evsel->sample_read = mod.sample_read;
|
||||
evsel->precise_max = mod.precise_max;
|
||||
evsel->weak_group = mod.weak;
|
||||
|
||||
if (perf_evsel__is_group_leader(evsel))
|
||||
evsel->attr.pinned = mod.pinned;
|
||||
@ -1726,8 +1739,8 @@ static int get_term_width(void)
|
||||
return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
|
||||
}
|
||||
|
||||
static void parse_events_print_error(struct parse_events_error *err,
|
||||
const char *event)
|
||||
void parse_events_print_error(struct parse_events_error *err,
|
||||
const char *event)
|
||||
{
|
||||
const char *str = "invalid or unsupported event: ";
|
||||
char _buf[MAX_WIDTH];
|
||||
@ -1782,8 +1795,6 @@ static void parse_events_print_error(struct parse_events_error *err,
|
||||
zfree(&err->str);
|
||||
zfree(&err->help);
|
||||
}
|
||||
|
||||
fprintf(stderr, "Run 'perf list' for a list of valid events\n");
|
||||
}
|
||||
|
||||
#undef MAX_WIDTH
|
||||
@ -1795,8 +1806,10 @@ int parse_events_option(const struct option *opt, const char *str,
|
||||
struct parse_events_error err = { .idx = 0, };
|
||||
int ret = parse_events(evlist, str, &err);
|
||||
|
||||
if (ret)
|
||||
if (ret) {
|
||||
parse_events_print_error(&err, str);
|
||||
fprintf(stderr, "Run 'perf list' for a list of valid events\n");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2374,6 +2387,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
|
||||
print_tracepoint_events(NULL, NULL, name_only);
|
||||
|
||||
print_sdt_events(NULL, NULL, name_only);
|
||||
|
||||
metricgroup__print(true, true, NULL, name_only);
|
||||
}
|
||||
|
||||
int parse_events__is_hardcoded_term(struct parse_events_term *term)
|
||||
|
@ -202,6 +202,9 @@ int is_valid_tracepoint(const char *event_string);
|
||||
int valid_event_mount(const char *eventfs);
|
||||
char *parse_events_formats_error_string(char *additional_terms);
|
||||
|
||||
void parse_events_print_error(struct parse_events_error *err,
|
||||
const char *event);
|
||||
|
||||
#ifdef HAVE_LIBELF_SUPPORT
|
||||
/*
|
||||
* If the probe point starts with '%',
|
||||
|
@ -161,7 +161,7 @@ name [a-zA-Z_*?][a-zA-Z0-9_*?.]*
|
||||
name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
|
||||
drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
|
||||
/* If you add a modifier you need to update check_modifier() */
|
||||
modifier_event [ukhpPGHSDI]+
|
||||
modifier_event [ukhpPGHSDIW]+
|
||||
modifier_bp [rwx]{1,3}
|
||||
|
||||
%%
|
||||
@ -288,6 +288,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU
|
||||
alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
|
||||
emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
|
||||
dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
|
||||
duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
|
||||
bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
|
||||
|
||||
/*
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <sys/types.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
@ -516,6 +517,47 @@ char * __weak get_cpuid_str(void)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Return a heap-allocated CPUID string, or NULL if none can be determined.
 *
 * The PERF_CPUID environment variable takes precedence; otherwise (or if
 * duplicating it fails) get_cpuid_str() is used. The chosen value is
 * logged with pr_debug() the first time only. perf_pmu__find_map() frees
 * the returned string.
 */
static char *perf_pmu__getcpuid(void)
{
	static bool printed;
	const char *env = getenv("PERF_CPUID");
	char *cpuid = env ? strdup(env) : NULL;

	if (cpuid == NULL)
		cpuid = get_cpuid_str();
	if (cpuid == NULL)
		return NULL;

	if (!printed) {
		pr_debug("Using CPUID %s\n", cpuid);
		printed = true;
	}
	return cpuid;
}
|
||||
|
||||
struct pmu_events_map *perf_pmu__find_map(void)
|
||||
{
|
||||
struct pmu_events_map *map;
|
||||
char *cpuid = perf_pmu__getcpuid();
|
||||
int i;
|
||||
|
||||
i = 0;
|
||||
for (;;) {
|
||||
map = &pmu_events_map[i++];
|
||||
if (!map->table) {
|
||||
map = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!strcmp(map->cpuid, cpuid))
|
||||
break;
|
||||
}
|
||||
free(cpuid);
|
||||
return map;
|
||||
}
|
||||
|
||||
/*
|
||||
* From the pmu_events_map, find the table of PMU events that corresponds
|
||||
* to the current running CPU. Then, add all PMU events from that table
|
||||
@ -526,32 +568,11 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
|
||||
int i;
|
||||
struct pmu_events_map *map;
|
||||
struct pmu_event *pe;
|
||||
char *cpuid;
|
||||
static bool printed;
|
||||
|
||||
cpuid = getenv("PERF_CPUID");
|
||||
if (cpuid)
|
||||
cpuid = strdup(cpuid);
|
||||
if (!cpuid)
|
||||
cpuid = get_cpuid_str();
|
||||
if (!cpuid)
|
||||
map = perf_pmu__find_map();
|
||||
if (!map)
|
||||
return;
|
||||
|
||||
if (!printed) {
|
||||
pr_debug("Using CPUID %s\n", cpuid);
|
||||
printed = true;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (1) {
|
||||
map = &pmu_events_map[i++];
|
||||
if (!map->table)
|
||||
goto out;
|
||||
|
||||
if (!strcmp(map->cpuid, cpuid))
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Found a matching PMU events table. Create aliases
|
||||
*/
|
||||
@ -560,8 +581,11 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
|
||||
const char *pname;
|
||||
|
||||
pe = &map->table[i++];
|
||||
if (!pe->name)
|
||||
if (!pe->name) {
|
||||
if (pe->metric_group || pe->metric_name)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
|
||||
pname = pe->pmu ? pe->pmu : "cpu";
|
||||
if (strncmp(pname, name, strlen(pname)))
|
||||
@ -575,9 +599,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
|
||||
(char *)pe->metric_expr,
|
||||
(char *)pe->metric_name);
|
||||
}
|
||||
|
||||
out:
|
||||
free(cpuid);
|
||||
}
|
||||
|
||||
struct perf_event_attr * __weak
|
||||
|
@ -90,4 +90,6 @@ int perf_pmu__test(void);
|
||||
|
||||
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
|
||||
|
||||
struct pmu_events_map *perf_pmu__find_map(void);
|
||||
|
||||
#endif /* __PMU_H */
|
||||
|
@ -15,6 +15,7 @@
|
||||
*
|
||||
*/
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
|
@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new
|
||||
__ilist->rblist.nr_entries)
|
||||
|
||||
/* For 'struct machine->threads' */
|
||||
#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \
|
||||
DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
|
||||
#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
|
||||
DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \
|
||||
__machine->threads[hash_bucket].nr)
|
||||
|
||||
#endif /* _PERF_RESORT_RB_H_ */
|
||||
|
32
tools/perf/util/rwsem.c
Normal file
32
tools/perf/util/rwsem.c
Normal file
@ -0,0 +1,32 @@
|
||||
#include "util.h"
|
||||
#include "rwsem.h"
|
||||
|
||||
int init_rwsem(struct rw_semaphore *sem)
|
||||
{
|
||||
return pthread_rwlock_init(&sem->lock, NULL);
|
||||
}
|
||||
|
||||
int exit_rwsem(struct rw_semaphore *sem)
|
||||
{
|
||||
return pthread_rwlock_destroy(&sem->lock);
|
||||
}
|
||||
|
||||
int down_read(struct rw_semaphore *sem)
|
||||
{
|
||||
return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
|
||||
}
|
||||
|
||||
int up_read(struct rw_semaphore *sem)
|
||||
{
|
||||
return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
|
||||
}
|
||||
|
||||
int down_write(struct rw_semaphore *sem)
|
||||
{
|
||||
return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
|
||||
}
|
||||
|
||||
int up_write(struct rw_semaphore *sem)
|
||||
{
|
||||
return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
|
||||
}
|
19
tools/perf/util/rwsem.h
Normal file
19
tools/perf/util/rwsem.h
Normal file
@ -0,0 +1,19 @@
|
||||
#ifndef _PERF_RWSEM_H
|
||||
#define _PERF_RWSEM_H
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
/*
 * Reader/writer lock wrapper operated on by init_rwsem(), exit_rwsem(),
 * down_read()/up_read() and down_write()/up_write().
 */
struct rw_semaphore {
|
||||
/* underlying POSIX reader/writer lock */
pthread_rwlock_t lock;
|
||||
};
|
||||
|
||||
int init_rwsem(struct rw_semaphore *sem);
|
||||
int exit_rwsem(struct rw_semaphore *sem);
|
||||
|
||||
int down_read(struct rw_semaphore *sem);
|
||||
int up_read(struct rw_semaphore *sem);
|
||||
|
||||
int down_write(struct rw_semaphore *sem);
|
||||
int up_write(struct rw_semaphore *sem);
|
||||
|
||||
#endif /* _PERF_RWSEM_H */
|
@ -1847,7 +1847,7 @@ static int __perf_session__process_events(struct perf_session *session,
|
||||
if (data_offset + data_size < file_size)
|
||||
file_size = data_offset + data_size;
|
||||
|
||||
ui_progress__init(&prog, file_size, "Processing events...");
|
||||
ui_progress__init_size(&prog, file_size, "Processing events...");
|
||||
|
||||
mmap_size = MMAP_SIZE;
|
||||
if (mmap_size > file_size) {
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "rblist.h"
|
||||
#include "evlist.h"
|
||||
#include "expr.h"
|
||||
#include "metricgroup.h"
|
||||
|
||||
enum {
|
||||
CTX_BIT_USER = 1 << 0,
|
||||
@ -55,7 +56,6 @@ struct saved_value {
|
||||
struct rb_node rb_node;
|
||||
struct perf_evsel *evsel;
|
||||
int cpu;
|
||||
int ctx;
|
||||
struct stats stats;
|
||||
};
|
||||
|
||||
@ -66,8 +66,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
|
||||
rb_node);
|
||||
const struct saved_value *b = entry;
|
||||
|
||||
if (a->ctx != b->ctx)
|
||||
return a->ctx - b->ctx;
|
||||
if (a->cpu != b->cpu)
|
||||
return a->cpu - b->cpu;
|
||||
if (a->evsel == b->evsel)
|
||||
@ -89,13 +87,12 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
|
||||
}
|
||||
|
||||
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
|
||||
int cpu, int ctx,
|
||||
int cpu,
|
||||
bool create)
|
||||
{
|
||||
struct rb_node *nd;
|
||||
struct saved_value dm = {
|
||||
.cpu = cpu,
|
||||
.ctx = ctx,
|
||||
.evsel = evsel,
|
||||
};
|
||||
nd = rblist__find(&runtime_saved_values, &dm);
|
||||
@ -231,8 +228,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
|
||||
update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
|
||||
|
||||
if (counter->collect_stat) {
|
||||
struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
|
||||
true);
|
||||
struct saved_value *v = saved_value_lookup(counter, cpu, true);
|
||||
update_stats(&v->stats, count[0]);
|
||||
}
|
||||
}
|
||||
@ -627,15 +623,68 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel,
|
||||
out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
|
||||
}
|
||||
|
||||
static void generic_metric(const char *metric_expr,
|
||||
struct perf_evsel **metric_events,
|
||||
char *name,
|
||||
const char *metric_name,
|
||||
double avg,
|
||||
int cpu,
|
||||
struct perf_stat_output_ctx *out)
|
||||
{
|
||||
print_metric_t print_metric = out->print_metric;
|
||||
struct parse_ctx pctx;
|
||||
double ratio;
|
||||
int i;
|
||||
void *ctxp = out->ctx;
|
||||
|
||||
expr__ctx_init(&pctx);
|
||||
expr__add_id(&pctx, name, avg);
|
||||
for (i = 0; metric_events[i]; i++) {
|
||||
struct saved_value *v;
|
||||
struct stats *stats;
|
||||
double scale;
|
||||
|
||||
if (!strcmp(metric_events[i]->name, "duration_time")) {
|
||||
stats = &walltime_nsecs_stats;
|
||||
scale = 1e-9;
|
||||
} else {
|
||||
v = saved_value_lookup(metric_events[i], cpu, false);
|
||||
if (!v)
|
||||
break;
|
||||
stats = &v->stats;
|
||||
scale = 1.0;
|
||||
}
|
||||
expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
|
||||
}
|
||||
if (!metric_events[i]) {
|
||||
const char *p = metric_expr;
|
||||
|
||||
if (expr__parse(&ratio, &pctx, &p) == 0)
|
||||
print_metric(ctxp, NULL, "%8.1f",
|
||||
metric_name ?
|
||||
metric_name :
|
||||
out->force_header ? name : "",
|
||||
ratio);
|
||||
else
|
||||
print_metric(ctxp, NULL, NULL,
|
||||
out->force_header ?
|
||||
(metric_name ? metric_name : name) : "", 0);
|
||||
} else
|
||||
print_metric(ctxp, NULL, NULL, "", 0);
|
||||
}
|
||||
|
||||
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||
double avg, int cpu,
|
||||
struct perf_stat_output_ctx *out)
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct rblist *metric_events)
|
||||
{
|
||||
void *ctxp = out->ctx;
|
||||
print_metric_t print_metric = out->print_metric;
|
||||
double total, ratio = 0.0, total2;
|
||||
const char *color = NULL;
|
||||
int ctx = evsel_context(evsel);
|
||||
struct metric_event *me;
|
||||
int num = 1;
|
||||
|
||||
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
|
||||
total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
|
||||
@ -819,33 +868,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||
else
|
||||
print_metric(ctxp, NULL, NULL, name, 0);
|
||||
} else if (evsel->metric_expr) {
|
||||
struct parse_ctx pctx;
|
||||
int i;
|
||||
|
||||
expr__ctx_init(&pctx);
|
||||
expr__add_id(&pctx, evsel->name, avg);
|
||||
for (i = 0; evsel->metric_events[i]; i++) {
|
||||
struct saved_value *v;
|
||||
|
||||
v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false);
|
||||
if (!v)
|
||||
break;
|
||||
expr__add_id(&pctx, evsel->metric_events[i]->name,
|
||||
avg_stats(&v->stats));
|
||||
}
|
||||
if (!evsel->metric_events[i]) {
|
||||
const char *p = evsel->metric_expr;
|
||||
|
||||
if (expr__parse(&ratio, &pctx, &p) == 0)
|
||||
print_metric(ctxp, NULL, "%8.1f",
|
||||
evsel->metric_name ?
|
||||
evsel->metric_name :
|
||||
out->force_header ? evsel->name : "",
|
||||
ratio);
|
||||
else
|
||||
print_metric(ctxp, NULL, NULL, "", 0);
|
||||
} else
|
||||
print_metric(ctxp, NULL, NULL, "", 0);
|
||||
generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
|
||||
evsel->metric_name, avg, cpu, out);
|
||||
} else if (runtime_nsecs_stats[cpu].n != 0) {
|
||||
char unit = 'M';
|
||||
char unit_buf[10];
|
||||
@ -863,6 +887,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
|
||||
print_smi_cost(cpu, evsel, out);
|
||||
} else {
|
||||
print_metric(ctxp, NULL, NULL, NULL, 0);
|
||||
num = 0;
|
||||
}
|
||||
|
||||
if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
|
||||
struct metric_expr *mexp;
|
||||
|
||||
list_for_each_entry (mexp, &me->head, nd) {
|
||||
if (num++ > 0)
|
||||
out->new_line(ctxp);
|
||||
generic_metric(mexp->metric_expr, mexp->metric_events,
|
||||
evsel->name, mexp->metric_name,
|
||||
avg, cpu, out);
|
||||
}
|
||||
}
|
||||
if (num == 0)
|
||||
print_metric(ctxp, NULL, NULL, NULL, 0);
|
||||
}
|
||||
|
@ -91,9 +91,11 @@ struct perf_stat_output_ctx {
|
||||
bool force_header;
|
||||
};
|
||||
|
||||
struct rblist;
|
||||
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||
double avg, int cpu,
|
||||
struct perf_stat_output_ctx *out);
|
||||
struct perf_stat_output_ctx *out,
|
||||
struct rblist *metric_events);
|
||||
void perf_stat__collect_metric_expr(struct perf_evlist *);
|
||||
|
||||
int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
|
||||
|
@ -226,7 +226,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
|
||||
struct maps *maps = &mg->maps[type];
|
||||
struct map *next, *curr;
|
||||
|
||||
pthread_rwlock_wrlock(&maps->lock);
|
||||
down_write(&maps->lock);
|
||||
|
||||
curr = maps__first(maps);
|
||||
if (curr == NULL)
|
||||
@ -246,7 +246,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
|
||||
curr->end = ~0ULL;
|
||||
|
||||
out_unlock:
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_write(&maps->lock);
|
||||
}
|
||||
|
||||
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
|
||||
@ -1671,7 +1671,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
|
||||
struct maps *maps = &mg->maps[type];
|
||||
struct map *map;
|
||||
|
||||
pthread_rwlock_rdlock(&maps->lock);
|
||||
down_read(&maps->lock);
|
||||
|
||||
for (map = maps__first(maps); map; map = map__next(map)) {
|
||||
if (map->dso && strcmp(map->dso->short_name, name) == 0)
|
||||
@ -1681,7 +1681,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
|
||||
map = NULL;
|
||||
|
||||
out_unlock:
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_read(&maps->lock);
|
||||
return map;
|
||||
}
|
||||
|
||||
|
@ -264,7 +264,7 @@ static int __thread__prepare_access(struct thread *thread)
|
||||
struct maps *maps = &thread->mg->maps[i];
|
||||
struct map *map;
|
||||
|
||||
pthread_rwlock_rdlock(&maps->lock);
|
||||
down_read(&maps->lock);
|
||||
|
||||
for (map = maps__first(maps); map; map = map__next(map)) {
|
||||
err = unwind__prepare_access(thread, map, &initialized);
|
||||
@ -272,7 +272,7 @@ static int __thread__prepare_access(struct thread *thread)
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_rwlock_unlock(&maps->lock);
|
||||
up_read(&maps->lock);
|
||||
}
|
||||
|
||||
return err;
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/wait.h>
|
||||
#include <pthread.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
@ -27,7 +27,6 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/mman.h>
|
||||
#include <pthread.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
@ -22,6 +23,19 @@
|
||||
/*
|
||||
* XXX We need to find a better place for these things...
|
||||
*/
|
||||
|
||||
bool perf_singlethreaded = true;
|
||||
|
||||
/*
 * Mark the tool as single threaded by setting the global
 * perf_singlethreaded flag to true. Takes no arguments and returns nothing;
 * the only effect is the store to the flag.
 * NOTE(review): presumably consulted by the locking wrappers so that purely
 * single threaded tools can skip lock operations - confirm against the users
 * of perf_singlethreaded.
 */
void perf_set_singlethreaded(void)
|
||||
{
|
||||
perf_singlethreaded = true;
|
||||
}
|
||||
|
||||
/*
 * Mark the tool as multithreaded by clearing the global
 * perf_singlethreaded flag (sets it to false). Takes no arguments and
 * returns nothing; the only effect is the store to the flag.
 * NOTE(review): the store is a plain assignment with no synchronization
 * visible here, so presumably this is meant to be called before any extra
 * threads are created - confirm against callers.
 */
void perf_set_multithreaded(void)
|
||||
{
|
||||
perf_singlethreaded = false;
|
||||
}
|
||||
|
||||
unsigned int page_size;
|
||||
int cacheline_size;
|
||||
|
||||
@ -174,7 +188,7 @@ static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
|
||||
return err;
|
||||
}
|
||||
|
||||
int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
|
||||
static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
|
||||
{
|
||||
void *ptr;
|
||||
loff_t pgoff;
|
||||
|
@ -5,7 +5,6 @@
|
||||
/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
|
||||
#define _DEFAULT_SOURCE 1
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
@ -35,7 +34,6 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d);
|
||||
int copyfile(const char *from, const char *to);
|
||||
int copyfile_mode(const char *from, const char *to, mode_t mode);
|
||||
int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
|
||||
int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
|
||||
|
||||
ssize_t readn(int fd, void *buf, size_t n);
|
||||
ssize_t writen(int fd, const void *buf, size_t n);
|
||||
@ -64,4 +62,9 @@ int sched_getcpu(void);
|
||||
int setns(int fd, int nstype);
|
||||
#endif
|
||||
|
||||
extern bool perf_singlethreaded;
|
||||
|
||||
void perf_set_singlethreaded(void);
|
||||
void perf_set_multithreaded(void);
|
||||
|
||||
#endif /* GIT_COMPAT_UTIL_H */
|
||||
|
@ -319,7 +319,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
|
||||
struct vdso_info *vdso_info;
|
||||
struct dso *dso = NULL;
|
||||
|
||||
pthread_rwlock_wrlock(&machine->dsos.lock);
|
||||
down_write(&machine->dsos.lock);
|
||||
if (!machine->vdso_info)
|
||||
machine->vdso_info = vdso_info__new();
|
||||
|
||||
@ -347,7 +347,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
|
||||
|
||||
out_unlock:
|
||||
dso__get(dso);
|
||||
pthread_rwlock_unlock(&machine->dsos.lock);
|
||||
up_write(&machine->dsos.lock);
|
||||
return dso;
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
|
Loading…
Reference in New Issue
Block a user