Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "Bigger kernel side changes:

   - Add backwards writing capability to the perf ring-buffer code,
     which is preparation for future advanced features like robust
     'overwrite support' and snapshot mode. (Wang Nan)

   - Add pause and resume ioctls for the perf ringbuffer (Wang Nan)

   - x86 Intel cstate code cleanups and reorganization (Thomas Gleixner)

   - x86 Intel uncore and CPU PMU driver updates (Kan Liang, Peter Zijlstra)

   - x86 AUX (Intel PT) related enhancements and updates (Alexander Shishkin)

   - x86 MSR PMU driver enhancements and updates (Huang Rui)

   - ... and lots of other changes spread out over 40+ commits.

  Biggest tooling side changes:

   - 'perf trace' features and enhancements (Arnaldo Carvalho de Melo)

   - BPF tooling updates (Wang Nan)

   - 'perf sched' updates (Jiri Olsa)

   - 'perf probe' updates (Masami Hiramatsu)

   - ... plus 200+ other enhancements, fixes and cleanups to tools/

  The merge commits, the shortlog and the changelogs contain a lot more
  details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (249 commits)
  perf/core: Disable the event on a truncated AUX record
  perf/x86/intel/pt: Generate PMI in the STOP region as well
  perf buildid-cache: Use lsdir() for looking up buildid caches
  perf symbols: Use lsdir() for the search in kcore cache directory
  perf tools: Use SBUILD_ID_SIZE where applicable
  perf tools: Fix lsdir to set errno correctly
  perf trace: Move seccomp args beautifiers to tools/perf/trace/beauty/
  perf trace: Move flock op beautifier to tools/perf/trace/beauty/
  perf build: Add build-test for debug-frame on arm/arm64
  perf build: Add build-test for libunwind cross-platforms support
  perf script: Fix export of callchains with recursion in db-export
  perf script: Fix callchain addresses in db-export
  perf script: Fix symbol insertion behavior in db-export
  perf symbols: Add dso__insert_symbol function
  perf scripting python: Use Py_FatalError instead of die()
  perf tools: Remove xrealloc and ALLOC_GROW
  perf help: Do not use ALLOC_GROW in add_cmd_list
  perf pmu: Make pmu_formats_string to check return value of strbuf
  perf header: Make topology checkers to check return value of strbuf
  perf tools: Make alias handler to check return value of strbuf
  ...
commit 36db171cc7
@@ -60,6 +60,7 @@ show up in /proc/sys/kernel:
 - panic_on_warn
 - perf_cpu_time_max_percent
 - perf_event_paranoid
+- perf_event_max_stack
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN). The default value is 2.

 ==============================================================

+perf_event_max_stack:
+
+Controls maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+==============================================================
+
 pid_max:

 PID allocation wrap value. When the kernel's next PID value
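The knob lands in /proc/sys/kernel/ alongside the other perf sysctls. A minimal user-space sketch of reading and raising it; 512 is an arbitrary example value, and the write fails with EBUSY while callchain events are active, exactly as the text above states:

    /* sketch: query and raise kernel.perf_event_max_stack */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        char buf[32];
        ssize_t n;
        int fd = open("/proc/sys/kernel/perf_event_max_stack", O_RDWR);

        if (fd < 0)
            return 1;
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
            buf[n] = '\0';
            printf("current limit: %s", buf);   /* default is 127 */
        }
        lseek(fd, 0, SEEK_SET);
        /* raising the limit; fails with EBUSY while callchain events exist */
        if (write(fd, "512\n", 4) < 0)
            perror("perf_event_max_stack");
        close(fd);
        return 0;
    }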
@@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
     info->address &= ~alignment_mask;
     info->ctrl.len <<= offset;

-    if (!bp->overflow_handler) {
+    if (is_default_overflow_handler(bp)) {
         /*
          * Mismatch breakpoints are required for single-stepping
          * breakpoints.
@@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
          * mismatch breakpoint so we can single-step over the
          * watchpoint trigger.
          */
-        if (!wp->overflow_handler)
+        if (is_default_overflow_handler(wp))
             enable_single_step(wp, instruction_pointer(regs));

 unlock:
@@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)

     tail = (struct frame_tail __user *)regs->ARM_fp - 1;

-    while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+    while ((entry->nr < sysctl_perf_event_max_stack) &&
            tail && !((unsigned long)tail & 0x3))
         tail = user_backtrace(tail, entry);
 }
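Every architecture hunk in this series makes the same substitution: the compile-time PERF_MAX_STACK_DEPTH bound on the unwind loop becomes the runtime sysctl_perf_event_max_stack limit. A reduced sketch of the frame-pointer walk being bounded; the types are simplified stand-ins for the kernel's per-arch structures:

    /* simplified stand-in for the per-arch unwind loops patched here */
    struct frame_tail {
        struct frame_tail *fp;          /* caller's frame pointer */
        unsigned long lr;               /* return address */
    };

    static unsigned int max_stack = 127;    /* sysctl_perf_event_max_stack */

    static unsigned int walk_user_stack(struct frame_tail *tail,
                                        unsigned long *ips, unsigned int nr)
    {
        /* same shape as the ARM loop: depth bound, NULL check, alignment check */
        while (nr < max_stack && tail && !((unsigned long)tail & 0x3)) {
            ips[nr++] = tail->lr;
            tail = tail->fp;
        }
        return nr;
    }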
@@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,
         perf_bp_event(bp, regs);

         /* Do we need to handle the stepping? */
-        if (!bp->overflow_handler)
+        if (is_default_overflow_handler(bp))
             step = 1;
 unlock:
     rcu_read_unlock();
@@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
     perf_bp_event(wp, regs);

     /* Do we need to handle the stepping? */
-    if (!wp->overflow_handler)
+    if (is_default_overflow_handler(wp))
         step = 1;

 unlock:
@@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,

         tail = (struct frame_tail __user *)regs->regs[29];

-        while (entry->nr < PERF_MAX_STACK_DEPTH &&
+        while (entry->nr < sysctl_perf_event_max_stack &&
                tail && !((unsigned long)tail & 0xf))
             tail = user_backtrace(tail, entry);
     } else {
@@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,

         tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;

-        while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+        while ((entry->nr < sysctl_perf_event_max_stack) &&
             tail && !((unsigned long)tail & 0x3))
             tail = compat_user_backtrace(tail, entry);
 #endif
@@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)

     --frame;

-    while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+    while ((entry->nr < sysctl_perf_event_max_stack) && frame)
         frame = user_backtrace(frame, entry);
 }
@@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
         addr = *sp++;
         if (__kernel_text_address(addr)) {
             perf_callchain_store(entry, addr);
-            if (entry->nr >= PERF_MAX_STACK_DEPTH)
+            if (entry->nr >= sysctl_perf_event_max_stack)
                 break;
         }
     }
@@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
     }
     do {
         perf_callchain_store(entry, pc);
-        if (entry->nr >= PERF_MAX_STACK_DEPTH)
+        if (entry->nr >= sysctl_perf_event_max_stack)
             break;
         pc = unwind_stack(current, &sp, pc, &ra);
     } while (pc);
@@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
     sp = regs->gpr[1];
     perf_callchain_store(entry, next_ip);

-    while (entry->nr < PERF_MAX_STACK_DEPTH) {
+    while (entry->nr < sysctl_perf_event_max_stack) {
         fp = (unsigned long __user *) sp;
         if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
             return;
@@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
     sp = regs->gpr[1];
     perf_callchain_store(entry, next_ip);

-    while (entry->nr < PERF_MAX_STACK_DEPTH) {
+    while (entry->nr < sysctl_perf_event_max_stack) {
         fp = (unsigned int __user *) (unsigned long) sp;
         if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
             return;
@@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
             }
         }
 #endif
-    } while (entry->nr < PERF_MAX_STACK_DEPTH);
+    } while (entry->nr < sysctl_perf_event_max_stack);
 }

 static inline int
@@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
         pc = sf.callers_pc;
         ufp = (unsigned long)sf.fp + STACK_BIAS;
         perf_callchain_store(entry, pc);
-    } while (entry->nr < PERF_MAX_STACK_DEPTH);
+    } while (entry->nr < sysctl_perf_event_max_stack);
 }

 static void perf_callchain_user_32(struct perf_callchain_entry *entry,
@@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
             ufp = (unsigned long)sf.fp;
         }
         perf_callchain_store(entry, pc);
-    } while (entry->nr < PERF_MAX_STACK_DEPTH);
+    } while (entry->nr < sysctl_perf_event_max_stack);
 }

 void
@@ -164,10 +164,6 @@ config INSTRUCTION_DECODER
     def_bool y
     depends on KPROBES || PERF_EVENTS || UPROBES

-config PERF_EVENTS_INTEL_UNCORE
-    def_bool y
-    depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
-
 config OUTPUT_FORMAT
     string
     default "elf32-i386" if X86_32
@@ -1046,6 +1042,8 @@ config X86_THERMAL_VECTOR
     def_bool y
     depends on X86_MCE_INTEL

+source "arch/x86/events/Kconfig"
+
 config X86_LEGACY_VM86
     bool "Legacy VM86 support"
     default n
@@ -1210,15 +1208,6 @@ config MICROCODE_OLD_INTERFACE
     def_bool y
     depends on MICROCODE

-config PERF_EVENTS_AMD_POWER
-    depends on PERF_EVENTS && CPU_SUP_AMD
-    tristate "AMD Processor Power Reporting Mechanism"
-    ---help---
-      Provide power reporting mechanism support for AMD processors.
-      Currently, it leverages X86_FEATURE_ACC_POWER
-      (CPUID Fn8000_0007_EDX[12]) interface to calculate the
-      average power consumption on Family 15h processors.
-
 config X86_MSR
     tristate "/dev/cpu/*/msr - Model-specific register support"
     ---help---
arch/x86/events/Kconfig (new file, 36 lines)
@@ -0,0 +1,36 @@
+menu "Performance monitoring"
+
+config PERF_EVENTS_INTEL_UNCORE
+    tristate "Intel uncore performance events"
+    depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+    default y
+    ---help---
+      Include support for Intel uncore performance events. These are
+      available on NehalemEX and more modern processors.
+
+config PERF_EVENTS_INTEL_RAPL
+    tristate "Intel rapl performance events"
+    depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+    default y
+    ---help---
+      Include support for Intel rapl performance events for power
+      monitoring on modern processors.
+
+config PERF_EVENTS_INTEL_CSTATE
+    tristate "Intel cstate performance events"
+    depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+    default y
+    ---help---
+      Include support for Intel cstate performance events for power
+      monitoring on modern processors.
+
+config PERF_EVENTS_AMD_POWER
+    depends on PERF_EVENTS && CPU_SUP_AMD
+    tristate "AMD Processor Power Reporting Mechanism"
+    ---help---
+      Provide power reporting mechanism support for AMD processors.
+      Currently, it leverages X86_FEATURE_ACC_POWER
+      (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+      average power consumption on Family 15h processors.
+
+endmenu
@@ -6,9 +6,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
 endif
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
-
+obj-$(CONFIG_CPU_SUP_INTEL) += msr.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/
@@ -263,6 +263,7 @@ static const struct attribute_group *amd_uncore_attr_groups[] = {
 };

 static struct pmu amd_nb_pmu = {
+    .task_ctx_nr    = perf_invalid_context,
     .attr_groups    = amd_uncore_attr_groups,
     .name           = "amd_nb",
     .event_init     = amd_uncore_event_init,
@@ -274,6 +275,7 @@ static struct pmu amd_nb_pmu = {
 };

 static struct pmu amd_l2_pmu = {
+    .task_ctx_nr    = perf_invalid_context,
     .attr_groups    = amd_uncore_attr_groups,
     .name           = "amd_l2",
     .event_init     = amd_uncore_event_init,
@@ -360,6 +360,9 @@ int x86_add_exclusive(unsigned int what)
 {
     int i;

+    if (x86_pmu.lbr_pt_coexist)
+        return 0;
+
     if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
         mutex_lock(&pmc_reserve_mutex);
         for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
@@ -380,6 +383,9 @@ int x86_add_exclusive(unsigned int what)

 void x86_del_exclusive(unsigned int what)
 {
+    if (x86_pmu.lbr_pt_coexist)
+        return;
+
     atomic_dec(&x86_pmu.lbr_exclusive[what]);
     atomic_dec(&active_events);
 }
@@ -2277,7 +2283,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)

     fp = compat_ptr(ss_base + regs->bp);
     pagefault_disable();
-    while (entry->nr < PERF_MAX_STACK_DEPTH) {
+    while (entry->nr < sysctl_perf_event_max_stack) {
         unsigned long bytes;
         frame.next_frame     = 0;
         frame.return_address = 0;
@@ -2337,7 +2343,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
         return;

     pagefault_disable();
-    while (entry->nr < PERF_MAX_STACK_DEPTH) {
+    while (entry->nr < sysctl_perf_event_max_stack) {
         unsigned long bytes;
         frame.next_frame     = NULL;
         frame.return_address = 0;
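The lbr_pt_coexist early returns above have to stay paired: if x86_add_exclusive() skips taking the exclusive reference, x86_del_exclusive() must skip dropping it, or the reference counts underflow. A toy illustration of the invariant, with illustrative names rather than the kernel's types:

    struct toy_pmu {
        int lbr_pt_coexist;     /* e.g. set for Goldmont below */
        int exclusive_refs;
    };

    static void toy_add_exclusive(struct toy_pmu *pmu)
    {
        if (pmu->lbr_pt_coexist)        /* LBR and PT can run together */
            return;
        pmu->exclusive_refs++;
    }

    static void toy_del_exclusive(struct toy_pmu *pmu)
    {
        if (pmu->lbr_pt_coexist)        /* must mirror the add side */
            return;
        pmu->exclusive_refs--;          /* would underflow without the guard */
    }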
arch/x86/events/intel/Makefile (new file, 9 lines)
@@ -0,0 +1,9 @@
+obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
+obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o
+intel-rapl-objs := rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
+intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
+intel-cstate-objs := cstate.o
@@ -171,18 +171,6 @@ static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
     memset(page_address(phys->page) + index, 0, phys->size - index);
 }

-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
-    if (buf->snapshot)
-        return false;
-
-    if (local_read(&buf->data_size) >= bts->handle.size ||
-        bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
-        return true;
-
-    return false;
-}
-
 static void bts_update(struct bts_ctx *bts)
 {
     int cpu = raw_smp_processor_id();
@@ -213,18 +201,15 @@ static void bts_update(struct bts_ctx *bts)
     }
 }

+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+
 static void __bts_event_start(struct perf_event *event)
 {
     struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
     struct bts_buffer *buf = perf_get_aux(&bts->handle);
     u64 config = 0;

-    if (!buf || bts_buffer_is_full(buf, bts))
-        return;
-
-    event->hw.itrace_started = 1;
-    event->hw.state = 0;
-
     if (!buf->snapshot)
         config |= ARCH_PERFMON_EVENTSEL_INT;
     if (!event->attr.exclude_kernel)
@@ -241,16 +226,41 @@ static void __bts_event_start(struct perf_event *event)
     wmb();

     intel_pmu_enable_bts(config);

 }

 static void bts_event_start(struct perf_event *event, int flags)
 {
+    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+    struct bts_buffer *buf;
+
+    buf = perf_aux_output_begin(&bts->handle, event);
+    if (!buf)
+        goto fail_stop;
+
+    if (bts_buffer_reset(buf, &bts->handle))
+        goto fail_end_stop;
+
+    bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+    bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+    bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+    event->hw.itrace_started = 1;
+    event->hw.state = 0;

     __bts_event_start(event);

     /* PMI handler: this counter is running and likely generating PMIs */
     ACCESS_ONCE(bts->started) = 1;
+
+    return;
+
+fail_end_stop:
+    perf_aux_output_end(&bts->handle, 0, false);
+
+fail_stop:
+    event->hw.state = PERF_HES_STOPPED;
 }

 static void __bts_event_stop(struct perf_event *event)
@@ -269,15 +279,32 @@ static void __bts_event_stop(struct perf_event *event)

 static void bts_event_stop(struct perf_event *event, int flags)
 {
+    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+    struct bts_buffer *buf = perf_get_aux(&bts->handle);

     /* PMI handler: don't restart this counter */
     ACCESS_ONCE(bts->started) = 0;

     __bts_event_stop(event);

-    if (flags & PERF_EF_UPDATE)
+    if (flags & PERF_EF_UPDATE) {
         bts_update(bts);
+
+        if (buf) {
+            if (buf->snapshot)
+                bts->handle.head =
+                    local_xchg(&buf->data_size,
+                           buf->nr_pages << PAGE_SHIFT);
+            perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+                        !!local_xchg(&buf->lost, 0));
+        }
+
+        cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+        cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+        cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+        cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+    }
 }

 void intel_bts_enable_local(void)
@@ -417,34 +444,14 @@ int intel_bts_interrupt(void)

 static void bts_event_del(struct perf_event *event, int mode)
 {
-    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-    struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-    struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
     bts_event_stop(event, PERF_EF_UPDATE);
-
-    if (buf) {
-        if (buf->snapshot)
-            bts->handle.head =
-                local_xchg(&buf->data_size,
-                       buf->nr_pages << PAGE_SHIFT);
-        perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-                    !!local_xchg(&buf->lost, 0));
-    }
-
-    cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
-    cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
-    cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
-    cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
 }

 static int bts_event_add(struct perf_event *event, int mode)
 {
-    struct bts_buffer *buf;
     struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     struct hw_perf_event *hwc = &event->hw;
-    int ret = -EBUSY;

     event->hw.state = PERF_HES_STOPPED;

@@ -454,26 +461,10 @@ static int bts_event_add(struct perf_event *event, int mode)
     if (bts->handle.event)
         return -EBUSY;

-    buf = perf_aux_output_begin(&bts->handle, event);
-    if (!buf)
-        return -EINVAL;
-
-    ret = bts_buffer_reset(buf, &bts->handle);
-    if (ret) {
-        perf_aux_output_end(&bts->handle, 0, false);
-        return ret;
-    }
-
-    bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
-    bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
-    bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
     if (mode & PERF_EF_START) {
         bts_event_start(event, 0);
-        if (hwc->state & PERF_HES_STOPPED) {
-            bts_event_del(event, 0);
-            return -EBUSY;
-        }
+        if (hwc->state & PERF_HES_STOPPED)
+            return -EINVAL;
     }

     return 0;
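The net effect of the bts hunks: AUX buffer setup (perf_aux_output_begin() plus bts_buffer_reset()) moves from bts_event_add() into bts_event_start(), and the matching teardown moves from bts_event_del() into bts_event_stop(), so del collapses to a stop call. A toy sketch of the resulting pairing, with stand-in types rather than the kernel's struct pmu machinery:

    struct toy_event { int stopped; int has_aux; };

    static void toy_start(struct toy_event *e)
    {
        e->has_aux = 1;     /* perf_aux_output_begin() + bts_buffer_reset() */
        e->stopped = 0;
    }

    static void toy_stop(struct toy_event *e, int update)
    {
        e->stopped = 1;
        if (update && e->has_aux)
            e->has_aux = 0; /* perf_aux_output_end() + DS area restore */
    }

    static void toy_del(struct toy_event *e)
    {
        toy_stop(e, 1);     /* all teardown now lives in stop */
    }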
@@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids
     },
 };

+static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
+    /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+    INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
+    INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
+    EVENT_EXTRA_END
+};
+
+#define GLM_DEMAND_DATA_RD      BIT_ULL(0)
+#define GLM_DEMAND_RFO          BIT_ULL(1)
+#define GLM_ANY_RESPONSE        BIT_ULL(16)
+#define GLM_SNP_NONE_OR_MISS    BIT_ULL(33)
+#define GLM_DEMAND_READ         GLM_DEMAND_DATA_RD
+#define GLM_DEMAND_WRITE        GLM_DEMAND_RFO
+#define GLM_DEMAND_PREFETCH     (SNB_PF_DATA_RD|SNB_PF_RFO)
+#define GLM_LLC_ACCESS          GLM_ANY_RESPONSE
+#define GLM_SNP_ANY             (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
+#define GLM_LLC_MISS            (GLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 glm_hw_cache_event_ids
+                [PERF_COUNT_HW_CACHE_MAX]
+                [PERF_COUNT_HW_CACHE_OP_MAX]
+                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+    [C(L1D)] = {
+        [C(OP_READ)] = {
+            [C(RESULT_ACCESS)] = 0x81d0,    /* MEM_UOPS_RETIRED.ALL_LOADS */
+            [C(RESULT_MISS)]   = 0x0,
+        },
+        [C(OP_WRITE)] = {
+            [C(RESULT_ACCESS)] = 0x82d0,    /* MEM_UOPS_RETIRED.ALL_STORES */
+            [C(RESULT_MISS)]   = 0x0,
+        },
+        [C(OP_PREFETCH)] = {
+            [C(RESULT_ACCESS)] = 0x0,
+            [C(RESULT_MISS)]   = 0x0,
+        },
+    },
+    [C(L1I)] = {
+        [C(OP_READ)] = {
+            [C(RESULT_ACCESS)] = 0x0380,    /* ICACHE.ACCESSES */
+            [C(RESULT_MISS)]   = 0x0280,    /* ICACHE.MISSES */
+        },
+        [C(OP_WRITE)] = {
+            [C(RESULT_ACCESS)] = -1,
+            [C(RESULT_MISS)]   = -1,
+        },
+        [C(OP_PREFETCH)] = {
+            [C(RESULT_ACCESS)] = 0x0,
+            [C(RESULT_MISS)]   = 0x0,
+        },
+    },
+    [C(LL)] = {
+        [C(OP_READ)] = {
+            [C(RESULT_ACCESS)] = 0x1b7,     /* OFFCORE_RESPONSE */
+            [C(RESULT_MISS)]   = 0x1b7,     /* OFFCORE_RESPONSE */
+        },
+        [C(OP_WRITE)] = {
+            [C(RESULT_ACCESS)] = 0x1b7,     /* OFFCORE_RESPONSE */
+            [C(RESULT_MISS)]   = 0x1b7,     /* OFFCORE_RESPONSE */
+        },
+        [C(OP_PREFETCH)] = {
+            [C(RESULT_ACCESS)] = 0x1b7,     /* OFFCORE_RESPONSE */
+            [C(RESULT_MISS)]   = 0x1b7,     /* OFFCORE_RESPONSE */
+        },
+    },
+    [C(DTLB)] = {
+        [C(OP_READ)] = {
+            [C(RESULT_ACCESS)] = 0x81d0,    /* MEM_UOPS_RETIRED.ALL_LOADS */
+            [C(RESULT_MISS)]   = 0x0,
+        },
+        [C(OP_WRITE)] = {
+            [C(RESULT_ACCESS)] = 0x82d0,    /* MEM_UOPS_RETIRED.ALL_STORES */
+            [C(RESULT_MISS)]   = 0x0,
+        },
+        [C(OP_PREFETCH)] = {
+            [C(RESULT_ACCESS)] = 0x0,
+            [C(RESULT_MISS)]   = 0x0,
+        },
+    },
+    [C(ITLB)] = {
+        [C(OP_READ)] = {
+            [C(RESULT_ACCESS)] = 0x00c0,    /* INST_RETIRED.ANY_P */
+            [C(RESULT_MISS)]   = 0x0481,    /* ITLB.MISS */
+        },
+        [C(OP_WRITE)] = {
+            [C(RESULT_ACCESS)] = -1,
+            [C(RESULT_MISS)]   = -1,
+        },
+        [C(OP_PREFETCH)] = {
+            [C(RESULT_ACCESS)] = -1,
+            [C(RESULT_MISS)]   = -1,
+        },
+    },
+    [C(BPU)] = {
+        [C(OP_READ)] = {
+            [C(RESULT_ACCESS)] = 0x00c4,    /* BR_INST_RETIRED.ALL_BRANCHES */
+            [C(RESULT_MISS)]   = 0x00c5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
+        },
+        [C(OP_WRITE)] = {
+            [C(RESULT_ACCESS)] = -1,
+            [C(RESULT_MISS)]   = -1,
+        },
+        [C(OP_PREFETCH)] = {
+            [C(RESULT_ACCESS)] = -1,
+            [C(RESULT_MISS)]   = -1,
+        },
+    },
+};
+
+static __initconst const u64 glm_hw_cache_extra_regs
+                [PERF_COUNT_HW_CACHE_MAX]
+                [PERF_COUNT_HW_CACHE_OP_MAX]
+                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+    [C(LL)] = {
+        [C(OP_READ)] = {
+            [C(RESULT_ACCESS)] = GLM_DEMAND_READ|
+                                 GLM_LLC_ACCESS,
+            [C(RESULT_MISS)]   = GLM_DEMAND_READ|
+                                 GLM_LLC_MISS,
+        },
+        [C(OP_WRITE)] = {
+            [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
+                                 GLM_LLC_ACCESS,
+            [C(RESULT_MISS)]   = GLM_DEMAND_WRITE|
+                                 GLM_LLC_MISS,
+        },
+        [C(OP_PREFETCH)] = {
+            [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
+                                 GLM_LLC_ACCESS,
+            [C(RESULT_MISS)]   = GLM_DEMAND_PREFETCH|
+                                 GLM_LLC_MISS,
+        },
+    },
+};
+
 #define KNL_OT_L2_HITE      BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF      BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL    BIT_ULL(21)
@@ -3447,7 +3581,7 @@ __init int intel_pmu_init(void)
         memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
                sizeof(hw_cache_extra_regs));

-        intel_pmu_lbr_init_atom();
+        intel_pmu_lbr_init_slm();

         x86_pmu.event_constraints = intel_slm_event_constraints;
         x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -3456,6 +3590,30 @@ __init int intel_pmu_init(void)
         pr_cont("Silvermont events, ");
         break;

+    case 92: /* 14nm Atom "Goldmont" */
+    case 95: /* 14nm Atom "Goldmont Denverton" */
+        memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
+               sizeof(hw_cache_event_ids));
+        memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
+               sizeof(hw_cache_extra_regs));
+
+        intel_pmu_lbr_init_skl();
+
+        x86_pmu.event_constraints = intel_slm_event_constraints;
+        x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
+        x86_pmu.extra_regs = intel_glm_extra_regs;
+        /*
+         * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+         * for precise cycles.
+         * :pp is identical to :ppp
+         */
+        x86_pmu.pebs_aliases = NULL;
+        x86_pmu.pebs_prec_dist = true;
+        x86_pmu.lbr_pt_coexist = true;
+        x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+        pr_cont("Goldmont events, ");
+        break;
+
     case 37: /* 32nm Westmere */
     case 44: /* 32nm Westmere-EP */
     case 47: /* 32nm Westmere-EX */
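The Goldmont LL entries work like the other offcore-response tables: event id 0x1b7 selects OFFCORE_RESPONSE, and the per-op config written to the extra register is an OR of request and response bits. A small sketch of how the defines above compose, with the values copied from those defines:

    #include <stdint.h>

    #define BIT_ULL(n)          (1ULL << (n))
    #define GLM_DEMAND_DATA_RD  BIT_ULL(0)
    #define GLM_ANY_RESPONSE    BIT_ULL(16)

    /* [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = GLM_DEMAND_READ | GLM_LLC_ACCESS */
    static const uint64_t glm_ll_read_access =
        GLM_DEMAND_DATA_RD | GLM_ANY_RESPONSE;  /* == 0x10001 */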
@@ -91,6 +91,8 @@
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"

+MODULE_LICENSE("GPL");
+
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)     \
 static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
                 struct kobj_attribute *attr,                \
@@ -106,22 +108,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
                        struct device_attribute *attr,
                        char *buf);

+/* Model -> events mapping */
+struct cstate_model {
+    unsigned long   core_events;
+    unsigned long   pkg_events;
+    unsigned long   quirks;
+};
+
+/* Quirk flags */
+#define SLM_PKG_C6_USE_C7_MSR   (1UL << 0)
+
 struct perf_cstate_msr {
     u64 msr;
     struct perf_pmu_events_attr *attr;
-    bool (*test)(int idx);
 };


 /* cstate_core PMU */

 static struct pmu cstate_core_pmu;
 static bool has_cstate_core;

-enum perf_cstate_core_id {
 /*
  * cstate_core events
  */
+enum perf_cstate_core_events {
     PERF_CSTATE_CORE_C1_RES = 0,
     PERF_CSTATE_CORE_C3_RES,
     PERF_CSTATE_CORE_C6_RES,
@@ -130,69 +137,16 @@ enum perf_cstate_core_id {
     PERF_CSTATE_CORE_EVENT_MAX,
 };

-bool test_core(int idx)
-{
-    if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-        boot_cpu_data.x86 != 6)
-        return false;
-
-    switch (boot_cpu_data.x86_model) {
-    case 30: /* 45nm Nehalem    */
-    case 26: /* 45nm Nehalem-EP */
-    case 46: /* 45nm Nehalem-EX */
-
-    case 37: /* 32nm Westmere    */
-    case 44: /* 32nm Westmere-EP */
-    case 47: /* 32nm Westmere-EX */
-        if (idx == PERF_CSTATE_CORE_C3_RES ||
-            idx == PERF_CSTATE_CORE_C6_RES)
-            return true;
-        break;
-    case 42: /* 32nm SandyBridge         */
-    case 45: /* 32nm SandyBridge-E/EN/EP */
-
-    case 58: /* 22nm IvyBridge       */
-    case 62: /* 22nm IvyBridge-EP/EX */
-
-    case 60: /* 22nm Haswell Core */
-    case 63: /* 22nm Haswell Server */
-    case 69: /* 22nm Haswell ULT */
-    case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-    case 61: /* 14nm Broadwell Core-M */
-    case 86: /* 14nm Broadwell Xeon D */
-    case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-    case 79: /* 14nm Broadwell Server */
-
-    case 78: /* 14nm Skylake Mobile */
-    case 94: /* 14nm Skylake Desktop */
-        if (idx == PERF_CSTATE_CORE_C3_RES ||
-            idx == PERF_CSTATE_CORE_C6_RES ||
-            idx == PERF_CSTATE_CORE_C7_RES)
-            return true;
-        break;
-    case 55: /* 22nm Atom "Silvermont"                */
-    case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-    case 76: /* 14nm Atom "Airmont"                   */
-        if (idx == PERF_CSTATE_CORE_C1_RES ||
-            idx == PERF_CSTATE_CORE_C6_RES)
-            return true;
-        break;
-    }
-
-    return false;
-}
-
 PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
 PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");

 static struct perf_cstate_msr core_msr[] = {
-    [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,       &evattr_cstate_core_c1, test_core, },
-    [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
-    [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
-    [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
+    [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,       &evattr_cstate_core_c1 },
+    [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 },
+    [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 },
+    [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 },
 };

 static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
@@ -234,18 +188,11 @@ static const struct attribute_group *core_attr_groups[] = {
     NULL,
 };

-/* cstate_core PMU end */
-
-
 /* cstate_pkg PMU */

 static struct pmu cstate_pkg_pmu;
 static bool has_cstate_pkg;

-enum perf_cstate_pkg_id {
 /*
  * cstate_pkg events
  */
+enum perf_cstate_pkg_events {
     PERF_CSTATE_PKG_C2_RES = 0,
     PERF_CSTATE_PKG_C3_RES,
     PERF_CSTATE_PKG_C6_RES,
@@ -257,69 +204,6 @@ enum perf_cstate_pkg_id {
     PERF_CSTATE_PKG_EVENT_MAX,
 };

-bool test_pkg(int idx)
-{
-    if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-        boot_cpu_data.x86 != 6)
-        return false;
-
-    switch (boot_cpu_data.x86_model) {
-    case 30: /* 45nm Nehalem    */
-    case 26: /* 45nm Nehalem-EP */
-    case 46: /* 45nm Nehalem-EX */
-
-    case 37: /* 32nm Westmere    */
-    case 44: /* 32nm Westmere-EP */
-    case 47: /* 32nm Westmere-EX */
-        if (idx == PERF_CSTATE_CORE_C3_RES ||
-            idx == PERF_CSTATE_CORE_C6_RES ||
-            idx == PERF_CSTATE_CORE_C7_RES)
-            return true;
-        break;
-    case 42: /* 32nm SandyBridge         */
-    case 45: /* 32nm SandyBridge-E/EN/EP */
-
-    case 58: /* 22nm IvyBridge       */
-    case 62: /* 22nm IvyBridge-EP/EX */
-
-    case 60: /* 22nm Haswell Core */
-    case 63: /* 22nm Haswell Server */
-    case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-    case 61: /* 14nm Broadwell Core-M */
-    case 86: /* 14nm Broadwell Xeon D */
-    case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-    case 79: /* 14nm Broadwell Server */
-
-    case 78: /* 14nm Skylake Mobile */
-    case 94: /* 14nm Skylake Desktop */
-        if (idx == PERF_CSTATE_PKG_C2_RES ||
-            idx == PERF_CSTATE_PKG_C3_RES ||
-            idx == PERF_CSTATE_PKG_C6_RES ||
-            idx == PERF_CSTATE_PKG_C7_RES)
-            return true;
-        break;
-    case 55: /* 22nm Atom "Silvermont"                */
-    case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-    case 76: /* 14nm Atom "Airmont"                   */
-        if (idx == PERF_CSTATE_CORE_C6_RES)
-            return true;
-        break;
-    case 69: /* 22nm Haswell ULT */
-        if (idx == PERF_CSTATE_PKG_C2_RES ||
-            idx == PERF_CSTATE_PKG_C3_RES ||
-            idx == PERF_CSTATE_PKG_C6_RES ||
-            idx == PERF_CSTATE_PKG_C7_RES ||
-            idx == PERF_CSTATE_PKG_C8_RES ||
-            idx == PERF_CSTATE_PKG_C9_RES ||
-            idx == PERF_CSTATE_PKG_C10_RES)
-            return true;
-        break;
-    }
-
-    return false;
-}
-
 PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
|
||||
PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
|
||||
|
||||
static struct perf_cstate_msr pkg_msr[] = {
|
||||
[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 },
|
||||
[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 },
|
||||
[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 },
|
||||
[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 },
|
||||
[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 },
|
||||
[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 },
|
||||
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 },
|
||||
};
|
||||
|
||||
static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
|
||||
@ -366,8 +250,6 @@ static const struct attribute_group *pkg_attr_groups[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* cstate_pkg PMU end*/
|
||||
|
||||
static ssize_t cstate_get_attr_cpumask(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
@ -385,7 +267,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
|
||||
static int cstate_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config;
|
||||
int ret = 0;
|
||||
int cpu;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
@ -400,26 +282,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (event->pmu == &cstate_core_pmu) {
|
||||
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
if (!core_msr[cfg].attr)
|
||||
return -EINVAL;
|
||||
event->hw.event_base = core_msr[cfg].msr;
|
||||
cpu = cpumask_any_and(&cstate_core_cpu_mask,
|
||||
topology_sibling_cpumask(event->cpu));
|
||||
} else if (event->pmu == &cstate_pkg_pmu) {
|
||||
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
if (!pkg_msr[cfg].attr)
|
||||
return -EINVAL;
|
||||
event->hw.event_base = pkg_msr[cfg].msr;
|
||||
} else
|
||||
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
|
||||
topology_core_cpumask(event->cpu));
|
||||
} else {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/* must be done before validate_group */
|
||||
if (cpu >= nr_cpu_ids)
|
||||
return -ENODEV;
|
||||
|
||||
event->cpu = cpu;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = -1;
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 cstate_pmu_read_counter(struct perf_event *event)
|
||||
@@ -469,172 +361,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
     return 0;
 }

+/*
+ * Check if exiting cpu is the designated reader. If so migrate the
+ * events when there is a valid target available
+ */
 static void cstate_cpu_exit(int cpu)
 {
-    int i, id, target;
+    unsigned int target;

-    /* cpu exit for cstate core */
-    if (has_cstate_core) {
-        id = topology_core_id(cpu);
-        target = -1;
+    if (has_cstate_core &&
+        cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {

-        for_each_online_cpu(i) {
-            if (i == cpu)
-                continue;
-            if (id == topology_core_id(i)) {
-                target = i;
-                break;
-            }
-        }
-        if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+        target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+        /* Migrate events if there is a valid target */
+        if (target < nr_cpu_ids) {
             cpumask_set_cpu(target, &cstate_core_cpu_mask);
-        WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-        if (target >= 0)
             perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+        }
     }

-    /* cpu exit for cstate pkg */
-    if (has_cstate_pkg) {
-        id = topology_physical_package_id(cpu);
-        target = -1;
+    if (has_cstate_pkg &&
+        cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {

-        for_each_online_cpu(i) {
-            if (i == cpu)
-                continue;
-            if (id == topology_physical_package_id(i)) {
-                target = i;
-                break;
-            }
-        }
-        if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+        target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+        /* Migrate events if there is a valid target */
+        if (target < nr_cpu_ids) {
             cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-        WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-        if (target >= 0)
             perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+        }
     }
 }

 static void cstate_cpu_init(int cpu)
 {
-    int i, id;
+    unsigned int target;

-    /* cpu init for cstate core */
-    if (has_cstate_core) {
-        id = topology_core_id(cpu);
-        for_each_cpu(i, &cstate_core_cpu_mask) {
-            if (id == topology_core_id(i))
-                break;
-        }
-        if (i >= nr_cpu_ids)
-            cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-    }
+    /*
+     * If this is the first online thread of that core, set it in
+     * the core cpu mask as the designated reader.
+     */
+    target = cpumask_any_and(&cstate_core_cpu_mask,
+                             topology_sibling_cpumask(cpu));

-    /* cpu init for cstate pkg */
-    if (has_cstate_pkg) {
-        id = topology_physical_package_id(cpu);
-        for_each_cpu(i, &cstate_pkg_cpu_mask) {
-            if (id == topology_physical_package_id(i))
-                break;
-        }
-        if (i >= nr_cpu_ids)
-            cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-    }
+    if (has_cstate_core && target >= nr_cpu_ids)
+        cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+    /*
+     * If this is the first online thread of that package, set it
+     * in the package cpu mask as the designated reader.
+     */
+    target = cpumask_any_and(&cstate_pkg_cpu_mask,
+                             topology_core_cpumask(cpu));
+    if (has_cstate_pkg && target >= nr_cpu_ids)
+        cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
 }

 static int cstate_cpu_notifier(struct notifier_block *self,
-                   unsigned long action, void *hcpu)
+                   unsigned long action, void *hcpu)
 {
     unsigned int cpu = (long)hcpu;

     switch (action & ~CPU_TASKS_FROZEN) {
     case CPU_UP_PREPARE:
         break;
     case CPU_STARTING:
         cstate_cpu_init(cpu);
         break;
     case CPU_UP_CANCELED:
     case CPU_DYING:
         break;
     case CPU_ONLINE:
     case CPU_DEAD:
         break;
     case CPU_DOWN_PREPARE:
         cstate_cpu_exit(cpu);
         break;
     default:
         break;
     }

     return NOTIFY_OK;
 }

-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there is no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
-                 struct attribute **events_attrs,
-                 int max_event_nr)
-{
-    int i, j = 0;
-    u64 val;
-
-    /* Probe the cstate events. */
-    for (i = 0; i < max_event_nr; i++) {
-        if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-            msr[i].attr = NULL;
-    }
-
-    /* List remaining events in the sysfs attrs. */
-    for (i = 0; i < max_event_nr; i++) {
-        if (msr[i].attr)
-            events_attrs[j++] = &msr[i].attr->attr.attr;
-    }
-    events_attrs[j] = NULL;
-
-    return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
-    /* SLM has different MSR for PKG C6 */
-    switch (boot_cpu_data.x86_model) {
-    case 55:
-    case 76:
-    case 77:
-        pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
-    }
-
-    if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
-        has_cstate_core = true;
-
-    if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
-        has_cstate_pkg = true;
-
-    return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
-    int cpu;
-
-    cpu_notifier_register_begin();
-
-    for_each_online_cpu(cpu)
-        cstate_cpu_init(cpu);
-
-    __perf_cpu_notifier(cstate_cpu_notifier);
-
-    cpu_notifier_register_done();
-}
+static struct notifier_block cstate_cpu_nb = {
+    .notifier_call  = cstate_cpu_notifier,
+    .priority       = CPU_PRI_PERF + 1,
+};

 static struct pmu cstate_core_pmu = {
     .attr_groups    = core_attr_groups,
     .name           = "cstate_core",
     .task_ctx_nr    = perf_invalid_context,
     .event_init     = cstate_pmu_event_init,
-    .add            = cstate_pmu_event_add, /* must have */
-    .del            = cstate_pmu_event_del, /* must have */
+    .add            = cstate_pmu_event_add,
+    .del            = cstate_pmu_event_del,
     .start          = cstate_pmu_event_start,
     .stop           = cstate_pmu_event_stop,
     .read           = cstate_pmu_event_update,
@@ -646,49 +457,203 @@ static struct pmu cstate_pkg_pmu = {
     .name           = "cstate_pkg",
     .task_ctx_nr    = perf_invalid_context,
     .event_init     = cstate_pmu_event_init,
-    .add            = cstate_pmu_event_add, /* must have */
-    .del            = cstate_pmu_event_del, /* must have */
+    .add            = cstate_pmu_event_add,
+    .del            = cstate_pmu_event_del,
     .start          = cstate_pmu_event_start,
     .stop           = cstate_pmu_event_stop,
     .read           = cstate_pmu_event_update,
     .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
 };

-static void __init cstate_pmus_register(void)
+static const struct cstate_model nhm_cstates __initconst = {
+    .core_events    = BIT(PERF_CSTATE_CORE_C3_RES) |
+                      BIT(PERF_CSTATE_CORE_C6_RES),
+
+    .pkg_events     = BIT(PERF_CSTATE_PKG_C3_RES) |
+                      BIT(PERF_CSTATE_PKG_C6_RES) |
+                      BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model snb_cstates __initconst = {
+    .core_events    = BIT(PERF_CSTATE_CORE_C3_RES) |
+                      BIT(PERF_CSTATE_CORE_C6_RES) |
+                      BIT(PERF_CSTATE_CORE_C7_RES),
+
+    .pkg_events     = BIT(PERF_CSTATE_PKG_C2_RES) |
+                      BIT(PERF_CSTATE_PKG_C3_RES) |
+                      BIT(PERF_CSTATE_PKG_C6_RES) |
+                      BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model hswult_cstates __initconst = {
+    .core_events    = BIT(PERF_CSTATE_CORE_C3_RES) |
+                      BIT(PERF_CSTATE_CORE_C6_RES) |
+                      BIT(PERF_CSTATE_CORE_C7_RES),
+
+    .pkg_events     = BIT(PERF_CSTATE_PKG_C2_RES) |
+                      BIT(PERF_CSTATE_PKG_C3_RES) |
+                      BIT(PERF_CSTATE_PKG_C6_RES) |
+                      BIT(PERF_CSTATE_PKG_C7_RES) |
+                      BIT(PERF_CSTATE_PKG_C8_RES) |
+                      BIT(PERF_CSTATE_PKG_C9_RES) |
+                      BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model slm_cstates __initconst = {
+    .core_events    = BIT(PERF_CSTATE_CORE_C1_RES) |
+                      BIT(PERF_CSTATE_CORE_C6_RES),
+
+    .pkg_events     = BIT(PERF_CSTATE_PKG_C6_RES),
+    .quirks         = SLM_PKG_C6_USE_C7_MSR,
+};
+
+#define X86_CSTATES_MODEL(model, states)                \
+    { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
+
+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
+    X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
+    X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
+    X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
+
+    X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
+    X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
+    X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
+
+    X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
+    X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
+
+    X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
+    X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
+
+    X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
+    X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
+    X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
+
+    X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
+
+    X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
+    X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
+    X86_CSTATES_MODEL(76, slm_cstates),    /* 22nm Atom Airmont         */
+
+    X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
+    X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
+    X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
+    X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
+
+    X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
+    X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
+    { },
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
+
+/*
+ * Probe the cstate events and insert the available one into sysfs attrs
+ * Return false if there are no available events.
+ */
+static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
+                    struct perf_cstate_msr *msr,
+                    struct attribute **attrs)
 {
-    int err;
+    bool found = false;
+    unsigned int bit;
+    u64 val;

+    for (bit = 0; bit < max; bit++) {
+        if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
+            *attrs++ = &msr[bit].attr->attr.attr;
+            found = true;
+        } else {
+            msr[bit].attr = NULL;
+        }
+    }
+    *attrs = NULL;
+
+    return found;
+}
+
+static int __init cstate_probe(const struct cstate_model *cm)
+{
+    /* SLM has different MSR for PKG C6 */
+    if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
+        pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+
+    has_cstate_core = cstate_probe_msr(cm->core_events,
+                       PERF_CSTATE_CORE_EVENT_MAX,
+                       core_msr, core_events_attrs);
+
+    has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
+                      PERF_CSTATE_PKG_EVENT_MAX,
+                      pkg_msr, pkg_events_attrs);
+
+    return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static inline void cstate_cleanup(void)
+{
+    if (has_cstate_core)
+        perf_pmu_unregister(&cstate_core_pmu);
+
+    if (has_cstate_pkg)
+        perf_pmu_unregister(&cstate_pkg_pmu);
+}
+
+static int __init cstate_init(void)
+{
+    int cpu, err;
+
+    cpu_notifier_register_begin();
+    for_each_online_cpu(cpu)
+        cstate_cpu_init(cpu);
+
     if (has_cstate_core) {
         err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
-        if (WARN_ON(err))
-            pr_info("Failed to register PMU %s error %d\n",
-                cstate_core_pmu.name, err);
+        if (err) {
+            has_cstate_core = false;
+            pr_info("Failed to register cstate core pmu\n");
+            goto out;
+        }
     }

     if (has_cstate_pkg) {
         err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
-        if (WARN_ON(err))
-            pr_info("Failed to register PMU %s error %d\n",
-                cstate_pkg_pmu.name, err);
+        if (err) {
+            has_cstate_pkg = false;
+            pr_info("Failed to register cstate pkg pmu\n");
+            cstate_cleanup();
+            goto out;
+        }
     }
+    __register_cpu_notifier(&cstate_cpu_nb);
+out:
+    cpu_notifier_register_done();
+    return err;
 }

 static int __init cstate_pmu_init(void)
 {
+    const struct x86_cpu_id *id;
     int err;

-    if (cpu_has_hypervisor)
+    if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
         return -ENODEV;

-    err = cstate_init();
+    id = x86_match_cpu(intel_cstates_match);
+    if (!id)
+        return -ENODEV;
+
+    err = cstate_probe((const struct cstate_model *) id->driver_data);
     if (err)
         return err;

-    cstate_cpumask_init();
-
-    cstate_pmus_register();
-
-    return 0;
+    return cstate_init();
 }
+module_init(cstate_pmu_init);

-device_initcall(cstate_pmu_init);
+static void __exit cstate_pmu_exit(void)
+{
+    cpu_notifier_register_begin();
+    __unregister_cpu_notifier(&cstate_cpu_nb);
+    cstate_cleanup();
+    cpu_notifier_register_done();
+}
+module_exit(cstate_pmu_exit);
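The rewritten cstate_probe_msr() above is a compact table-driven probe: a per-model bitmask says which events may exist, rdmsrl_safe() confirms each MSR is actually readable, and the surviving attributes are compacted into a NULL-terminated sysfs array. The same shape in plain C, with toy types and rdmsrl_safe() replaced by a stub:

    #include <stdbool.h>
    #include <stddef.h>

    struct toy_msr { unsigned int msr; const char *attr; };

    static bool msr_readable(unsigned int msr) { return msr != 0; } /* stub */

    static bool probe(unsigned long evmsk, int max,
                      struct toy_msr *msr, const char **attrs)
    {
        bool found = false;
        int bit;

        for (bit = 0; bit < max; bit++) {
            if ((evmsk & (1UL << bit)) && msr_readable(msr[bit].msr)) {
                *attrs++ = msr[bit].attr;   /* keep this event */
                found = true;
            } else {
                msr[bit].attr = NULL;       /* hide it from sysfs */
            }
        }
        *attrs = NULL;                      /* NULL-terminate for sysfs */
        return found;
    }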
@@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
     EVENT_CONSTRAINT_END
 };

+struct event_constraint intel_glm_pebs_event_constraints[] = {
+    /* Allow all events as PEBS with no flags */
+    INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+    EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
     INTEL_PLD_CONSTRAINT(0x100b, 0xf),       /* MEM_INST_RETIRED.* */
     INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
@@ -14,7 +14,8 @@ enum {
     LBR_FORMAT_EIP_FLAGS    = 0x03,
     LBR_FORMAT_EIP_FLAGS2   = 0x04,
     LBR_FORMAT_INFO         = 0x05,
-    LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
+    LBR_FORMAT_TIME         = 0x06,
+    LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
 };

 static enum {
@@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
             abort = !!(info & LBR_INFO_ABORT);
             cycles = (info & LBR_INFO_CYCLES);
         }
+
+        if (lbr_format == LBR_FORMAT_TIME) {
+            mis = !!(from & LBR_FROM_FLAG_MISPRED);
+            pred = !mis;
+            skip = 1;
+            cycles = ((to >> 48) & LBR_INFO_CYCLES);
+
+            to = (u64)((((s64)to) << 16) >> 16);
+        }
+
         if (lbr_flags & LBR_EIP_FLAGS) {
             mis = !!(from & LBR_FROM_FLAG_MISPRED);
             pred = !mis;
@@ -1049,6 +1060,24 @@ void __init intel_pmu_lbr_init_atom(void)
     pr_cont("8-deep LBR, ");
 }

+/* slm */
+void __init intel_pmu_lbr_init_slm(void)
+{
+    x86_pmu.lbr_nr   = 8;
+    x86_pmu.lbr_tos  = MSR_LBR_TOS;
+    x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+    x86_pmu.lbr_to   = MSR_LBR_CORE_TO;
+
+    x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+    x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+    /*
+     * SW branch filter usage:
+     * - compensate for lack of HW filter
+     */
+    pr_cont("8-deep LBR, ");
+}
+
 /* Knights Landing */
 void intel_pmu_lbr_init_knl(void)
 {
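The only subtle line in the LBR_FORMAT_TIME block is the last one: in this format the top 16 bits of the TO register carry the cycle count, so the remaining 48-bit virtual address has to be sign-extended back into a canonical 64-bit pointer. The shift pair does exactly that:

    #include <stdint.h>

    /* sign-extend a 48-bit address: replicate bit 47 through bits 63:48 */
    static uint64_t sext48(uint64_t to)
    {
        return (uint64_t)(((int64_t)to << 16) >> 16);
    }
    /* e.g. sext48(0x0000ffff81000000ULL) == 0xffffffff81000000ULL */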
@@ -67,11 +67,13 @@ static struct pt_cap_desc {
     PT_CAP(max_subleaf,            0, CR_EAX, 0xffffffff),
     PT_CAP(cr3_filtering,          0, CR_EBX, BIT(0)),
     PT_CAP(psb_cyc,                0, CR_EBX, BIT(1)),
+    PT_CAP(ip_filtering,           0, CR_EBX, BIT(2)),
     PT_CAP(mtc,                    0, CR_EBX, BIT(3)),
     PT_CAP(topa_output,            0, CR_ECX, BIT(0)),
     PT_CAP(topa_multiple_entries,  0, CR_ECX, BIT(1)),
     PT_CAP(single_range_output,    0, CR_ECX, BIT(2)),
     PT_CAP(payloads_lip,           0, CR_ECX, BIT(31)),
+    PT_CAP(num_address_ranges,     1, CR_EAX, 0x3),
     PT_CAP(mtc_periods,            1, CR_EAX, 0xffff0000),
     PT_CAP(cycle_thresholds,       1, CR_EBX, 0xffff),
     PT_CAP(psb_periods,            1, CR_EBX, 0xffff0000),
@@ -125,9 +127,46 @@ static struct attribute_group pt_format_group = {
     .attrs  = pt_formats_attr,
 };

+static ssize_t
+pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
+            char *page)
+{
+    struct perf_pmu_events_attr *pmu_attr =
+        container_of(attr, struct perf_pmu_events_attr, attr);
+
+    switch (pmu_attr->id) {
+    case 0:
+        return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
+    case 1:
+        return sprintf(page, "%u:%u\n",
+                   pt_pmu.tsc_art_num,
+                   pt_pmu.tsc_art_den);
+    default:
+        break;
+    }
+
+    return -EINVAL;
+}
+
+PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
+           pt_timing_attr_show);
+PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
+           pt_timing_attr_show);
+
+static struct attribute *pt_timing_attr[] = {
+    &timing_attr_max_nonturbo_ratio.attr.attr,
+    &timing_attr_tsc_art_ratio.attr.attr,
+    NULL,
+};
+
+static struct attribute_group pt_timing_group = {
+    .attrs  = pt_timing_attr,
+};
+
 static const struct attribute_group *pt_attr_groups[] = {
     &pt_cap_group,
     &pt_format_group,
+    &pt_timing_group,
     NULL,
 };

@@ -140,6 +179,23 @@ static int __init pt_pmu_hw_init(void)
     int ret;
     long i;

+    rdmsrl(MSR_PLATFORM_INFO, reg);
+    pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
+
+    /*
+     * if available, read in TSC to core crystal clock ratio,
+     * otherwise, zero for numerator stands for "not enumerated"
+     * as per SDM
+     */
+    if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+        u32 eax, ebx, ecx, edx;
+
+        cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+
+        pt_pmu.tsc_art_num = ebx;
+        pt_pmu.tsc_art_den = eax;
+    }
+
     if (boot_cpu_has(X86_FEATURE_VMX)) {
         /*
          * Intel SDM, 36.5 "Tracing post-VMXON" says that
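CPUID_TSC_LEAF here is, assuming the usual definition, the SDM's Time Stamp Counter and Core Crystal Clock leaf (0x15): EBX/EAX gives the TSC to crystal clock ratio, and EBX == 0 means "not enumerated", which is why the numerator doubles as the validity flag. A user-space analogue, assuming GCC/Clang's cpuid.h:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* leaf 0x15: EBX/EAX = TSC to core crystal clock ratio */
        if (__get_cpuid(0x15, &eax, &ebx, &ecx, &edx) && ebx)
            printf("TSC/crystal ratio: %u/%u\n", ebx, eax);
        else
            printf("ratio not enumerated\n");
        return 0;
    }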
@@ -263,6 +319,75 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */

+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+    unsigned long   msr_a;
+    unsigned long   msr_b;
+    unsigned int    reg_off;
+} pt_address_ranges[] = {
+    {
+        .msr_a   = MSR_IA32_RTIT_ADDR0_A,
+        .msr_b   = MSR_IA32_RTIT_ADDR0_B,
+        .reg_off = RTIT_CTL_ADDR0_OFFSET,
+    },
+    {
+        .msr_a   = MSR_IA32_RTIT_ADDR1_A,
+        .msr_b   = MSR_IA32_RTIT_ADDR1_B,
+        .reg_off = RTIT_CTL_ADDR1_OFFSET,
+    },
+    {
+        .msr_a   = MSR_IA32_RTIT_ADDR2_A,
+        .msr_b   = MSR_IA32_RTIT_ADDR2_B,
+        .reg_off = RTIT_CTL_ADDR2_OFFSET,
+    },
+    {
+        .msr_a   = MSR_IA32_RTIT_ADDR3_A,
+        .msr_b   = MSR_IA32_RTIT_ADDR3_B,
+        .reg_off = RTIT_CTL_ADDR3_OFFSET,
+    }
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+    struct pt_filters *filters = event->hw.addr_filters;
+    struct pt *pt = this_cpu_ptr(&pt_ctx);
+    unsigned int range = 0;
+    u64 rtit_ctl = 0;
+
+    if (!filters)
+        return 0;
+
+    perf_event_addr_filters_sync(event);
+
+    for (range = 0; range < filters->nr_filters; range++) {
+        struct pt_filter *filter = &filters->filter[range];
+
+        /*
+         * Note, if the range has zero start/end addresses due
+         * to its dynamic object not being loaded yet, we just
+         * go ahead and program zeroed range, which will simply
+         * produce no data. Note^2: if executable code at 0x0
+         * is a concern, we can set up an "invalid" configuration
+         * such as msr_b < msr_a.
+         */
+
+        /* avoid redundant msr writes */
+        if (pt->filters.filter[range].msr_a != filter->msr_a) {
+            wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+            pt->filters.filter[range].msr_a = filter->msr_a;
+        }
+
+        if (pt->filters.filter[range].msr_b != filter->msr_b) {
+            wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+            pt->filters.filter[range].msr_b = filter->msr_b;
+        }
+
+        rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+    }
+
+    return rtit_ctl;
+}
+
 static void pt_config(struct perf_event *event)
 {
     u64 reg;
@ -272,7 +397,8 @@ static void pt_config(struct perf_event *event)
|
||||
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
|
||||
}
|
||||
|
||||
reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
|
||||
reg = pt_config_filters(event);
|
||||
reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
|
||||
|
||||
if (!event->attr.exclude_kernel)
|
||||
reg |= RTIT_CTL_OS;
|
||||
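pt_config_filters() packs each range's behaviour into RTIT_CTL: range n owns a 4-bit field starting at bit 32 + 4*n (the ADDRn_OFFSET constants defined in pt.h below), and pt_event_addr_filters_sync() later in this patch encodes 1 for a trace-enable range and 2 for a trace-stop range. A standalone sketch of just that encoding step:

#include <stdint.h>

#define RTIT_CTL_ADDR0_OFFSET 32

/* mirrors: rtit_ctl |= filter->config << pt_address_ranges[range].reg_off */
static uint64_t pt_filter_bits(unsigned int range, uint64_t config)
{
        return config << (RTIT_CTL_ADDR0_OFFSET + 4 * range);
}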
@@ -921,24 +1047,80 @@ static void pt_buffer_free_aux(void *data)
kfree(buf);
}

/**
* pt_buffer_is_full() - check if the buffer is full
* @buf: PT buffer.
* @pt: Per-cpu pt handle.
*
* If the user hasn't read data from the output region that aux_head
* points to, the buffer is considered full: the user needs to read at
* least this region and update aux_tail to point past it.
*/
static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
static int pt_addr_filters_init(struct perf_event *event)
{
if (buf->snapshot)
return false;
struct pt_filters *filters;
int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);

if (local_read(&buf->data_size) >= pt->handle.size)
return true;
if (!pt_cap_get(PT_CAP_num_address_ranges))
return 0;

return false;
filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
if (!filters)
return -ENOMEM;

if (event->parent)
memcpy(filters, event->parent->hw.addr_filters,
sizeof(*filters));

event->hw.addr_filters = filters;

return 0;
}

static void pt_addr_filters_fini(struct perf_event *event)
{
kfree(event->hw.addr_filters);
event->hw.addr_filters = NULL;
}

static int pt_event_addr_filters_validate(struct list_head *filters)
{
struct perf_addr_filter *filter;
int range = 0;

list_for_each_entry(filter, filters, entry) {
/* PT doesn't support single address triggers */
if (!filter->range)
return -EOPNOTSUPP;

if (!filter->inode && !kernel_ip(filter->offset))
return -EINVAL;

if (++range > pt_cap_get(PT_CAP_num_address_ranges))
return -EOPNOTSUPP;
}

return 0;
}

static void pt_event_addr_filters_sync(struct perf_event *event)
{
struct perf_addr_filters_head *head = perf_event_addr_filters(event);
unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
struct pt_filters *filters = event->hw.addr_filters;
struct perf_addr_filter *filter;
int range = 0;

if (!filters)
return;

list_for_each_entry(filter, &head->list, entry) {
if (filter->inode && !offs[range]) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
msr_a = filter->offset + offs[range];
msr_b = filter->size + msr_a;
}

filters->filter[range].msr_a = msr_a;
filters->filter[range].msr_b = msr_b;
filters->filter[range].config = filter->filter ? 1 : 2;
range++;
}

filters->nr_filters = range;
}

/**
@@ -955,7 +1137,7 @@ void intel_pt_interrupt(void)
* after PT has been disabled by pt_event_stop(). Make sure we don't
* do anything (particularly, re-enable) for this event here.
*/
if (!ACCESS_ONCE(pt->handle_nmi))
if (!READ_ONCE(pt->handle_nmi))
return;

/*
@@ -1040,23 +1222,36 @@ EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);

static void pt_event_start(struct perf_event *event, int mode)
{
struct hw_perf_event *hwc = &event->hw;
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
struct pt_buffer *buf;

if (READ_ONCE(pt->vmx_on))
return;

if (!buf || pt_buffer_is_full(buf, pt)) {
event->hw.state = PERF_HES_STOPPED;
return;
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;

pt_buffer_reset_offsets(buf, pt->handle.head);
if (!buf->snapshot) {
if (pt_buffer_reset_markers(buf, &pt->handle))
goto fail_end_stop;
}

ACCESS_ONCE(pt->handle_nmi) = 1;
event->hw.state = 0;
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;

pt_config_buffer(buf->cur->table, buf->cur_idx,
buf->output_off);
pt_config(event);

return;

fail_end_stop:
perf_aux_output_end(&pt->handle, 0, true);
fail_stop:
hwc->state = PERF_HES_STOPPED;
}

static void pt_event_stop(struct perf_event *event, int mode)
@@ -1067,7 +1262,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
* Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt().
*/
ACCESS_ONCE(pt->handle_nmi) = 0;
WRITE_ONCE(pt->handle_nmi, 0);

pt_config_stop(event);

@@ -1090,19 +1285,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
pt_handle_status(pt);

pt_update_head(pt);
}
}

static void pt_event_del(struct perf_event *event, int mode)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;

pt_event_stop(event, PERF_EF_UPDATE);

buf = perf_get_aux(&pt->handle);

if (buf) {
if (buf->snapshot)
pt->handle.head =
local_xchg(&buf->data_size,
@@ -1112,9 +1295,13 @@ static void pt_event_del(struct perf_event *event, int mode)
}
}

static void pt_event_del(struct perf_event *event, int mode)
{
pt_event_stop(event, PERF_EF_UPDATE);
}

static int pt_event_add(struct perf_event *event, int mode)
{
struct pt_buffer *buf;
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct hw_perf_event *hwc = &event->hw;
int ret = -EBUSY;
@@ -1122,34 +1309,18 @@ static int pt_event_add(struct perf_event *event, int mode)
if (pt->handle.event)
goto fail;

buf = perf_aux_output_begin(&pt->handle, event);
ret = -EINVAL;
if (!buf)
goto fail_stop;

pt_buffer_reset_offsets(buf, pt->handle.head);
if (!buf->snapshot) {
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret)
goto fail_end_stop;
}

if (mode & PERF_EF_START) {
pt_event_start(event, 0);
ret = -EBUSY;
ret = -EINVAL;
if (hwc->state == PERF_HES_STOPPED)
goto fail_end_stop;
goto fail;
} else {
hwc->state = PERF_HES_STOPPED;
}

return 0;

fail_end_stop:
perf_aux_output_end(&pt->handle, 0, true);
fail_stop:
hwc->state = PERF_HES_STOPPED;
ret = 0;
fail:

return ret;
}

@@ -1159,6 +1330,7 @@ static void pt_event_read(struct perf_event *event)

static void pt_event_destroy(struct perf_event *event)
{
pt_addr_filters_fini(event);
x86_del_exclusive(x86_lbr_exclusive_pt);
}

@@ -1173,6 +1345,11 @@ static int pt_event_init(struct perf_event *event)
if (x86_add_exclusive(x86_lbr_exclusive_pt))
return -EBUSY;

if (pt_addr_filters_init(event)) {
x86_del_exclusive(x86_lbr_exclusive_pt);
return -ENOMEM;
}

event->destroy = pt_event_destroy;

return 0;
@@ -1192,7 +1369,7 @@ static __init int pt_init(void)

BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);

if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV;

get_online_cpus();
@@ -1226,16 +1403,21 @@ static __init int pt_init(void)
PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;

pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;
pt_pmu.pmu.add = pt_event_add;
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;
pt_pmu.pmu.add = pt_event_add;
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
pt_pmu.pmu.nr_addr_filters =
pt_cap_get(PT_CAP_num_address_ranges);

ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);

return ret;
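With addr_filters_validate/addr_filters_sync wired into the PMU, userspace can attach address range filters to an intel_pt event through the generic perf ioctl. A hedged sketch: the filter-string grammar ("filter <start>/<size>@<object>") is the perf-core convention for address filters and should be checked against the target kernel, and the binary path is purely illustrative.

#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* pt_event_addr_filters_validate() above caps the number of such
 * ranges at PT_CAP_num_address_ranges and rejects non-range filters. */
static int set_addr_filter(int perf_fd)
{
        return ioctl(perf_fd, PERF_EVENT_IOC_SET_FILTER,
                     "filter 0x400/0x1000@/usr/bin/example");
}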
@@ -19,6 +19,40 @@
#ifndef __INTEL_PT_H__
#define __INTEL_PT_H__

/*
* PT MSR bit definitions
*/
#define RTIT_CTL_TRACEEN BIT(0)
#define RTIT_CTL_CYCLEACC BIT(1)
#define RTIT_CTL_OS BIT(2)
#define RTIT_CTL_USR BIT(3)
#define RTIT_CTL_CR3EN BIT(7)
#define RTIT_CTL_TOPA BIT(8)
#define RTIT_CTL_MTC_EN BIT(9)
#define RTIT_CTL_TSC_EN BIT(10)
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_BRANCH_EN BIT(13)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
#define RTIT_CTL_PSB_FREQ_OFFSET 24
#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define RTIT_CTL_ADDR0_OFFSET 32
#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET)
#define RTIT_CTL_ADDR1_OFFSET 36
#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET)
#define RTIT_CTL_ADDR2_OFFSET 40
#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET)
#define RTIT_CTL_ADDR3_OFFSET 44
#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET)
#define RTIT_STATUS_FILTEREN BIT(0)
#define RTIT_STATUS_CONTEXTEN BIT(1)
#define RTIT_STATUS_TRIGGEREN BIT(2)
#define RTIT_STATUS_BUFFOVF BIT(3)
#define RTIT_STATUS_ERROR BIT(4)
#define RTIT_STATUS_STOPPED BIT(5)

/*
* Single-entry ToPA: when this close to region boundary, switch
* buffers to avoid losing data.
@@ -48,15 +82,20 @@ struct topa_entry {
#define PT_CPUID_LEAVES 2
#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */

/* TSC to Core Crystal Clock Ratio */
#define CPUID_TSC_LEAF 0x15

enum pt_capabilities {
PT_CAP_max_subleaf = 0,
PT_CAP_cr3_filtering,
PT_CAP_psb_cyc,
PT_CAP_ip_filtering,
PT_CAP_mtc,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
PT_CAP_payloads_lip,
PT_CAP_num_address_ranges,
PT_CAP_mtc_periods,
PT_CAP_cycle_thresholds,
PT_CAP_psb_periods,
@@ -66,6 +105,9 @@ struct pt_pmu {
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
bool vmx;
unsigned long max_nonturbo_ratio;
unsigned int tsc_art_num;
unsigned int tsc_art_den;
};

/**
@@ -104,14 +146,40 @@ struct pt_buffer {
struct topa_entry *topa_index[0];
};

#define PT_FILTERS_NUM 4

/**
* struct pt_filter - IP range filter configuration
* @msr_a: range start, goes to RTIT_ADDRn_A
* @msr_b: range end, goes to RTIT_ADDRn_B
* @config: 4-bit field in RTIT_CTL
*/
struct pt_filter {
unsigned long msr_a;
unsigned long msr_b;
unsigned long config;
};

/**
* struct pt_filters - IP range filtering context
* @filter: filters defined for this context
* @nr_filters: number of defined filters in the @filter array
*/
struct pt_filters {
struct pt_filter filter[PT_FILTERS_NUM];
unsigned int nr_filters;
};

/**
* struct pt - per-cpu pt context
* @handle: perf output handle
* @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
*/
struct pt {
struct perf_output_handle handle;
struct pt_filters filters;
int handle_nmi;
int vmx_on;
};
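The PT_CAP table earlier in this series caches these capability bits out of CPUID (Intel PT enumerates in leaf 0x14, two sub-leaves, four registers per sub-leaf — exactly what pt_pmu.caps[] holds). For reference, a userspace sketch probing one of them directly, assuming a compiler that ships __get_cpuid_count in <cpuid.h>:

#include <cpuid.h>
#include <stdbool.h>

static bool cpu_has_pt_mtc(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* PT_CAP(mtc, 0, CR_EBX, BIT(3)): leaf 0x14, sub-leaf 0, EBX bit 3 */
        if (!__get_cpuid_count(0x14, 0, &eax, &ebx, &ecx, &edx))
                return false;
        return ebx & (1u << 3);
}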
@@ -27,10 +27,14 @@
* event: rapl_energy_dram
* perf code: 0x3
*
* dram counter: consumption of the builtin-gpu domain (client only)
* gpu counter: consumption of the builtin-gpu domain (client only)
* event: rapl_energy_gpu
* perf code: 0x4
*
* psys counter: consumption of the builtin-psys domain (client only)
* event: rapl_energy_psys
* perf code: 0x5
*
* We manage those counters as free running (read-only). They may be
* used simultaneously by other tools, such as turbostat.
*
@@ -53,6 +57,8 @@
#include <asm/cpu_device_id.h>
#include "../perf_event.h"

MODULE_LICENSE("GPL");

/*
* RAPL energy status counters
*/
@@ -64,13 +70,16 @@
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */
#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */

#define NR_RAPL_DOMAINS 0x4
#define NR_RAPL_DOMAINS 0x5
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
"pp1-gpu",
"psys",
};

/* Clients have PP0, PKG */
@@ -89,6 +98,13 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
1<<RAPL_IDX_RAM_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT)

/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT|\
1<<RAPL_IDX_PSYS_NRG_STAT)

/* Knights Landing has PKG, RAM */
#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
@@ -360,6 +376,10 @@ static int rapl_pmu_event_init(struct perf_event *event)
bit = RAPL_IDX_PP1_NRG_STAT;
msr = MSR_PP1_ENERGY_STATUS;
break;
case INTEL_RAPL_PSYS:
bit = RAPL_IDX_PSYS_NRG_STAT;
msr = MSR_PLATFORM_ENERGY_STATUS;
break;
default:
return -EINVAL;
}
@@ -414,11 +434,13 @@ RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");

RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules");

/*
* we compute in 0.23 nJ increments regardless of MSR
@@ -427,6 +449,7 @@ RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");

static struct attribute *rapl_events_srv_attr[] = {
EVENT_PTR(rapl_cores),
@@ -476,6 +499,27 @@ static struct attribute *rapl_events_hsw_attr[] = {
NULL,
};

static struct attribute *rapl_events_skl_attr[] = {
EVENT_PTR(rapl_cores),
EVENT_PTR(rapl_pkg),
EVENT_PTR(rapl_gpu),
EVENT_PTR(rapl_ram),
EVENT_PTR(rapl_psys),

EVENT_PTR(rapl_cores_unit),
EVENT_PTR(rapl_pkg_unit),
EVENT_PTR(rapl_gpu_unit),
EVENT_PTR(rapl_ram_unit),
EVENT_PTR(rapl_psys_unit),

EVENT_PTR(rapl_cores_scale),
EVENT_PTR(rapl_pkg_scale),
EVENT_PTR(rapl_gpu_scale),
EVENT_PTR(rapl_ram_scale),
EVENT_PTR(rapl_psys_scale),
NULL,
};

static struct attribute *rapl_events_knl_attr[] = {
EVENT_PTR(rapl_pkg),
EVENT_PTR(rapl_ram),
@@ -592,6 +636,11 @@ static int rapl_cpu_notifier(struct notifier_block *self,
return NOTIFY_OK;
}

static struct notifier_block rapl_cpu_nb = {
.notifier_call = rapl_cpu_notifier,
.priority = CPU_PRI_PERF + 1,
};

static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
@@ -660,7 +709,7 @@ static int __init rapl_prepare_cpus(void)
return 0;
}

static void __init cleanup_rapl_pmus(void)
static void cleanup_rapl_pmus(void)
{
int i;

@@ -691,52 +740,92 @@ static int __init init_rapl_pmus(void)
return 0;
}

static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
[1] = {},
#define X86_RAPL_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }

struct intel_rapl_init_fun {
bool apply_quirk;
int cntr_mask;
struct attribute **attrs;
};

static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
.apply_quirk = false,
.cntr_mask = RAPL_IDX_CLN,
.attrs = rapl_events_cln_attr,
};

static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
.apply_quirk = true,
.cntr_mask = RAPL_IDX_SRV,
.attrs = rapl_events_srv_attr,
};

static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
.apply_quirk = false,
.cntr_mask = RAPL_IDX_HSW,
.attrs = rapl_events_hsw_attr,
};

static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
.apply_quirk = false,
.cntr_mask = RAPL_IDX_SRV,
.attrs = rapl_events_srv_attr,
};

static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
.apply_quirk = true,
.cntr_mask = RAPL_IDX_KNL,
.attrs = rapl_events_knl_attr,
};

static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
.apply_quirk = false,
.cntr_mask = RAPL_IDX_SKL_CLN,
.attrs = rapl_events_skl_attr,
};

static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */

X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */

X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */

X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */

X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */

X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
{},
};

MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);

static int __init rapl_pmu_init(void)
{
bool apply_quirk = false;
const struct x86_cpu_id *id;
struct intel_rapl_init_fun *rapl_init;
bool apply_quirk;
int ret;

if (!x86_match_cpu(rapl_cpu_match))
id = x86_match_cpu(rapl_cpu_match);
if (!id)
return -ENODEV;

switch (boot_cpu_data.x86_model) {
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
rapl_cntr_mask = RAPL_IDX_CLN;
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
case 63: /* Haswell-Server */
case 79: /* Broadwell-Server */
apply_quirk = true;
rapl_cntr_mask = RAPL_IDX_SRV;
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
case 70: /* Haswell GT3e */
case 61: /* Broadwell */
case 71: /* Broadwell-H */
rapl_cntr_mask = RAPL_IDX_HSW;
rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
break;
case 45: /* Sandy Bridge-EP */
case 62: /* IvyTown */
rapl_cntr_mask = RAPL_IDX_SRV;
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
break;
case 87: /* Knights Landing */
apply_quirk = true;
rapl_cntr_mask = RAPL_IDX_KNL;
rapl_pmu_events_group.attrs = rapl_events_knl_attr;
break;
default:
return -ENODEV;
}
rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
apply_quirk = rapl_init->apply_quirk;
rapl_cntr_mask = rapl_init->cntr_mask;
rapl_pmu_events_group.attrs = rapl_init->attrs;

ret = rapl_check_hw_unit(apply_quirk);
if (ret)
@@ -756,7 +845,7 @@ static int __init rapl_pmu_init(void)
if (ret)
goto out;

__perf_cpu_notifier(rapl_cpu_notifier);
__register_cpu_notifier(&rapl_cpu_nb);
cpu_notifier_register_done();
rapl_advertise();
return 0;
@@ -767,4 +856,14 @@ static int __init rapl_pmu_init(void)
cpu_notifier_register_done();
return ret;
}
device_initcall(rapl_pmu_init);
module_init(rapl_pmu_init);

static void __exit intel_rapl_exit(void)
{
cpu_notifier_register_begin();
__unregister_cpu_notifier(&rapl_cpu_nb);
perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus();
cpu_notifier_register_done();
}
module_exit(intel_rapl_exit);
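All of the RAPL events advertise the same scale string, 2.3283064365386962890625e-10, i.e. one count equals 2^-32 Joules. Converting a raw counter delta read from the event into Joules is then a single multiply; a sketch:

#include <stdint.h>

static double rapl_counts_to_joules(uint64_t delta)
{
        return (double)delta * 2.3283064365386962890625e-10; /* 2^-32 J */
}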
@@ -1,3 +1,4 @@
#include <asm/cpu_device_id.h>
#include "uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -21,6 +22,8 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);

MODULE_LICENSE("GPL");

static int uncore_pcibus_to_physid(struct pci_bus *bus)
{
struct pci2phy_map *map;
@@ -754,7 +757,7 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
pmu->registered = false;
}

static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
{
struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
@@ -770,7 +773,7 @@ static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
}
}

static void __init uncore_exit_boxes(void *dummy)
static void uncore_exit_boxes(void *dummy)
{
struct intel_uncore_type **types;

@@ -787,7 +790,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
kfree(pmu->boxes);
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
static void uncore_type_exit(struct intel_uncore_type *type)
{
struct intel_uncore_pmu *pmu = type->pmus;
int i;
@@ -804,7 +807,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
type->events_group = NULL;
}

static void __init uncore_types_exit(struct intel_uncore_type **types)
static void uncore_types_exit(struct intel_uncore_type **types)
{
for (; *types; types++)
uncore_type_exit(*types);
@@ -989,46 +992,6 @@ static int __init uncore_pci_init(void)
size_t size;
int ret;

switch (boot_cpu_data.x86_model) {
case 45: /* Sandy Bridge-EP */
ret = snbep_uncore_pci_init();
break;
case 62: /* Ivy Bridge-EP */
ret = ivbep_uncore_pci_init();
break;
case 63: /* Haswell-EP */
ret = hswep_uncore_pci_init();
break;
case 79: /* BDX-EP */
case 86: /* BDX-DE */
ret = bdx_uncore_pci_init();
break;
case 42: /* Sandy Bridge */
ret = snb_uncore_pci_init();
break;
case 58: /* Ivy Bridge */
ret = ivb_uncore_pci_init();
break;
case 60: /* Haswell */
case 69: /* Haswell Celeron */
ret = hsw_uncore_pci_init();
break;
case 61: /* Broadwell */
ret = bdw_uncore_pci_init();
break;
case 87: /* Knights Landing */
ret = knl_uncore_pci_init();
break;
case 94: /* SkyLake */
ret = skl_uncore_pci_init();
break;
default:
return -ENODEV;
}

if (ret)
return ret;

size = max_packages * sizeof(struct pci_extra_dev);
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
if (!uncore_extra_pci_dev) {
@@ -1060,7 +1023,7 @@ static int __init uncore_pci_init(void)
return ret;
}

static void __init uncore_pci_exit(void)
static void uncore_pci_exit(void)
{
if (pcidrv_registered) {
pcidrv_registered = false;
@@ -1287,46 +1250,6 @@ static int __init uncore_cpu_init(void)
{
int ret;

switch (boot_cpu_data.x86_model) {
case 26: /* Nehalem */
case 30:
case 37: /* Westmere */
case 44:
nhm_uncore_cpu_init();
break;
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
case 60: /* Haswell */
case 69: /* Haswell */
case 70: /* Haswell */
case 61: /* Broadwell */
case 71: /* Broadwell */
snb_uncore_cpu_init();
break;
case 45: /* Sandy Bridge-EP */
snbep_uncore_cpu_init();
break;
case 46: /* Nehalem-EX */
case 47: /* Westmere-EX aka. Xeon E7 */
nhmex_uncore_cpu_init();
break;
case 62: /* Ivy Bridge-EP */
ivbep_uncore_cpu_init();
break;
case 63: /* Haswell-EP */
hswep_uncore_cpu_init();
break;
case 79: /* BDX-EP */
case 86: /* BDX-DE */
bdx_uncore_cpu_init();
break;
case 87: /* Knights Landing */
knl_uncore_cpu_init();
break;
default:
return -ENODEV;
}

ret = uncore_types_init(uncore_msr_uncores, true);
if (ret)
goto err;
@@ -1376,11 +1299,105 @@ static int __init uncore_cpumask_init(bool msr)
return 0;
}

#define X86_UNCORE_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }

struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
};

static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
.cpu_init = nhm_uncore_cpu_init,
};

static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
.cpu_init = snb_uncore_cpu_init,
.pci_init = snb_uncore_pci_init,
};

static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
.cpu_init = snb_uncore_cpu_init,
.pci_init = ivb_uncore_pci_init,
};

static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
.cpu_init = snb_uncore_cpu_init,
.pci_init = hsw_uncore_pci_init,
};

static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
.cpu_init = snb_uncore_cpu_init,
.pci_init = bdw_uncore_pci_init,
};

static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
.cpu_init = snbep_uncore_cpu_init,
.pci_init = snbep_uncore_pci_init,
};

static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
.cpu_init = nhmex_uncore_cpu_init,
};

static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
.cpu_init = ivbep_uncore_cpu_init,
.pci_init = ivbep_uncore_pci_init,
};

static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
.cpu_init = hswep_uncore_cpu_init,
.pci_init = hswep_uncore_pci_init,
};

static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
.cpu_init = bdx_uncore_cpu_init,
.pci_init = bdx_uncore_pci_init,
};

static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
.cpu_init = knl_uncore_cpu_init,
.pci_init = knl_uncore_pci_init,
};

static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
.pci_init = skl_uncore_pci_init,
};

static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
{},
};

MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);

static int __init intel_uncore_init(void)
{
int pret, cret, ret;
const struct x86_cpu_id *id;
struct intel_uncore_init_fun *uncore_init;
int pret = 0, cret = 0, ret;

if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
id = x86_match_cpu(intel_uncore_match);
if (!id)
return -ENODEV;

if (cpu_has_hypervisor)
@@ -1388,8 +1405,17 @@ static int __init intel_uncore_init(void)

max_packages = topology_max_packages();

pret = uncore_pci_init();
cret = uncore_cpu_init();
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_init->pci_init) {
pret = uncore_init->pci_init();
if (!pret)
pret = uncore_pci_init();
}

if (uncore_init->cpu_init) {
uncore_init->cpu_init();
cret = uncore_cpu_init();
}

if (cret && pret)
return -ENODEV;
@@ -1409,4 +1435,14 @@ static int __init intel_uncore_init(void)
cpu_notifier_register_done();
return ret;
}
device_initcall(intel_uncore_init);
module_init(intel_uncore_init);

static void __exit intel_uncore_exit(void)
{
cpu_notifier_register_begin();
__unregister_cpu_notifier(&uncore_cpu_nb);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
cpu_notifier_register_done();
}
module_exit(intel_uncore_exit);
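The switch-to-table conversions above are what let these PMU drivers become autoloadable modules: MODULE_DEVICE_TABLE(x86cpu, ...) exports the CPU match table as a module alias, so udev can load the driver on matching parts. A stripped-down, hypothetical skeleton of the pattern (for illustration only, not part of this patch):

#include <linux/module.h>
#include <asm/cpu_device_id.h>

static const struct x86_cpu_id demo_cpu_match[] = {
        { X86_VENDOR_INTEL, 6, 94, X86_FEATURE_ANY, 0 }, /* Skylake H/S */
        {}
};
MODULE_DEVICE_TABLE(x86cpu, demo_cpu_match);

static int __init demo_init(void)
{
        if (!x86_match_cpu(demo_cpu_match))
                return -ENODEV;
        return 0;
}
module_init(demo_init);

static void __exit demo_exit(void) { }
module_exit(demo_exit);

MODULE_LICENSE("GPL");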
@@ -6,6 +6,8 @@ enum perf_msr_id {
PERF_MSR_MPERF = 2,
PERF_MSR_PPERF = 3,
PERF_MSR_SMI = 4,
PERF_MSR_PTSC = 5,
PERF_MSR_IRPERF = 6,

PERF_MSR_EVENT_MAX,
};
@@ -15,6 +17,16 @@ static bool test_aperfmperf(int idx)
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}

static bool test_ptsc(int idx)
{
return boot_cpu_has(X86_FEATURE_PTSC);
}

static bool test_irperf(int idx)
{
return boot_cpu_has(X86_FEATURE_IRPERF);
}

static bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -69,18 +81,22 @@ struct perf_msr {
bool (*test)(int idx);
};

PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05");
PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");

static struct perf_msr msr[] = {
[PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
[PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
[PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
[PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
[PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
[PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
[PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
[PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
[PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
[PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
[PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
[PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
};

static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
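The two new events are reachable like any other entry of the dynamic msr PMU: read its type from sysfs and put the event code from the strings above into attr.config. A hedged userspace sketch (error handling trimmed; irperf only counts on CPUs that enumerate X86_FEATURE_IRPERF):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int type, fd;
        FILE *f = fopen("/sys/bus/event_source/devices/msr/type", "r");

        if (!f || fscanf(f, "%d", &type) != 1)
                return 1;
        fclose(f);

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = 0x06; /* irperf, per the event strings above */

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;
        sleep(1);
        read(fd, &count, sizeof(count));
        printf("instructions retired: %lld\n", count);
        return 0;
}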
@@ -601,6 +601,7 @@ struct x86_pmu {
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* LBR may coexist with PT */

/*
* Intel PT/LBR/BTS are exclusive
@@ -859,6 +860,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[];

extern struct event_constraint intel_slm_pebs_event_constraints[];

extern struct event_constraint intel_glm_pebs_event_constraints[];

extern struct event_constraint intel_nehalem_pebs_event_constraints[];

extern struct event_constraint intel_westmere_pebs_event_constraints[];
@@ -907,6 +910,8 @@ void intel_pmu_lbr_init_nhm(void);

void intel_pmu_lbr_init_atom(void);

void intel_pmu_lbr_init_slm(void);

void intel_pmu_lbr_init_snb(void);

void intel_pmu_lbr_init_hsw(void);
@@ -177,6 +177,7 @@
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */

@@ -250,6 +251,7 @@

/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */

/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -89,27 +89,16 @@
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6

#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
#define RTIT_CTL_CYCLEACC BIT(1)
#define RTIT_CTL_OS BIT(2)
#define RTIT_CTL_USR BIT(3)
#define RTIT_CTL_CR3EN BIT(7)
#define RTIT_CTL_TOPA BIT(8)
#define RTIT_CTL_MTC_EN BIT(9)
#define RTIT_CTL_TSC_EN BIT(10)
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_BRANCH_EN BIT(13)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
#define RTIT_CTL_PSB_FREQ_OFFSET 24
#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define MSR_IA32_RTIT_STATUS 0x00000571
#define RTIT_STATUS_CONTEXTEN BIT(1)
#define RTIT_STATUS_TRIGGEREN BIT(2)
#define RTIT_STATUS_ERROR BIT(4)
#define RTIT_STATUS_STOPPED BIT(5)
#define MSR_IA32_RTIT_STATUS 0x00000571
#define MSR_IA32_RTIT_ADDR0_A 0x00000580
#define MSR_IA32_RTIT_ADDR0_B 0x00000581
#define MSR_IA32_RTIT_ADDR1_A 0x00000582
#define MSR_IA32_RTIT_ADDR1_B 0x00000583
#define MSR_IA32_RTIT_ADDR2_A 0x00000584
#define MSR_IA32_RTIT_ADDR2_B 0x00000585
#define MSR_IA32_RTIT_ADDR3_A 0x00000586
#define MSR_IA32_RTIT_ADDR3_B 0x00000587
#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
@@ -205,6 +194,8 @@
#define MSR_CONFIG_TDP_CONTROL 0x0000064B
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C

#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D

#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
@@ -315,6 +306,9 @@
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */

/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9

/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
@@ -328,6 +322,7 @@
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
#define MSR_F15H_PTSC 0xc0010280
#define MSR_F15H_IC_CFG 0xc0011021

/* Fam 10h MSRs */
@@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
riprel_post_xol(auprobe, regs);
}

static struct uprobe_xol_ops default_xol_ops = {
static const struct uprobe_xol_ops default_xol_ops = {
.pre_xol = default_pre_xol_op,
.post_xol = default_post_xol_op,
.abort = default_abort_op,
@@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
0, insn->immediate.nbytes);
}

static struct uprobe_xol_ops branch_xol_ops = {
static const struct uprobe_xol_ops branch_xol_ops = {
.emulate = branch_emulate_op,
.post_xol = branch_post_xol_op,
};
@@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data)
void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
callchain_trace, NULL, entry);
}

void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
callchain_trace, entry);
}

@@ -847,6 +847,14 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
if (!platform_get_irq(cpu_pmu->plat_device, 0))
cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

/*
* This is a CPU PMU potentially in a heterogeneous configuration (e.g.
* big.LITTLE). This is not an uncore PMU, and we have taken ctx
* sharing into account (e.g. with our pmu::filter_match callback and
* pmu::event_init group validation).
*/
cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;

return 0;

out_unregister:
@@ -34,6 +34,9 @@
#include <asm/processor.h>
#include <asm/cpu_device_id.h>

/* Local defines */
#define MSR_PLATFORM_POWER_LIMIT 0x0000065C

/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff

@@ -86,6 +89,7 @@ enum rapl_domain_type {
RAPL_DOMAIN_PP0, /* core power plane */
RAPL_DOMAIN_PP1, /* graphics uncore */
RAPL_DOMAIN_DRAM,/* DRAM control_type */
RAPL_DOMAIN_PLATFORM, /* PSys control_type */
RAPL_DOMAIN_MAX,
};

@@ -251,9 +255,11 @@ static const char * const rapl_domain_names[] = {
"core",
"uncore",
"dram",
"psys",
};

static struct powercap_control_type *control_type; /* PowerCap Controller */
static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */

/* caller to ensure CPU hotplug lock is held */
static struct rapl_package *find_package_by_id(int id)
@@ -409,6 +415,14 @@ static const struct powercap_zone_ops zone_ops[] = {
.set_enable = set_domain_enable,
.get_enable = get_domain_enable,
},
/* RAPL_DOMAIN_PLATFORM */
{
.get_energy_uj = get_energy_counter,
.get_max_energy_range_uj = get_max_energy_counter,
.release = release_zone,
.set_enable = set_domain_enable,
.get_enable = get_domain_enable,
},
};

static int set_power_limit(struct powercap_zone *power_zone, int id,
@@ -1160,6 +1174,13 @@ static int rapl_unregister_powercap(void)
powercap_unregister_zone(control_type,
&rd_package->power_zone);
}

if (platform_rapl_domain) {
powercap_unregister_zone(control_type,
&platform_rapl_domain->power_zone);
kfree(platform_rapl_domain);
}

powercap_unregister_control_type(control_type);

return 0;
@@ -1239,6 +1260,47 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
return ret;
}

static int rapl_register_psys(void)
{
struct rapl_domain *rd;
struct powercap_zone *power_zone;
u64 val;

if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val)
return -ENODEV;

if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val)
return -ENODEV;

rd = kzalloc(sizeof(*rd), GFP_KERNEL);
if (!rd)
return -ENOMEM;

rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM];
rd->id = RAPL_DOMAIN_PLATFORM;
rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT;
rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS;
rd->rpl[0].prim_id = PL1_ENABLE;
rd->rpl[0].name = pl1_name;
rd->rpl[1].prim_id = PL2_ENABLE;
rd->rpl[1].name = pl2_name;
rd->rp = find_package_by_id(0);

power_zone = powercap_register_zone(&rd->power_zone, control_type,
"psys", NULL,
&zone_ops[RAPL_DOMAIN_PLATFORM],
2, &constraint_ops);

if (IS_ERR(power_zone)) {
kfree(rd);
return PTR_ERR(power_zone);
}

platform_rapl_domain = rd;

return 0;
}

static int rapl_register_powercap(void)
{
struct rapl_domain *rd;
@@ -1255,6 +1317,10 @@ static int rapl_register_powercap(void)
list_for_each_entry(rp, &rapl_packages, plist)
if (rapl_package_register_powercap(rp))
goto err_cleanup_package;

/* Don't bail out if PSys is not supported */
rapl_register_psys();

return ret;

err_cleanup_package:
@@ -1289,6 +1355,9 @@ static int rapl_check_domain(int cpu, int domain)
case RAPL_DOMAIN_DRAM:
msr = MSR_DRAM_ENERGY_STATUS;
break;
case RAPL_DOMAIN_PLATFORM:
/* PSYS(PLATFORM) is not a CPU domain, so avoid printing error */
return -EINVAL;
default:
pr_err("invalid domain id %d\n", domain);
return -EINVAL;
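Once rapl_register_psys() succeeds, the platform domain appears as a top-level powercap zone alongside the package zones. A sketch reading its energy counter; the zone index varies per system, so the "intel-rapl:1" path component is an assumption (in practice, match the zone whose name file reads "psys"):

#include <stdio.h>

int main(void)
{
        unsigned long long uj;
        FILE *f = fopen("/sys/class/powercap/intel-rapl:1/energy_uj", "r");

        if (f) {
                if (fscanf(f, "%llu", &uj) == 1)
                        printf("psys energy: %llu uJ\n", uj);
                fclose(f);
        }
        return 0;
}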
@@ -58,7 +58,7 @@ struct perf_guest_info_callbacks {

struct perf_callchain_entry {
__u64 nr;
__u64 ip[PERF_MAX_STACK_DEPTH];
__u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
};

struct perf_raw_record {
@@ -151,6 +151,15 @@ struct hw_perf_event {
*/
struct task_struct *target;

/*
* PMU would store hardware filter configuration
* here.
*/
void *addr_filters;

/* Last sync'ed generation of filters */
unsigned long addr_filters_gen;

/*
* hw_perf_event::state flags; used to track the PERF_EF_* state.
*/
@@ -216,6 +225,7 @@ struct perf_event;
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08
#define PERF_PMU_CAP_EXCLUSIVE 0x10
#define PERF_PMU_CAP_ITRACE 0x20
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40

/**
* struct pmu - generic performance monitoring unit
@@ -240,6 +250,9 @@ struct pmu {
int task_ctx_nr;
int hrtimer_interval_ms;

/* number of address filters this PMU can do */
unsigned int nr_addr_filters;

/*
* Fully disable/enable this PMU, can be used to protect from the PMI
* as well as for lazy/batch writing of the MSRs.
@@ -392,12 +405,71 @@ struct pmu {
*/
void (*free_aux) (void *aux); /* optional */

/*
* Validate address range filters: make sure the HW supports the
* requested configuration and number of filters; return 0 if the
* supplied filters are valid, -errno otherwise.
*
* Runs in the context of the ioctl()ing process and is not serialized
* with the rest of the PMU callbacks.
*/
int (*addr_filters_validate) (struct list_head *filters);
/* optional */

/*
* Synchronize address range filter configuration:
* translate hw-agnostic filters into hardware configuration in
* event::hw::addr_filters.
*
* Runs as a part of filter sync sequence that is done in ->start()
* callback by calling perf_event_addr_filters_sync().
*
* May (and should) traverse event::addr_filters::list, for which its
* caller provides necessary serialization.
*/
void (*addr_filters_sync) (struct perf_event *event);
/* optional */

/*
* Filter events for PMU-specific reasons.
*/
int (*filter_match) (struct perf_event *event); /* optional */
};

/**
* struct perf_addr_filter - address range filter definition
* @entry: event's filter list linkage
* @inode: object file's inode for file-based filters
* @offset: filter range offset
* @size: filter range size
* @range: 1: range, 0: address
* @filter: 1: filter/start, 0: stop
*
* This is a hardware-agnostic filter configuration as specified by the user.
*/
struct perf_addr_filter {
struct list_head entry;
struct inode *inode;
unsigned long offset;
unsigned long size;
unsigned int range : 1,
filter : 1;
};

/**
* struct perf_addr_filters_head - container for address range filters
* @list: list of filters for this event
* @lock: spinlock that serializes accesses to the @list and event's
* (and its children's) filter generations.
*
* A child event will use parent's @list (and therefore @lock), so they are
* bundled together; see perf_event_addr_filters().
*/
struct perf_addr_filters_head {
struct list_head list;
raw_spinlock_t lock;
};

/**
* enum perf_event_active_state - the states of an event
*/
@@ -566,6 +638,12 @@ struct perf_event {

atomic_t event_limit;

/* address range filters */
struct perf_addr_filters_head addr_filters;
/* vma address array for file-based filters */
unsigned long *addr_filters_offs;
unsigned long addr_filters_gen;

void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;

@@ -834,9 +912,25 @@ extern int perf_event_overflow(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs);

extern void perf_event_output_forward(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs);
extern void perf_event_output_backward(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs);
extern void perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs);
struct perf_sample_data *data,
struct pt_regs *regs);

static inline bool
is_default_overflow_handler(struct perf_event *event)
{
if (likely(event->overflow_handler == perf_event_output_forward))
return true;
if (unlikely(event->overflow_handler == perf_event_output_backward))
return true;
return false;
}

extern void
perf_event_header__init_id(struct perf_event_header *header,
@@ -977,9 +1071,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);

extern int sysctl_perf_event_max_stack;

static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH) {
if (entry->nr < sysctl_perf_event_max_stack) {
entry->ip[entry->nr++] = ip;
return 0;
} else {
@@ -1001,6 +1097,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);

int perf_event_max_stack_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);

static inline bool perf_paranoid_tracepoint_raw(void)
{
@@ -1045,8 +1143,41 @@ static inline bool has_aux(struct perf_event *event)
return event->pmu->setup_aux;
}

static inline bool is_write_backward(struct perf_event *event)
{
return !!event->attr.write_backward;
}

static inline bool has_addr_filter(struct perf_event *event)
{
return event->pmu->nr_addr_filters;
}

/*
* An inherited event uses parent's filters
*/
static inline struct perf_addr_filters_head *
perf_event_addr_filters(struct perf_event *event)
{
struct perf_addr_filters_head *ifh = &event->addr_filters;

if (event->parent)
ifh = &event->parent->addr_filters;

return ifh;
}

extern void perf_event_addr_filters_sync(struct perf_event *event);

extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size);
extern int perf_output_begin_forward(struct perf_output_handle *handle,
struct perf_event *event,
unsigned int size);
extern int perf_output_begin_backward(struct perf_output_handle *handle,
struct perf_event *event,
unsigned int size);

extern void perf_output_end(struct perf_output_handle *handle);
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len);
@ -340,7 +340,8 @@ struct perf_event_attr {
				comm_exec      :  1, /* flag comm events that are due to an exec */
				use_clockid    :  1, /* use @clockid for time fields */
				context_switch :  1, /* context switch data */
				__reserved_1   : 37;
				write_backward :  1, /* Write ring buffer from end to beginning */
				__reserved_1   : 36;

	union {
		__u32		wakeup_events;	  /* wakeup every n events */

@ -401,6 +402,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)

enum perf_event_ioc_flags {
	PERF_IOC_FLAG_GROUP		= 1U << 0,
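Taken together, the new write_backward bit and the PERF_EVENT_IOC_PAUSE_OUTPUT ioctl enable snapshot-style readers. A minimal user-space sketch, assuming a uapi header that already carries the new bit, a 4 KiB page size and a plain cycles event; none of this is code from the merge itself, and the read-only mapping is what selects overwrite mode:

    #include <linux/perf_event.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Minimal sketch: snapshot-style reading with a backward-written buffer. */
    int snapshot_event(void)
    {
    	struct perf_event_attr attr = {
    		.type		= PERF_TYPE_HARDWARE,
    		.size		= sizeof(attr),
    		.config		= PERF_COUNT_HW_CPU_CYCLES,
    		.sample_period	= 100000,
    		.sample_type	= PERF_SAMPLE_IP,
    		.write_backward	= 1,	/* new attr bit from this merge */
    	};
    	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    	if (fd < 0)
    		return -1;

    	/* 1 metadata page + 8 data pages; read-only mmap => overwrite mode */
    	void *base = mmap(NULL, 9 * 4096, PROT_READ, MAP_SHARED, fd, 0);
    	if (base == MAP_FAILED)
    		return -1;

    	/* Stop the kernel from writing while taking a snapshot ... */
    	ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
    	/* ... walk records backward from data_head here ... */
    	ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
    	return fd;
    }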
@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    value_size < 8 || value_size % 8 ||
	    value_size / 8 > PERF_MAX_STACK_DEPTH)
	    value_size / 8 > sysctl_perf_event_max_stack)
		return ERR_PTR(-EINVAL);

	/* hash table size must be power of 2 */

@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
	struct perf_callchain_entry *trace;
	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
	u32 max_depth = map->value_size / 8;
	/* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
	u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	u32 hash, id, trace_nr, trace_len;
	bool user = flags & BPF_F_USER_STACK;

@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
		return -EFAULT;

	/* get_perf_callchain() guarantees that trace->nr >= init_nr
	 * and trace->nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
	 * and trace->nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
	 */
	trace_nr = trace->nr - init_nr;
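To make the init_nr arithmetic concrete, a worked example with assumed numbers, as a comment-only sketch:

    /*
     * Illustrative numbers only: suppose sysctl_perf_event_max_stack = 127
     * and the map's value_size is 512 bytes, so max_depth = 512 / 8 = 64.
     *
     *   init_nr = 127 - 64 = 63
     *
     * get_perf_callchain() then starts storing at entry->ip[63], so at most
     * the last 64 slots of the 127-entry buffer are used, and
     * trace_nr = trace->nr - init_nr can never exceed max_depth.
     */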
@ -18,6 +18,14 @@ struct callchain_cpus_entries {
	struct perf_callchain_entry	*cpu_entries[0];
};

int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;

static inline size_t perf_callchain_entry__sizeof(void)
{
	return (sizeof(struct perf_callchain_entry) +
		sizeof(__u64) * sysctl_perf_event_max_stack);
}

static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);

@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
	if (!entries)
		return -ENOMEM;

	size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;

	for_each_possible_cpu(cpu) {
		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,

@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)

	cpu = smp_processor_id();

	return &entries->cpu_entries[cpu][*rctx];
	return (((void *)entries->cpu_entries[cpu]) +
		(*rctx * perf_callchain_entry__sizeof()));
}

static void

@ -215,3 +224,25 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,

	return entry;
}

int perf_event_max_stack_handler(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int new_value = sysctl_perf_event_max_stack, ret;
	struct ctl_table new_table = *table;

	new_table.data = &new_value;
	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	mutex_lock(&callchain_mutex);
	if (atomic_read(&nr_callchain_events))
		ret = -EBUSY;
	else
		sysctl_perf_event_max_stack = new_value;

	mutex_unlock(&callchain_mutex);

	return ret;
}
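Since the handler above rejects writes with EBUSY while callchain-enabled events exist, a tool that wants a deeper limit must raise it before opening events. A minimal user-space sketch; the path matches the sysctl added below, everything else is assumed:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Minimal sketch: raise kernel.perf_event_max_stack, coping with EBUSY. */
    static int set_max_stack(int frames)
    {
    	char buf[16];
    	int fd = open("/proc/sys/kernel/perf_event_max_stack", O_WRONLY);

    	if (fd < 0)
    		return -errno;

    	int len = snprintf(buf, sizeof(buf), "%d\n", frames);
    	int ret = write(fd, buf, len) == len ? 0 : -errno;

    	if (ret == -EBUSY)	/* callchain-enabled events are in use */
    		fprintf(stderr, "set it before any events are opened\n");

    	close(fd);
    	return ret;
    }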
File diff suppressed because it is too large
@ -11,13 +11,13 @@
struct ring_buffer {
	atomic_t			refcount;
	struct rcu_head			rcu_head;
	struct irq_work			irq_work;
#ifdef CONFIG_PERF_USE_VMALLOC
	struct work_struct		work;
	int				page_order;	/* allocation order  */
#endif
	int				nr_pages;	/* nr of data pages  */
	int				overwrite;	/* can overwrite itself */
	int				paused;		/* can write into ring buffer */

	atomic_t			poll;		/* POLL_ for wakeups */

@ -65,6 +65,14 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
	rb_free(rb);
}

static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
{
	if (!pause && rb->nr_pages)
		rb->paused = 0;
	else
		rb->paused = 1;
}

extern struct ring_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void perf_event_wakeup(struct perf_event *event);
@ -102,8 +102,21 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
	preempt_enable();
}

int perf_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event, unsigned int size)
static bool __always_inline
ring_buffer_has_space(unsigned long head, unsigned long tail,
		      unsigned long data_size, unsigned int size,
		      bool backward)
{
	if (!backward)
		return CIRC_SPACE(head, tail, data_size) >= size;
	else
		return CIRC_SPACE(tail, head, data_size) >= size;
}

static int __always_inline
__perf_output_begin(struct perf_output_handle *handle,
		    struct perf_event *event, unsigned int size,
		    bool backward)
{
	struct ring_buffer *rb;
	unsigned long tail, offset, head;

@ -125,8 +138,11 @@ int perf_output_begin(struct perf_output_handle *handle,
	if (unlikely(!rb))
		goto out;

	if (unlikely(!rb->nr_pages))
	if (unlikely(rb->paused)) {
		if (rb->nr_pages)
			local_inc(&rb->lost);
		goto out;
	}

	handle->rb    = rb;
	handle->event = event;

@ -143,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle,
	do {
		tail = READ_ONCE(rb->user_page->data_tail);
		offset = head = local_read(&rb->head);
		if (!rb->overwrite &&
		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
			goto fail;
		if (!rb->overwrite) {
			if (unlikely(!ring_buffer_has_space(head, tail,
							    perf_data_size(rb),
							    size, backward)))
				goto fail;
		}

		/*
		 * The above forms a control dependency barrier separating the

@ -159,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle,
		 * See perf_output_put_handle().
		 */

		head += size;
		if (!backward)
			head += size;
		else
			head -= size;
	} while (local_cmpxchg(&rb->head, offset, head) != offset);

	if (backward) {
		offset = head;
		head = (u64)(-head);
	}

	/*
	 * We rely on the implied barrier() by local_cmpxchg() to ensure
	 * none of the data stores below can be lifted up by the compiler.

@ -203,6 +230,26 @@ int perf_output_begin(struct perf_output_handle *handle,
	return -ENOSPC;
}

int perf_output_begin_forward(struct perf_output_handle *handle,
			      struct perf_event *event, unsigned int size)
{
	return __perf_output_begin(handle, event, size, false);
}

int perf_output_begin_backward(struct perf_output_handle *handle,
			       struct perf_event *event, unsigned int size)
{
	return __perf_output_begin(handle, event, size, true);
}

int perf_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event, unsigned int size)
{

	return __perf_output_begin(handle, event, size,
				   unlikely(is_write_backward(event)));
}

unsigned int perf_output_copy(struct perf_output_handle *handle,
			      const void *buf, unsigned int len)
{

@ -221,8 +268,6 @@ void perf_output_end(struct perf_output_handle *handle)
	rcu_read_unlock();
}

static void rb_irq_work(struct irq_work *work);

static void
ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
{

@ -243,16 +288,13 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)

	INIT_LIST_HEAD(&rb->event_list);
	spin_lock_init(&rb->event_lock);
	init_irq_work(&rb->irq_work, rb_irq_work);
}

static void ring_buffer_put_async(struct ring_buffer *rb)
{
	if (!atomic_dec_and_test(&rb->refcount))
		return;

	rb->rcu_head.next = (void *)rb;
	irq_work_queue(&rb->irq_work);
	/*
	 * perf_output_begin() only checks rb->paused, therefore
	 * rb->paused must be true if we have no pages for output.
	 */
	if (!rb->nr_pages)
		rb->paused = 1;
}

/*

@ -264,6 +306,10 @@ static void ring_buffer_put_async(struct ring_buffer *rb)
 * The ordering is similar to that of perf_output_{begin,end}, with
 * the exception of (B), which should be taken care of by the pmu
 * driver, since ordering rules will differ depending on hardware.
 *
 * Call this from pmu::start(); see the comment in perf_aux_output_end()
 * about its use in pmu callbacks. Both can also be called from the PMI
 * handler if needed.
 */
void *perf_aux_output_begin(struct perf_output_handle *handle,
			    struct perf_event *event)

@ -287,6 +333,13 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
	if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
		goto err;

	/*
	 * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
	 * the aux buffer is in perf_mmap_close(), about to get freed.
	 */
	if (!atomic_read(&rb->aux_mmap_count))
		goto err_put;

	/*
	 * Nesting is not supported for AUX area, make sure nested
	 * writers are caught early

@ -328,10 +381,11 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
	return handle->rb->aux_priv;

err_put:
	/* can't be last */
	rb_free_aux(rb);

err:
	ring_buffer_put_async(rb);
	ring_buffer_put(rb);
	handle->event = NULL;

	return NULL;

@ -342,6 +396,10 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
 * pmu driver's responsibility to observe ordering rules of the hardware,
 * so that all the data is externally visible before this is called.
 *
 * Note: this has to be called from pmu::stop() callback, as the assumption
 * of the AUX buffer management code is that after pmu::stop(), the AUX
 * transaction must be stopped and therefore drop the AUX reference count.
 */
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
			 bool truncated)

@ -389,8 +447,9 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
	handle->event = NULL;

	local_set(&rb->aux_nest, 0);
	/* can't be last */
	rb_free_aux(rb);
	ring_buffer_put_async(rb);
	ring_buffer_put(rb);
}

/*

@ -471,6 +530,14 @@ static void __rb_free_aux(struct ring_buffer *rb)
{
	int pg;

	/*
	 * Should never happen, the last reference should be dropped from
	 * perf_mmap_close() path, which first stops aux transactions (which
	 * in turn are the atomic holders of aux_refcount) and then does the
	 * last rb_free_aux().
	 */
	WARN_ON_ONCE(in_atomic());

	if (rb->aux_priv) {
		rb->free_aux(rb->aux_priv);
		rb->free_aux = NULL;

@ -582,18 +649,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
void rb_free_aux(struct ring_buffer *rb)
{
	if (atomic_dec_and_test(&rb->aux_refcount))
		irq_work_queue(&rb->irq_work);
}

static void rb_irq_work(struct irq_work *work)
{
	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);

	if (!atomic_read(&rb->aux_refcount))
		__rb_free_aux(rb);

	if (rb->rcu_head.next == (void *)rb)
		call_rcu(&rb->rcu_head, rb_free_rcu);
}

#ifndef CONFIG_PERF_USE_VMALLOC
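One concrete instance of the space check, worked through with made-up numbers using the CIRC_SPACE() definition from <linux/circ_buf.h>, as a comment-only sketch:

    /*
     * Suppose data_size = 64 and a record of size = 16 is being reserved.
     *
     * Forward, head = 40, tail = 8:
     *   CIRC_SPACE(40, 8, 64) = (8 - (40 + 1)) & 63 = 31 >= 16  -> fits
     *
     * Backward, the writer moves head downward, so the arguments swap and
     *   CIRC_SPACE(tail, head, data_size) measures the space below head
     *   instead.
     */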
@ -130,6 +130,9 @@ static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
#ifdef CONFIG_PERF_EVENTS
static int six_hundred_forty_kb = 640 * 1024;
#endif

/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;

@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = {
		.extra1		= &zero,
		.extra2		= &one_hundred,
	},
	{
		.procname	= "perf_event_max_stack",
		.data		= NULL, /* filled in by handler */
		.maxlen		= sizeof(sysctl_perf_event_max_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= &zero,
		.extra2		= &six_hundred_forty_kb,
	},
#endif
#ifdef CONFIG_KMEMCHECK
	{
@ -47,6 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!is_sampling_event(p_event))
		return 0;

	/*
	 * We don't allow user space callchains for function trace
	 * event, due to issues with page faults while tracing page
@ -137,7 +137,8 @@ libsubcmd_clean:
	$(call descend,lib/subcmd,clean)

perf_clean:
	$(call descend,$(@:_clean=),clean)
	$(Q)mkdir -p $(PERF_O) .
	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean

selftests_clean:
	$(call descend,testing/$(@:_clean=),clean)
@ -49,6 +49,10 @@ FEATURE_TESTS_BASIC := \
	libslang			\
	libcrypto			\
	libunwind			\
	libunwind-x86			\
	libunwind-x86_64		\
	libunwind-arm			\
	libunwind-aarch64		\
	pthread-attr-setaffinity-np	\
	stackprotector-all		\
	timerfd				\

@ -69,7 +73,9 @@ FEATURE_TESTS_EXTRA := \
	libbabeltrace			\
	liberty				\
	liberty-z			\
	libunwind-debug-frame
	libunwind-debug-frame		\
	libunwind-debug-frame-arm	\
	libunwind-debug-frame-aarch64

FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
@ -27,6 +27,12 @@ FILES= \
	test-libcrypto.bin			\
	test-libunwind.bin			\
	test-libunwind-debug-frame.bin		\
	test-libunwind-x86.bin			\
	test-libunwind-x86_64.bin		\
	test-libunwind-arm.bin			\
	test-libunwind-aarch64.bin		\
	test-libunwind-debug-frame-arm.bin	\
	test-libunwind-debug-frame-aarch64.bin	\
	test-pthread-attr-setaffinity-np.bin	\
	test-stackprotector-all.bin		\
	test-timerfd.bin			\

@ -103,6 +109,23 @@ $(OUTPUT)test-libunwind.bin:

$(OUTPUT)test-libunwind-debug-frame.bin:
	$(BUILD) -lelf

$(OUTPUT)test-libunwind-x86.bin:
	$(BUILD) -lelf -lunwind-x86

$(OUTPUT)test-libunwind-x86_64.bin:
	$(BUILD) -lelf -lunwind-x86_64

$(OUTPUT)test-libunwind-arm.bin:
	$(BUILD) -lelf -lunwind-arm

$(OUTPUT)test-libunwind-aarch64.bin:
	$(BUILD) -lelf -lunwind-aarch64

$(OUTPUT)test-libunwind-debug-frame-arm.bin:
	$(BUILD) -lelf -lunwind-arm

$(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
	$(BUILD) -lelf -lunwind-aarch64

$(OUTPUT)test-libaudit.bin:
	$(BUILD) -laudit
@ -27,10 +27,9 @@ int main(void)
	attr.log_level = 0;
	attr.kern_version = 0;

	attr = attr;
	/*
	 * Test existence of __NR_bpf and BPF_PROG_LOAD.
	 * This call should fail if we run the testcase.
	 */
	return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(attr));
	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
tools/build/feature/test-libunwind-aarch64.c (new file)
@ -0,0 +1,26 @@
#include <libunwind-aarch64.h>
#include <stdlib.h>

extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
					       unw_word_t ip,
					       unw_dyn_info_t *di,
					       unw_proc_info_t *pi,
					       int need_unwind_info, void *arg);

#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)

static unw_accessors_t accessors;

int main(void)
{
	unw_addr_space_t addr_space;

	addr_space = unw_create_addr_space(&accessors, 0);
	if (addr_space)
		return 0;

	unw_init_remote(NULL, addr_space, NULL);
	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);

	return 0;
}

tools/build/feature/test-libunwind-arm.c (new file)
@ -0,0 +1,27 @@
#include <libunwind-arm.h>
#include <stdlib.h>

extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
					       unw_word_t ip,
					       unw_dyn_info_t *di,
					       unw_proc_info_t *pi,
					       int need_unwind_info, void *arg);

#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)

static unw_accessors_t accessors;

int main(void)
{
	unw_addr_space_t addr_space;

	addr_space = unw_create_addr_space(&accessors, 0);
	if (addr_space)
		return 0;

	unw_init_remote(NULL, addr_space, NULL);
	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);

	return 0;
}

tools/build/feature/test-libunwind-debug-frame-aarch64.c (new file)
@ -0,0 +1,16 @@
#include <libunwind-aarch64.h>
#include <stdlib.h>

extern int
UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
				 unw_word_t ip, unw_word_t segbase,
				 const char *obj_name, unw_word_t start,
				 unw_word_t end);

#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)

int main(void)
{
	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
	return 0;
}

tools/build/feature/test-libunwind-debug-frame-arm.c (new file)
@ -0,0 +1,16 @@
#include <libunwind-arm.h>
#include <stdlib.h>

extern int
UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
				 unw_word_t ip, unw_word_t segbase,
				 const char *obj_name, unw_word_t start,
				 unw_word_t end);

#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)

int main(void)
{
	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
	return 0;
}

tools/build/feature/test-libunwind-x86.c (new file)
@ -0,0 +1,27 @@
#include <libunwind-x86.h>
#include <stdlib.h>

extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
					       unw_word_t ip,
					       unw_dyn_info_t *di,
					       unw_proc_info_t *pi,
					       int need_unwind_info, void *arg);

#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)

static unw_accessors_t accessors;

int main(void)
{
	unw_addr_space_t addr_space;

	addr_space = unw_create_addr_space(&accessors, 0);
	if (addr_space)
		return 0;

	unw_init_remote(NULL, addr_space, NULL);
	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);

	return 0;
}

tools/build/feature/test-libunwind-x86_64.c (new file)
@ -0,0 +1,27 @@
#include <libunwind-x86_64.h>
#include <stdlib.h>

extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
					       unw_word_t ip,
					       unw_dyn_info_t *di,
					       unw_proc_info_t *pi,
					       int need_unwind_info, void *arg);

#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)

static unw_accessors_t accessors;

int main(void)
{
	unw_addr_space_t addr_space;

	addr_space = unw_create_addr_space(&accessors, 0);
	if (addr_space)
		return 0;

	unw_init_remote(NULL, addr_space, NULL);
	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);

	return 0;
}
@ -351,6 +351,19 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
	return err;
}

int procfs__read_str(const char *entry, char **buf, size_t *sizep)
{
	char path[PATH_MAX];
	const char *procfs = procfs__mountpoint();

	if (!procfs)
		return -1;

	snprintf(path, sizeof(path), "%s/%s", procfs, entry);

	return filename__read_str(path, buf, sizep);
}

int sysfs__read_ull(const char *entry, unsigned long long *value)
{
	char path[PATH_MAX];

@ -29,6 +29,8 @@ int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep);

int procfs__read_str(const char *entry, char **buf, size_t *sizep);

int sysctl__read_int(const char *sysctl, int *value);
int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);
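A plausible call site for the new helper, reading the perf_event_max_stack knob added earlier in this series (hypothetical usage, not from the patch; it assumes the returned buffer ends with a newline that can be clobbered):

    #include <stdio.h>
    #include <stdlib.h>
    #include <api/fs/fs.h>

    /* Hypothetical caller: read the max callchain depth the kernel allows. */
    static int read_max_stack(void)
    {
    	char *buf = NULL;
    	size_t size = 0;
    	int max_stack = 127;	/* documented fallback */

    	if (procfs__read_str("sys/kernel/perf_event_max_stack",
    			     &buf, &size) == 0 && size > 0) {
    		buf[size - 1] = '\0';	/* clobber the trailing newline */
    		max_stack = atoi(buf);
    		free(buf);
    	}
    	return max_stack;
    }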
@ -672,6 +672,7 @@ The letters are:
		d	create a debug log
		g	synthesize a call chain (use with i or x)
		l	synthesize last branch entries (use with i or x)
		s	skip initial number of events

"Instructions" events look like they were recorded by "perf record -e
instructions".

@ -730,6 +731,12 @@ from one sample to the next.

To disable trace decoding entirely, use the option --no-itrace.

It is also possible to skip events (instructions, branches, transactions)
generated at the beginning of the trace. This is useful for ignoring
initialization code.

	--itrace=i0nss1000000

skips the first million instructions.

dump option
-----------

@ -7,6 +7,7 @@
		d	create a debug log
		g	synthesize a call chain (use with i or x)
		l	synthesize last branch entries (use with i or x)
		s	skip initial number of events

The default is all events, i.e. the same as --itrace=ibxe

@ -24,3 +25,10 @@

Also the number of last branch entries (default 64, max. 1024) for
instructions or transactions events can be specified.

It is also possible to skip events (instructions, branches, transactions)
generated at the beginning of the trace. This is useful for ignoring
initialization code.

	--itrace=i0nss1000000

skips the first million instructions.
@ -33,7 +33,7 @@ OPTIONS

-f::
--force::
	Don't complain, do it.
	Don't do ownership validation.

-v::
--verbose::

@ -75,7 +75,7 @@ OPTIONS

-f::
--force::
	Don't complain, do it.
	Don't do ownership validation.

--symfs=<directory>::
	Look for files with symbols relative to this directory.
@ -93,6 +93,67 @@ raw encoding of 0x1A8 can be used:
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.

ARBITRARY PMUS
--------------

perf also supports an extended syntax for specifying raw parameters
to PMUs. Using this typically requires looking up the specific event
in the CPU vendor specific documentation.

The available PMUs and their raw parameters can be listed with

  ls /sys/devices/*/format

For example the "LSD.UOPS" core PMU event above could
be specified as

  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...

PER SOCKET PMUS
---------------

Some PMUs are not associated with a core, but with a whole CPU socket.
Events on these PMUs generally cannot be sampled, but only counted globally
with perf stat -a. They can be bound to one logical CPU, but will measure
all the CPUs in the same socket.

This example measures memory bandwidth every second
on the first memory controller on socket 0 of an Intel Xeon system

  perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...

Each memory controller has its own PMU. Measuring the complete system
bandwidth would require specifying all imc PMUs (see perf list output),
and adding the values together.

This example measures the combined core power every second

  perf stat -I 1000 -e power/energy-cores/ -a

ACCESS RESTRICTIONS
-------------------

For non-root users, generally only context-switched PMU events are available.
This is normally only the events in the cpu PMU, the predefined events
like cycles and instructions and some software events.

Other PMUs and global measurements are normally root only.
Some event qualifiers, such as "any", are also root only.

This can be overridden by setting the kernel.perf_event_paranoid
sysctl to -1, which allows non-root users to use these events.

For accessing trace point events perf needs to have read access to
/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
setting.

TRACING
-------

Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
that allow low overhead execution tracing. These are described in a separate
intel-pt.txt document.

PARAMETERIZED EVENTS
--------------------

@ -106,6 +167,50 @@ also be supplied. For example:

  perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...

EVENT GROUPS
------------

Perf supports time based multiplexing of events, when the number of active
events exceeds the number of hardware performance counters. Multiplexing
can cause measurement errors when the workload changes its execution
profile.

When metrics are computed using formulas from event counts, it is useful to
ensure some events are always measured together as a group to minimize multiplexing
errors. Event groups can be specified using { }.

  perf stat -e '{instructions,cycles}' ...

The number of available performance counters depends on the CPU. A group
cannot contain more events than available counters.
For example Intel Core CPUs typically have four generic performance counters
for the core, plus three fixed counters for instructions, cycles and
ref-cycles. Some special events have restrictions on which counter they
can schedule, and may not support multiple instances in a single group.
When too many events are specified in the group, none of them will
be measured.

Globally pinned events can limit the number of counters available for
other groups. On x86 systems, the NMI watchdog pins a counter by default.
The nmi watchdog can be disabled as root with

	echo 0 > /proc/sys/kernel/nmi_watchdog

Events from multiple different PMUs cannot be mixed in a group, with
some exceptions for software events.
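For readers driving perf_event_open(2) directly rather than perf stat, the same grouping is expressed by passing the leader's fd as group_fd. A minimal sketch, with the specific event choices being assumptions:

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Minimal sketch: cycles as group leader, instructions as a member. */
    static int open_group(int *leader, int *member)
    {
    	struct perf_event_attr attr;

    	memset(&attr, 0, sizeof(attr));
    	attr.type = PERF_TYPE_HARDWARE;
    	attr.size = sizeof(attr);
    	attr.config = PERF_COUNT_HW_CPU_CYCLES;
    	attr.disabled = 1;

    	/* group_fd == -1 creates a new group with this event as leader */
    	*leader = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    	if (*leader < 0)
    		return -1;

    	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
    	attr.disabled = 0;	/* members follow the leader's state */
    	*member = syscall(__NR_perf_event_open, &attr, 0, -1, *leader, 0);
    	return *member < 0 ? -1 : 0;
    }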
LEADER SAMPLING
---------------

perf also supports group leader sampling using the :S specifier.

  perf record -e '{cycles,instructions}:S' ...
  perf report --group

Normally all events in an event group sample, but with :S only
the first event (the leader) samples, and it only reads the values of the
other events in the group.

OPTIONS
-------

@ -143,5 +248,5 @@ SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1],
http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer's Manual Volume 2: System Programming]
@ -48,6 +48,14 @@ OPTIONS
	option can be passed in record mode. It will be interpreted the same way as perf
	record.

-K::
--all-kernel::
	Configure all used events to run in kernel space.

-U::
--all-user::
	Configure all used events to run in user space.

SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]
@ -347,6 +347,19 @@ Configure all used events to run in kernel space.
--all-user::
	Configure all used events to run in user space.

--timestamp-filename::
	Append timestamp to output file name.

--switch-output::
	Generate multiple perf.data files, timestamp prefixed, switching to a new one
	when receiving a SIGUSR2.

	A possible use case is, given an external event, to slice the perf.data file;
	the slice then gets processed, possibly via a perf script, to decide whether
	that particular perf.data snapshot should be kept or not.

	Implies --timestamp-filename, --no-buildid and --no-buildid-cache.

SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
@ -248,7 +248,7 @@ OPTIONS
	Note that when using the --itrace option the synthesized callchain size
	will override this value if the synthesized callchain size is bigger.

	Default: 127
	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.

-G::
--inverted::

@ -285,7 +285,7 @@ OPTIONS

-f::
--force::
	Don't complain, do it.
	Don't do ownership validation.

--symfs=<directory>::
	Look for files with symbols relative to this directory.
@ -50,6 +50,22 @@ OPTIONS
--dump-raw-trace=::
	Display verbose dump of the sched data.

OPTIONS for 'perf sched map'
----------------------------

--compact::
	Show only CPUs with activity. Helps with visualization on high core
	count systems.

--cpus::
	Show just entries with activities for the given CPUs.

--color-cpus::
	Highlight the given CPUs.

--color-pids::
	Highlight the given PIDs.

SEE ALSO
--------
linkperf:perf-record[1]
@ -259,9 +259,23 @@ include::itrace.txt[]
--full-source-path::
	Show the full path for source files for srcline output.

--max-stack::
	Set the stack depth limit when parsing the callchain, anything
	beyond the specified depth will be ignored. This is a trade-off
	between information loss and faster processing especially for
	workloads that can have a very long callchain stack.
	Note that when using the --itrace option the synthesized callchain size
	will override this value if the synthesized callchain size is bigger.

	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.

--ns::
	Use 9 decimal places when displaying time (i.e. show the nanoseconds)

-f::
--force::
	Don't do ownership validation.

SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
@ -177,7 +177,7 @@ Default is to monitor all CPUS.
	between information loss and faster processing especially for
	workloads that can have a very long callchain stack.

	Default: 127
	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.

--ignore-callees=<regex>::
	Ignore callees of the function(s) matching the given regex.
@ -117,9 +117,41 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--syscalls::
	Trace system calls. This option is enabled by default.

--call-graph [mode,type,min[,limit],order[,key][,branch]]::
	Set up and enable call-graph (stack chain/backtrace) recording.
	See the `--call-graph` section in the perf-record and perf-report
	man pages for details. The modes most useful in 'perf trace' are
	'dwarf' and 'lbr', where available; try: 'perf trace --call-graph dwarf'.

	Using this will, for the root user, bump the value of --mmap-pages to 4
	times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
	sysctl. This is done only if the user doesn't specify a --mmap-pages value.

--kernel-syscall-graph::
	Show the kernel callchains on the syscall exit path.

--event::
	Trace other events, see 'perf list' for a complete list.

--max-stack::
	Set the stack depth limit when parsing the callchain, anything
	beyond the specified depth will be ignored. Note that at this point
	this is just about the presentation part, i.e. the kernel is still
	not limiting, the overhead of callchains needs to be set via the
	knobs in --call-graph dwarf.

	Implies '--call-graph dwarf' when --call-graph is not present on the
	command line, on systems where DWARF unwinding was built in.

	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.

--min-stack::
	Set the stack depth limit when parsing the callchain, anything
	below the specified depth will be ignored. Disabled by default.

	Implies '--call-graph dwarf' when --call-graph is not present on the
	command line, on systems where DWARF unwinding was built in.

--proc-map-timeout::
	When processing pre-existing threads' /proc/XXX/mmap, it may take a
	long time, because the file may be huge. A timeout is needed in such
	cases.
@ -183,6 +183,11 @@ endif
include config/Makefile
endif

ifeq ($(config),0)
include $(srctree)/tools/scripts/Makefile.arch
-include arch/$(ARCH)/Makefile
endif

# The FEATURE_DUMP_EXPORT holds location of the actual
# FEATURE_DUMP file to be used to bypass feature detection
# (for bpf or any other subproject)

@ -297,8 +302,6 @@ endif
# because maintaining the nesting to match is a pain. If
# we had "elif" things would have been much nicer...

-include arch/$(ARCH)/Makefile

ifneq ($(OUTPUT),)
CFLAGS += -I$(OUTPUT)
endif

@ -390,7 +393,7 @@ endif
__build-dir = $(subst $(OUTPUT),,$(dir $@))
build-dir   = $(if $(__build-dir),$(__build-dir),.)

prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders

$(OUTPUT)%.o: %.c prepare FORCE
	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@

@ -430,7 +433,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)

LIBPERF_IN := $(OUTPUT)libperf-in.o

$(LIBPERF_IN): fixdep FORCE
$(LIBPERF_IN): prepare fixdep FORCE
	$(Q)$(MAKE) $(build)=libperf

$(LIB_FILE): $(LIBPERF_IN)

@ -625,7 +628,7 @@ config-clean:
	$(call QUIET_CLEAN, config)
	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null

clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
	$(Q)$(RM) $(OUTPUT).config-detected

@ -662,5 +665,5 @@ FORCE:
.PHONY: all install clean config-clean strip install-gtk
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
.PHONY: libtraceevent_plugins
.PHONY: libtraceevent_plugins archheaders
@ -3,4 +3,5 @@ PERF_HAVE_DWARF_REGS := 1
endif

HAVE_KVM_STAT_SUPPORT := 1
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
@ -10,19 +10,26 @@
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <dwarf-regs.h>

#include <linux/ptrace.h>
#include <linux/kernel.h>
#include "util.h"

struct pt_regs_dwarfnum {
	const char *name;
	unsigned int dwarfnum;
	unsigned int ptregs_offset;
};

#define STR(s) #s
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
#define GPR_DWARFNUM_NAME(num)	\
	{.name = STR(%gpr##num), .dwarfnum = num}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
#define REG_DWARFNUM_NAME(r, num)					\
		{.name = STR(%)STR(r), .dwarfnum = num,			\
		.ptregs_offset = offsetof(struct pt_regs, r)}
#define GPR_DWARFNUM_NAME(num)						\
		{.name = STR(%gpr##num), .dwarfnum = num,		\
		.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}

/*
 * Reference:

@ -61,12 +68,12 @@ static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
	GPR_DWARFNUM_NAME(29),
	GPR_DWARFNUM_NAME(30),
	GPR_DWARFNUM_NAME(31),
	REG_DWARFNUM_NAME("%msr",   66),
	REG_DWARFNUM_NAME("%ctr",   109),
	REG_DWARFNUM_NAME("%link",  108),
	REG_DWARFNUM_NAME("%xer",   101),
	REG_DWARFNUM_NAME("%dar",   119),
	REG_DWARFNUM_NAME("%dsisr", 118),
	REG_DWARFNUM_NAME(msr,   66),
	REG_DWARFNUM_NAME(ctr,   109),
	REG_DWARFNUM_NAME(link,  108),
	REG_DWARFNUM_NAME(xer,   101),
	REG_DWARFNUM_NAME(dar,   119),
	REG_DWARFNUM_NAME(dsisr, 118),
	REG_DWARFNUM_END,
};

@ -86,3 +93,12 @@ const char *get_arch_regstr(unsigned int n)
		return roff->name;
	return NULL;
}

int regs_query_register_offset(const char *name)
{
	const struct pt_regs_dwarfnum *roff;
	for (roff = regdwarfnum_table; roff->name != NULL; roff++)
		if (!strcmp(roff->name, name))
			return roff->ptregs_offset;
	return -EINVAL;
}
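To see what the reworked macros produce, the expansion of one table entry, worked out by hand:

    /*
     * REG_DWARFNUM_NAME(msr, 66) expands to:
     *
     *   {.name = "%" "msr", .dwarfnum = 66,
     *    .ptregs_offset = offsetof(struct pt_regs, msr)}
     *
     * so the same identifier now yields both the user-visible "%msr" string
     * and the field offset that regs_query_register_offset() returns.
     */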
@ -19,12 +19,6 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
	       ehdr.e_type == ET_DYN;
}

#if defined(_CALL_ELF) && _CALL_ELF == 2
void arch__elf_sym_adjust(GElf_Sym *sym)
{
	sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
}
#endif
#endif

#if !defined(_CALL_ELF) || _CALL_ELF != 2

@ -65,18 +59,45 @@ bool arch__prefers_symtab(void)
	return true;
}

#ifdef HAVE_LIBELF_SUPPORT
void arch__sym_update(struct symbol *s, GElf_Sym *sym)
{
	s->arch_sym = sym->st_other;
}
#endif

#define PPC64LE_LEP_OFFSET	8

void arch__fix_tev_from_maps(struct perf_probe_event *pev,
			     struct probe_trace_event *tev, struct map *map)
			     struct probe_trace_event *tev, struct map *map,
			     struct symbol *sym)
{
	int lep_offset;

	/*
	 * ppc64 ABIv2 local entry point is currently always 2 instructions
	 * (8 bytes) after the global entry point.
	 * When probing at a function entry point, we normally always want the
	 * LEP since that catches calls to the function through both the GEP and
	 * the LEP. Hence, we would like to probe at an offset of 8 bytes if
	 * the user only specified the function entry.
	 *
	 * However, if the user specifies an offset, we fall back to using the
	 * GEP since all userspace applications (objdump/readelf) show function
	 * disassembly with offsets from the GEP.
	 *
	 * In addition, we shouldn't specify an offset for kretprobes.
	 */
	if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
		tev->point.address += PPC64LE_LEP_OFFSET;
	if (pev->point.offset || pev->point.retprobe || !map || !sym)
		return;

	lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);

	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
		tev->point.offset += PPC64LE_LEP_OFFSET;
	else if (lep_offset) {
		if (pev->uprobes)
			tev->point.address += lep_offset;
		else
			tev->point.offset += lep_offset;
	}
}
#endif
@ -4,3 +4,26 @@ endif
HAVE_KVM_STAT_SUPPORT := 1
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1

###
# Syscall table generation
#

out    := $(OUTPUT)arch/x86/include/generated/asm
header := $(out)/syscalls_64.c
sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
systbl := $(sys)/syscalltbl.sh

# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')

$(header): $(sys)/syscall_64.tbl $(systbl)
	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
	(diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
	|| echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true
	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@

clean::
	$(call QUIET_CLEAN, x86) $(RM) $(header)

archheaders: $(header)
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl (new file)
@ -0,0 +1,376 @@
#
# 64-bit system call numbers and entry vectors
#
# The format is:
# <number> <abi> <name> <entry point>
#
# The abi is "common", "64" or "x32" for this file.
#
0	common	read			sys_read
1	common	write			sys_write
2	common	open			sys_open
3	common	close			sys_close
4	common	stat			sys_newstat
5	common	fstat			sys_newfstat
6	common	lstat			sys_newlstat
7	common	poll			sys_poll
8	common	lseek			sys_lseek
9	common	mmap			sys_mmap
10	common	mprotect		sys_mprotect
11	common	munmap			sys_munmap
12	common	brk			sys_brk
13	64	rt_sigaction		sys_rt_sigaction
14	common	rt_sigprocmask		sys_rt_sigprocmask
15	64	rt_sigreturn		sys_rt_sigreturn/ptregs
16	64	ioctl			sys_ioctl
17	common	pread64			sys_pread64
18	common	pwrite64		sys_pwrite64
19	64	readv			sys_readv
20	64	writev			sys_writev
21	common	access			sys_access
22	common	pipe			sys_pipe
23	common	select			sys_select
24	common	sched_yield		sys_sched_yield
25	common	mremap			sys_mremap
26	common	msync			sys_msync
27	common	mincore			sys_mincore
28	common	madvise			sys_madvise
29	common	shmget			sys_shmget
30	common	shmat			sys_shmat
31	common	shmctl			sys_shmctl
32	common	dup			sys_dup
33	common	dup2			sys_dup2
34	common	pause			sys_pause
35	common	nanosleep		sys_nanosleep
36	common	getitimer		sys_getitimer
37	common	alarm			sys_alarm
38	common	setitimer		sys_setitimer
39	common	getpid			sys_getpid
40	common	sendfile		sys_sendfile64
41	common	socket			sys_socket
42	common	connect			sys_connect
43	common	accept			sys_accept
44	common	sendto			sys_sendto
45	64	recvfrom		sys_recvfrom
46	64	sendmsg			sys_sendmsg
47	64	recvmsg			sys_recvmsg
48	common	shutdown		sys_shutdown
49	common	bind			sys_bind
50	common	listen			sys_listen
51	common	getsockname		sys_getsockname
52	common	getpeername		sys_getpeername
53	common	socketpair		sys_socketpair
54	64	setsockopt		sys_setsockopt
55	64	getsockopt		sys_getsockopt
56	common	clone			sys_clone/ptregs
57	common	fork			sys_fork/ptregs
58	common	vfork			sys_vfork/ptregs
59	64	execve			sys_execve/ptregs
60	common	exit			sys_exit
61	common	wait4			sys_wait4
62	common	kill			sys_kill
63	common	uname			sys_newuname
64	common	semget			sys_semget
65	common	semop			sys_semop
66	common	semctl			sys_semctl
67	common	shmdt			sys_shmdt
68	common	msgget			sys_msgget
69	common	msgsnd			sys_msgsnd
70	common	msgrcv			sys_msgrcv
71	common	msgctl			sys_msgctl
72	common	fcntl			sys_fcntl
73	common	flock			sys_flock
74	common	fsync			sys_fsync
75	common	fdatasync		sys_fdatasync
76	common	truncate		sys_truncate
77	common	ftruncate		sys_ftruncate
78	common	getdents		sys_getdents
79	common	getcwd			sys_getcwd
80	common	chdir			sys_chdir
81	common	fchdir			sys_fchdir
82	common	rename			sys_rename
83	common	mkdir			sys_mkdir
84	common	rmdir			sys_rmdir
85	common	creat			sys_creat
86	common	link			sys_link
87	common	unlink			sys_unlink
88	common	symlink			sys_symlink
89	common	readlink		sys_readlink
90	common	chmod			sys_chmod
91	common	fchmod			sys_fchmod
92	common	chown			sys_chown
93	common	fchown			sys_fchown
94	common	lchown			sys_lchown
95	common	umask			sys_umask
96	common	gettimeofday		sys_gettimeofday
97	common	getrlimit		sys_getrlimit
98	common	getrusage		sys_getrusage
99	common	sysinfo			sys_sysinfo
100	common	times			sys_times
101	64	ptrace			sys_ptrace
102	common	getuid			sys_getuid
103	common	syslog			sys_syslog
104	common	getgid			sys_getgid
105	common	setuid			sys_setuid
106	common	setgid			sys_setgid
107	common	geteuid			sys_geteuid
108	common	getegid			sys_getegid
109	common	setpgid			sys_setpgid
110	common	getppid			sys_getppid
111	common	getpgrp			sys_getpgrp
112	common	setsid			sys_setsid
113	common	setreuid		sys_setreuid
114	common	setregid		sys_setregid
115	common	getgroups		sys_getgroups
116	common	setgroups		sys_setgroups
117	common	setresuid		sys_setresuid
118	common	getresuid		sys_getresuid
119	common	setresgid		sys_setresgid
120	common	getresgid		sys_getresgid
121	common	getpgid			sys_getpgid
122	common	setfsuid		sys_setfsuid
123	common	setfsgid		sys_setfsgid
124	common	getsid			sys_getsid
125	common	capget			sys_capget
126	common	capset			sys_capset
127	64	rt_sigpending		sys_rt_sigpending
128	64	rt_sigtimedwait		sys_rt_sigtimedwait
129	64	rt_sigqueueinfo		sys_rt_sigqueueinfo
130	common	rt_sigsuspend		sys_rt_sigsuspend
131	64	sigaltstack		sys_sigaltstack
132	common	utime			sys_utime
133	common	mknod			sys_mknod
134	64	uselib
135	common	personality		sys_personality
136	common	ustat			sys_ustat
137	common	statfs			sys_statfs
138	common	fstatfs			sys_fstatfs
139	common	sysfs			sys_sysfs
140	common	getpriority		sys_getpriority
141	common	setpriority		sys_setpriority
142	common	sched_setparam		sys_sched_setparam
143	common	sched_getparam		sys_sched_getparam
144	common	sched_setscheduler	sys_sched_setscheduler
145	common	sched_getscheduler	sys_sched_getscheduler
146	common	sched_get_priority_max	sys_sched_get_priority_max
147	common	sched_get_priority_min	sys_sched_get_priority_min
148	common	sched_rr_get_interval	sys_sched_rr_get_interval
149	common	mlock			sys_mlock
150	common	munlock			sys_munlock
151	common	mlockall		sys_mlockall
152	common	munlockall		sys_munlockall
153	common	vhangup			sys_vhangup
154	common	modify_ldt		sys_modify_ldt
155	common	pivot_root		sys_pivot_root
156	64	_sysctl			sys_sysctl
157	common	prctl			sys_prctl
158	common	arch_prctl		sys_arch_prctl
159	common	adjtimex		sys_adjtimex
160	common	setrlimit		sys_setrlimit
161	common	chroot			sys_chroot
162	common	sync			sys_sync
163	common	acct			sys_acct
164	common	settimeofday		sys_settimeofday
165	common	mount			sys_mount
166	common	umount2			sys_umount
167	common	swapon			sys_swapon
168	common	swapoff			sys_swapoff
169	common	reboot			sys_reboot
170	common	sethostname		sys_sethostname
171	common	setdomainname		sys_setdomainname
172	common	iopl			sys_iopl/ptregs
173	common	ioperm			sys_ioperm
174	64	create_module
175	common	init_module		sys_init_module
176	common	delete_module		sys_delete_module
177	64	get_kernel_syms
178	64	query_module
179	common	quotactl		sys_quotactl
180	64	nfsservctl
181	common	getpmsg
182	common	putpmsg
183	common	afs_syscall
184	common	tuxcall
185	common	security
186	common	gettid			sys_gettid
187	common	readahead		sys_readahead
188	common	setxattr		sys_setxattr
189	common	lsetxattr		sys_lsetxattr
190	common	fsetxattr		sys_fsetxattr
191	common	getxattr		sys_getxattr
192	common	lgetxattr		sys_lgetxattr
193	common	fgetxattr		sys_fgetxattr
194	common	listxattr		sys_listxattr
195	common	llistxattr		sys_llistxattr
196	common	flistxattr		sys_flistxattr
197	common	removexattr		sys_removexattr
198	common	lremovexattr		sys_lremovexattr
199	common	fremovexattr		sys_fremovexattr
200	common	tkill			sys_tkill
201	common	time			sys_time
202	common	futex			sys_futex
203	common	sched_setaffinity	sys_sched_setaffinity
204	common	sched_getaffinity	sys_sched_getaffinity
205	64	set_thread_area
206	64	io_setup		sys_io_setup
207	common	io_destroy		sys_io_destroy
208	common	io_getevents		sys_io_getevents
209	64	io_submit		sys_io_submit
210	common	io_cancel		sys_io_cancel
211	64	get_thread_area
212	common	lookup_dcookie		sys_lookup_dcookie
213	common	epoll_create		sys_epoll_create
214	64	epoll_ctl_old
215	64	epoll_wait_old
216	common	remap_file_pages	sys_remap_file_pages
217	common	getdents64		sys_getdents64
218	common	set_tid_address		sys_set_tid_address
219	common	restart_syscall		sys_restart_syscall
220	common	semtimedop		sys_semtimedop
221	common	fadvise64		sys_fadvise64
222	64	timer_create		sys_timer_create
223	common	timer_settime		sys_timer_settime
224	common	timer_gettime		sys_timer_gettime
225	common	timer_getoverrun	sys_timer_getoverrun
226	common	timer_delete		sys_timer_delete
227	common	clock_settime		sys_clock_settime
228	common	clock_gettime		sys_clock_gettime
229	common	clock_getres		sys_clock_getres
230	common	clock_nanosleep		sys_clock_nanosleep
231	common	exit_group		sys_exit_group
232	common	epoll_wait		sys_epoll_wait
233	common	epoll_ctl		sys_epoll_ctl
234	common	tgkill			sys_tgkill
235	common	utimes			sys_utimes
236	64	vserver
237	common	mbind			sys_mbind
238	common	set_mempolicy		sys_set_mempolicy
239	common	get_mempolicy		sys_get_mempolicy
240	common	mq_open			sys_mq_open
241	common	mq_unlink		sys_mq_unlink
242	common	mq_timedsend		sys_mq_timedsend
243	common	mq_timedreceive		sys_mq_timedreceive
244	64	mq_notify		sys_mq_notify
245	common	mq_getsetattr		sys_mq_getsetattr
246	64	kexec_load		sys_kexec_load
247	64	waitid			sys_waitid
248	common	add_key			sys_add_key
249	common	request_key		sys_request_key
250	common	keyctl			sys_keyctl
251	common	ioprio_set		sys_ioprio_set
252	common	ioprio_get		sys_ioprio_get
253	common	inotify_init		sys_inotify_init
254	common	inotify_add_watch	sys_inotify_add_watch
255	common	inotify_rm_watch	sys_inotify_rm_watch
256	common	migrate_pages		sys_migrate_pages
257	common	openat			sys_openat
258	common	mkdirat			sys_mkdirat
259	common	mknodat			sys_mknodat
260	common	fchownat		sys_fchownat
261	common	futimesat		sys_futimesat
262	common	newfstatat		sys_newfstatat
263	common	unlinkat		sys_unlinkat
264	common	renameat		sys_renameat
265	common	linkat			sys_linkat
266	common	symlinkat		sys_symlinkat
267	common	readlinkat		sys_readlinkat
268	common	fchmodat		sys_fchmodat
269	common	faccessat		sys_faccessat
270	common	pselect6		sys_pselect6
271	common	ppoll			sys_ppoll
272	common	unshare			sys_unshare
273	64	set_robust_list		sys_set_robust_list
274	64	get_robust_list		sys_get_robust_list
275	common	splice			sys_splice
276	common	tee			sys_tee
277	common	sync_file_range		sys_sync_file_range
278	64	vmsplice		sys_vmsplice
279	64	move_pages		sys_move_pages
280	common	utimensat		sys_utimensat
281	common	epoll_pwait		sys_epoll_pwait
282	common	signalfd		sys_signalfd
283	common	timerfd_create		sys_timerfd_create
284	common	eventfd			sys_eventfd
285	common	fallocate		sys_fallocate
286	common	timerfd_settime		sys_timerfd_settime
287	common	timerfd_gettime		sys_timerfd_gettime
288	common	accept4			sys_accept4
289	common	signalfd4		sys_signalfd4
290	common	eventfd2		sys_eventfd2
291	common	epoll_create1		sys_epoll_create1
292	common	dup3			sys_dup3
293	common	pipe2			sys_pipe2
294	common	inotify_init1		sys_inotify_init1
295	64	preadv			sys_preadv
296	64	pwritev			sys_pwritev
297	64	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo
298	common	perf_event_open		sys_perf_event_open
299	64	recvmmsg		sys_recvmmsg
300	common	fanotify_init		sys_fanotify_init
301	common	fanotify_mark		sys_fanotify_mark
302	common	prlimit64		sys_prlimit64
303	common	name_to_handle_at	sys_name_to_handle_at
304	common	open_by_handle_at	sys_open_by_handle_at
305	common	clock_adjtime		sys_clock_adjtime
306	common	syncfs			sys_syncfs
307	64	sendmmsg		sys_sendmmsg
308	common	setns			sys_setns
309	common	getcpu			sys_getcpu
310	64	process_vm_readv	sys_process_vm_readv
311	64	process_vm_writev	sys_process_vm_writev
312	common	kcmp			sys_kcmp
313	common	finit_module		sys_finit_module
314	common	sched_setattr		sys_sched_setattr
315	common	sched_getattr		sys_sched_getattr
316	common	renameat2		sys_renameat2
317	common	seccomp			sys_seccomp
318	common	getrandom		sys_getrandom
319	common	memfd_create		sys_memfd_create
320	common	kexec_file_load		sys_kexec_file_load
321	common	bpf			sys_bpf
322	64	execveat		sys_execveat/ptregs
323	common	userfaultfd		sys_userfaultfd
324	common	membarrier		sys_membarrier
325	common	mlock2			sys_mlock2
326	common	copy_file_range		sys_copy_file_range
327	64	preadv2			sys_preadv2
328	64	pwritev2		sys_pwritev2

#
# x32-specific system call numbers start at 512 to avoid cache impact
# for native 64-bit operation.
#
512	x32	rt_sigaction		compat_sys_rt_sigaction
513	x32	rt_sigreturn		sys32_x32_rt_sigreturn
514	x32	ioctl			compat_sys_ioctl
515	x32	readv			compat_sys_readv
516	x32	writev			compat_sys_writev
517	x32	recvfrom		compat_sys_recvfrom
518	x32	sendmsg			compat_sys_sendmsg
519	x32	recvmsg			compat_sys_recvmsg
520	x32	execve			compat_sys_execve/ptregs
521	x32	ptrace			compat_sys_ptrace
522	x32	rt_sigpending		compat_sys_rt_sigpending
523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
524	x32	rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
525	x32	sigaltstack		compat_sys_sigaltstack
526	x32	timer_create		compat_sys_timer_create
527	x32	mq_notify		compat_sys_mq_notify
528	x32	kexec_load		compat_sys_kexec_load
529	x32	waitid			compat_sys_waitid
530	x32	set_robust_list		compat_sys_set_robust_list
531	x32	get_robust_list		compat_sys_get_robust_list
532	x32	vmsplice		compat_sys_vmsplice
533	x32	move_pages		compat_sys_move_pages
534	x32	preadv			compat_sys_preadv64
535	x32	pwritev			compat_sys_pwritev64
536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
537	x32	recvmmsg		compat_sys_recvmmsg
538	x32	sendmmsg		compat_sys_sendmmsg
539	x32	process_vm_readv	compat_sys_process_vm_readv
540	x32	process_vm_writev	compat_sys_process_vm_writev
541	x32	setsockopt		compat_sys_setsockopt
542	x32	getsockopt		compat_sys_getsockopt
543	x32	io_setup		compat_sys_io_setup
544	x32	io_submit		compat_sys_io_submit
545	x32	execveat		compat_sys_execveat/ptregs
tools/perf/arch/x86/entry/syscalls/syscalltbl.sh (new executable file, 39 lines)
@ -0,0 +1,39 @@
#!/bin/sh

in="$1"
arch="$2"

syscall_macro() {
	nr="$1"
	name="$2"

	echo "	[$nr] = \"$name\","
}

emit() {
	nr="$1"
	entry="$2"

	syscall_macro "$nr" "$entry"
}

echo "static const char *syscalltbl_${arch}[] = {"

sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
grep '^[0-9]' "$in" | sort -n > $sorted_table

max_nr=0
while read nr abi name entry compat; do
	if [ $nr -ge 512 ] ; then # discard compat syscalls
		break
	fi

	emit "$nr" "$name"
	max_nr=$nr
done < $sorted_table

rm -f $sorted_table

echo "};"

echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused)

	CHECK__(parse_events(evlist, "cycles:u", NULL));

	perf_evlist__config(evlist, &opts);
	perf_evlist__config(evlist, &opts, NULL);

	evsel = perf_evlist__first(evlist);

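The call sites in this series imply that perf_evlist__config() grew a third parameter for callchain configuration. A sketch of the new signature (parameter names are assumptions inferred from the callers, which pass either NULL or &callchain_param):

void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
			 struct callchain_param *callchain);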
@ -438,6 +438,11 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
	if (!intel_bts_pmu)
		return NULL;

	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
		*err = -errno;
		return NULL;
	}

	btsr = zalloc(sizeof(struct intel_bts_recording));
	if (!btsr) {
		*err = -ENOMEM;

@ -1027,6 +1027,11 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
	if (!intel_pt_pmu)
		return NULL;

	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
		*err = -errno;
		return NULL;
	}

	ptr = zalloc(sizeof(struct intel_pt_recording));
	if (!ptr) {
		*err = -ENOMEM;

@ -7,7 +7,6 @@
#include <linux/types.h>
#include "../../util/debug.h"
#include "../../util/tsc.h"
#include "tsc.h"

int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
			     struct perf_tsc_conversion *tc)

@ -46,3 +45,34 @@ u64 rdtsc(void)

	return low | ((u64)high) << 32;
}

int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
				struct perf_tool *tool,
				perf_event__handler_t process,
				struct machine *machine)
{
	union perf_event event = {
		.time_conv = {
			.header = {
				.type = PERF_RECORD_TIME_CONV,
				.size = sizeof(struct time_conv_event),
			},
		},
	};
	struct perf_tsc_conversion tc;
	int err;

	err = perf_read_tsc_conversion(pc, &tc);
	if (err == -EOPNOTSUPP)
		return 0;
	if (err)
		return err;

	pr_debug2("Synthesizing TSC conversion information\n");

	event.time_conv.time_mult = tc.time_mult;
	event.time_conv.time_shift = tc.time_shift;
	event.time_conv.time_zero = tc.time_zero;

	return process(tool, &event, NULL, machine);
}

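The three fields carried by PERF_RECORD_TIME_CONV are exactly what a consumer needs to turn raw TSC values into perf timestamps. A sketch of the conversion, following the algorithm documented for the perf mmap page (the helper name is illustrative):

#include <stdint.h>

/* Convert a raw TSC value using the time_shift/time_mult/time_zero
 * triple from PERF_RECORD_TIME_CONV; the value is split into quotient
 * and remainder to avoid overflowing the 64-bit multiply. */
static uint64_t example_tsc_to_perf_time(uint64_t cyc, uint16_t time_shift,
					 uint32_t time_mult, uint64_t time_zero)
{
	uint64_t quot = cyc >> time_shift;
	uint64_t rem  = cyc & (((uint64_t)1 << time_shift) - 1);

	return time_zero + quot * time_mult +
	       ((rem * time_mult) >> time_shift);
}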
@ -1,17 +0,0 @@
#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__

#include <linux/types.h>

struct perf_tsc_conversion {
	u16 time_shift;
	u32 time_mult;
	u64 time_zero;
};

struct perf_event_mmap_page;

int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
			     struct perf_tsc_conversion *tc);

#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
@ -83,7 +83,7 @@ static void *workerfn(void *arg)
	do {
		int ret;
	again:
		ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
		ret = futex_lock_pi(w->futex, NULL, futex_flag);

		if (ret) { /* handle lock acquisition */
			if (!silent)

@ -57,13 +57,11 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)

/**
 * futex_lock_pi() - block on uaddr as a PI mutex
 * @detect: whether (1) or not (0) to perform deadlock detection
 */
static inline int
futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
	      int opflags)
futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
{
	return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
	return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
}

/**

@ -6,6 +6,7 @@
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */

#include "debug.h"
#include "../perf.h"
#include "../util/util.h"
#include <subcmd/parse-options.h>

@ -63,14 +64,16 @@ static struct perf_event_attr cycle_attr = {
	.config = PERF_COUNT_HW_CPU_CYCLES
};

static void init_cycles(void)
static int init_cycles(void)
{
	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());

	if (cycles_fd < 0 && errno == ENOSYS)
		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
	else
		BUG_ON(cycles_fd < 0);
	if (cycles_fd < 0 && errno == ENOSYS) {
		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		return -1;
	}

	return cycles_fd;
}

static u64 get_cycles(void)

@ -155,8 +158,13 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *

	argc = parse_options(argc, argv, options, info->usage, 0);

	if (use_cycles)
		init_cycles();
	if (use_cycles) {
		i = init_cycles();
		if (i < 0) {
			fprintf(stderr, "Failed to open cycles counter\n");
			return i;
		}
	}

	size = (size_t)perf_atoll((char *)size_str);
	size_total = (double)size * nr_loops;

@ -12,6 +12,7 @@
#include <subcmd/parse-options.h>
#include "util/util.h"
#include "util/debug.h"
#include "util/config.h"

static bool use_system_config, use_user_config;

@ -32,13 +33,28 @@ static struct option config_options[] = {
	OPT_END()
};

static int show_config(const char *key, const char *value,
		       void *cb __maybe_unused)
static int show_config(struct perf_config_set *set)
{
	if (value)
		printf("%s=%s\n", key, value);
	else
		printf("%s\n", key);
	struct perf_config_section *section;
	struct perf_config_item *item;
	struct list_head *sections;

	if (set == NULL)
		return -1;

	sections = &set->sections;
	if (list_empty(sections))
		return -1;

	list_for_each_entry(section, sections, node) {
		list_for_each_entry(item, &section->items, node) {
			char *value = item->value;

			if (value)
				printf("%s.%s=%s\n", section->name,
				       item->name, value);
		}
	}

	return 0;
}
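
show_config() above walks a two-level list: sections, each holding items. A sketch of the data model it assumes (member names are taken from the code above; any additional fields in util/config.h are omitted here):

struct perf_config_item {
	char			*name;
	char			*value;
	struct list_head	node;
};

struct perf_config_section {
	char			*name;
	struct list_head	items;
	struct list_head	node;
};

struct perf_config_set {
	struct list_head	sections;
};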
@ -46,6 +62,7 @@ static int show_config(const char *key, const char *value,
int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int ret = 0;
	struct perf_config_set *set;
	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));

	argc = parse_options(argc, argv, config_options, config_usage,

@ -63,13 +80,19 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
	else if (use_user_config)
		config_exclusive_filename = user_config;

	set = perf_config_set__new();
	if (!set) {
		ret = -1;
		goto out_err;
	}

	switch (actions) {
	case ACTION_LIST:
		if (argc) {
			pr_err("Error: takes no arguments\n");
			parse_options_usage(config_usage, config_options, "l", 1);
		} else {
			ret = perf_config(show_config, NULL);
			ret = show_config(set);
			if (ret < 0) {
				const char * config_filename = config_exclusive_filename;
				if (!config_exclusive_filename)

@ -83,5 +106,7 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
		usage_with_options(config_usage, config_options);
	}

	perf_config_set__delete(set);
out_err:
	return ret;
}

@ -428,7 +428,7 @@ static void hists__baseline_only(struct hists *hists)
	struct rb_root *root;
	struct rb_node *next;

	if (sort__need_collapse)
	if (hists__has(hists, need_collapse))
		root = &hists->entries_collapsed;
	else
		root = hists->entries_in;

@ -450,7 +450,7 @@ static void hists__precompute(struct hists *hists)
	struct rb_root *root;
	struct rb_node *next;

	if (sort__need_collapse)
	if (hists__has(hists, need_collapse))
		root = &hists->entries_collapsed;
	else
		root = hists->entries_in;

@ -61,6 +61,7 @@ static int check_emacsclient_version(void)
	struct child_process ec_process;
	const char *argv_ec[] = { "emacsclient", "--version", NULL };
	int version;
	int ret = -1;

	/* emacsclient prints its version number on stderr */
	memset(&ec_process, 0, sizeof(ec_process));

@ -71,7 +72,10 @@ static int check_emacsclient_version(void)
		fprintf(stderr, "Failed to start emacsclient.\n");
		return -1;
	}
	strbuf_read(&buffer, ec_process.err, 20);
	if (strbuf_read(&buffer, ec_process.err, 20) < 0) {
		fprintf(stderr, "Failed to read emacsclient version\n");
		goto out;
	}
	close(ec_process.err);

	/*

@ -82,8 +86,7 @@ static int check_emacsclient_version(void)

	if (prefixcmp(buffer.buf, "emacsclient")) {
		fprintf(stderr, "Failed to parse emacsclient version.\n");
		strbuf_release(&buffer);
		return -1;
		goto out;
	}

	version = atoi(buffer.buf + strlen("emacsclient"));

@ -92,12 +95,11 @@ static int check_emacsclient_version(void)
		fprintf(stderr,
			"emacsclient version '%d' too old (< 22).\n",
			version);
		strbuf_release(&buffer);
		return -1;
	}

	} else
		ret = 0;
out:
	strbuf_release(&buffer);
	return 0;
	return ret;
}

static void exec_woman_emacs(const char *path, const char *page)

@ -748,6 +748,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
		.auxtrace_info	= perf_event__repipe_op2_synth,
		.auxtrace	= perf_event__repipe_auxtrace,
		.auxtrace_error	= perf_event__repipe_op2_synth,
		.time_conv	= perf_event__repipe_op2_synth,
		.finished_round	= perf_event__repipe_oe_synth,
		.build_id	= perf_event__repipe_op2_synth,
		.id_index	= perf_event__repipe_op2_synth,

@ -375,7 +375,7 @@ static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
	}

	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	sample__resolve_callchain(sample, NULL, evsel, &al, 16);
	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);

	callchain_cursor_commit(&callchain_cursor);
	while (true) {

@ -982,7 +982,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
	struct perf_evlist *evlist = kvm->evlist;
	char sbuf[STRERR_BUFSIZE];

	perf_evlist__config(evlist, &kvm->opts);
	perf_evlist__config(evlist, &kvm->opts, NULL);

	/*
	 * Note: exclude_{guest,host} do not apply here.

@ -62,19 +62,22 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
	int rec_argc, i = 0, j;
	const char **rec_argv;
	int ret;
	bool all_user = false, all_kernel = false;
	struct option options[] = {
	OPT_CALLBACK('e', "event", &mem, "event",
		     "event selector. use 'perf mem record -e list' to list available events",
		     parse_record_events),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"),
	OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"),
	OPT_END()
	};

	argc = parse_options(argc, argv, options, record_mem_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	rec_argc = argc + 7; /* max number of arguments */
	rec_argc = argc + 9; /* max number of arguments */
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -1;

@ -103,6 +106,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
		rec_argv[i++] = perf_mem_events__name(j);
	};

	if (all_user)
		rec_argv[i++] = "--all-user";

	if (all_kernel)
		rec_argv[i++] = "--all-kernel";

	for (j = 0; j < argc; j++, i++)
		rec_argv[i] = argv[j];

@ -29,10 +29,12 @@
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "asm/bug.h"

#include <unistd.h>

@ -55,6 +57,8 @@ struct record {
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			switch_output;
	unsigned long long	samples;
};

@ -124,9 +128,10 @@ static int record__mmap_read(struct record *rec, int idx)
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;
static volatile int auxtrace_snapshot_enabled;
static volatile int auxtrace_snapshot_err;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static void sig_handler(int sig)
{

@ -244,11 +249,12 @@ static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		auxtrace_snapshot_err = -1;
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
		if (!auxtrace_snapshot_err)
			auxtrace_snapshot_enabled = 1;
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

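The DEFINE_TRIGGER() pairs above replace the ad-hoc auxtrace_snapshot_enabled/err flags with a small state machine shared between a signal handler (producer) and the record loop (consumer). A minimal usage sketch, assuming the trigger_*() helpers behave as the call sites in this patch suggest (the trigger name and loop body are illustrative):

static DEFINE_TRIGGER(example_trigger);

static void example_sig_handler(int sig __maybe_unused)
{
	/* producer: fire only if the consumer has armed the trigger */
	if (trigger_is_ready(&example_trigger))
		trigger_hit(&example_trigger);
}

static void example_loop(void)
{
	trigger_on(&example_trigger);		/* enable */
	trigger_ready(&example_trigger);	/* arm */

	while (!trigger_is_error(&example_trigger)) {
		if (trigger_is_hit(&example_trigger)) {
			/* consumer: handle the event, then re-arm */
			trigger_ready(&example_trigger);
		}
		/* ... do other work, check for an exit condition ... */
	}
	trigger_off(&example_trigger);
}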
@ -283,7 +289,7 @@ static int record__open(struct record *rec)
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);
	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each(evlist, pos) {
try_again:

@ -494,6 +500,73 @@ record__finish_output(struct record *rec)
	return;
}

static int record__synthesize_workload(struct record *rec)
{
	struct {
		struct thread_map map;
		struct thread_map_data map_data;
	} thread_map;

	thread_map.map.nr = 1;
	thread_map.map.map[0].pid = rec->evlist->workload.pid;
	thread_map.map.map[0].comm = NULL;
	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, which causes the newly created perf.data to
		 * lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
@ -512,6 +585,15 @@ static void workload_exec_failed_signal(int signo __maybe_unused,

static void snapshot_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static int record__synthesize(struct record *rec)
{
	struct perf_session *session = rec->session;

@ -549,6 +631,11 @@ static int record__synthesize(struct record *rec)
		}
	}

	err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);

@ -600,10 +687,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	if (rec->opts.auxtrace_snapshot_mode)

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
		signal(SIGUSR2, snapshot_sig_handler);
	else
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
@ -729,27 +822,45 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
		perf_evlist__enable(rec->evlist);
	}

	auxtrace_snapshot_enabled = 1;
	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	for (;;) {
		unsigned long long hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			auxtrace_snapshot_enabled = 0;
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!auxtrace_snapshot_err)
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (auxtrace_snapshot_err) {
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			trigger_ready(&switch_output_trigger);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;

@ -772,12 +883,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			auxtrace_snapshot_enabled = 0;
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	auxtrace_snapshot_enabled = 0;
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
@ -811,11 +923,22 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err)
		record__finish_output(rec);
	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),

@ -823,9 +946,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, samples);
			file->path, postfix, samples);
	}

out_delete_session:
@ -833,58 +956,61 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
	return status;
}

static void callchain_debug(void)
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain_param.record_mode == CALLCHAIN_DWARF)
	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain_param.dump_size);
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	int ret;
	struct record_opts *record = (struct record_opts *)opt->value;

	record->callgraph_set = true;
	callchain_param.enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain_param.record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, &callchain_param);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain_param.record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug();
	}

	return ret;
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct record_opts *record = (struct record_opts *)opt->value;
	struct callchain_param *callchain = opt->value;

	record->callgraph_set = true;
	callchain_param.enabled = true;
	callchain->enabled = true;

	if (callchain_param.record_mode == CALLCHAIN_NONE)
		callchain_param.record_mode = CALLCHAIN_FP;
	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug();
	callchain_debug(callchain);
	return 0;
}

@ -1122,7 +1248,7 @@ struct option __record_options[] = {
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,

@ -1195,6 +1321,10 @@ struct option __record_options[] = {
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
		    "Switch output when receive SIGUSR2"),
	OPT_END()
};

@ -1250,6 +1380,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
		return -EINVAL;
	}

	if (rec->switch_output)
		rec->timestamp_filename = true;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)

@ -1261,6 +1394,14 @@
	if (err)
		return err;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		return err;
	}

	err = -ENOMEM;

	symbol__init(NULL);
@ -1275,8 +1416,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 * perf record --signal-trigger --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
@ -1335,9 +1504,13 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)

static void snapshot_sig_handler(int sig __maybe_unused)
{
	if (!auxtrace_snapshot_enabled)
		return;
	auxtrace_snapshot_enabled = 0;
	auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
	auxtrace_record__snapshot_started = 1;
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (trigger_is_ready(&switch_output_trigger))
		trigger_hit(&switch_output_trigger);
}

@ -47,7 +47,6 @@ struct report {
	struct perf_tool	tool;
	struct perf_session	*session;
	bool			use_tui, use_gtk, use_stdio;
	bool			dont_use_callchains;
	bool			show_full_info;
	bool			show_threads;
	bool			inverted_callchain;

@ -235,7 +234,7 @@ static int report__setup_sample_type(struct report *rep)
		sample_type |= PERF_SAMPLE_BRANCH_STACK;

	if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
		if (sort__has_parent) {
		if (perf_hpp_list.parent) {
			ui__error("Selected --sort parent, but no "
				    "callchain data. Did you call "
				    "'perf record' without -g?\n");

@ -247,7 +246,7 @@
				  "you call 'perf record' without -g?\n");
			return -1;
		}
	} else if (!rep->dont_use_callchains &&
	} else if (!callchain_param.enabled &&
		   callchain_param.mode != CHAIN_NONE &&
		   !symbol_conf.use_callchain) {
			symbol_conf.use_callchain = true;

@ -599,13 +598,15 @@ static int __cmd_report(struct report *rep)
static int
report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
	struct report *rep = (struct report *)opt->value;
	struct callchain_param *callchain = opt->value;

	callchain->enabled = !unset;
	/*
	 * --no-call-graph
	 */
	if (unset) {
		rep->dont_use_callchains = true;
		symbol_conf.use_callchain = false;
		callchain->mode = CHAIN_NONE;
		return 0;
	}

@ -690,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
			.ordered_events	 = true,
			.ordering_requires_timestamps = true,
		},
		.max_stack		 = PERF_MAX_STACK_DEPTH,
		.max_stack		 = sysctl_perf_event_max_stack,
		.pretty_printing_style	 = "normal",
		.socket_filter		 = -1,
	};

@ -734,7 +735,7 @@
		   "regex filter to identify parent, see: '--sort parent'"),
	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
		    "Only display entries with parent-match"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
	OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
			     report_callchain_help, &report_parse_callchain_opt,
			     callchain_default_opt),

@ -743,7 +744,7 @@
	OPT_INTEGER(0, "max-stack", &report.max_stack,
		    "Set the maximum stack depth when parsing the callchain, "
		    "anything beyond the specified depth will be ignored. "
		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
		    "alias for inverted call graph"),
	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",

@ -935,7 +936,7 @@
			goto error;
		}

		sort__need_collapse = true;
		perf_hpp_list.need_collapse = true;
	}

	/* Force tty output for header output and per-thread stat. */

@ -11,6 +11,8 @@
#include "util/session.h"
#include "util/tool.h"
#include "util/cloexec.h"
#include "util/thread_map.h"
#include "util/color.h"

#include <subcmd/parse-options.h>
#include "util/trace-event.h"

@ -122,6 +124,21 @@ struct trace_sched_handler {
			  struct machine *machine);
};

#define COLOR_PIDS PERF_COLOR_BLUE
#define COLOR_CPUS PERF_COLOR_BG_RED

struct perf_sched_map {
	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
	int			*comp_cpus;
	bool			 comp;
	struct thread_map	*color_pids;
	const char		*color_pids_str;
	struct cpu_map		*color_cpus;
	const char		*color_cpus_str;
	struct cpu_map		*cpus;
	const char		*cpus_str;
};

struct perf_sched {
	struct perf_tool tool;
	const char	 *sort_order;

@ -173,6 +190,7 @@ struct perf_sched {
	struct list_head sort_list, cmp_pid;
	bool force;
	bool skip_merge;
	struct perf_sched_map map;
};

static u64 get_nsecs(void)

@ -1339,6 +1357,38 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
	return 0;
}

union map_priv {
	void	*ptr;
	bool	 color;
};

static bool thread__has_color(struct thread *thread)
{
	union map_priv priv = {
		.ptr = thread__priv(thread),
	};

	return priv.color;
}

static struct thread*
map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
{
	struct thread *thread = machine__findnew_thread(machine, pid, tid);
	union map_priv priv = {
		.color = false,
	};

	if (!sched->map.color_pids || !thread || thread__priv(thread))
		return thread;

	if (thread_map__has(sched->map.color_pids, tid))
		priv.color = true;

	thread__set_priv(thread, priv.ptr);
	return thread;
}

static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
			    struct perf_sample *sample, struct machine *machine)
{

@ -1347,13 +1397,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
	int new_shortname;
	u64 timestamp0, timestamp = sample->time;
	s64 delta;
	int cpu, this_cpu = sample->cpu;
	int i, this_cpu = sample->cpu;
	int cpus_nr;
	bool new_cpu = false;
	const char *color = PERF_COLOR_NORMAL;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > sched->max_cpu)
		sched->max_cpu = this_cpu;

	if (sched->map.comp) {
		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
		if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
			sched->map.comp_cpus[cpus_nr++] = this_cpu;
			new_cpu = true;
		}
	} else
		cpus_nr = sched->max_cpu;

	timestamp0 = sched->cpu_last_switched[this_cpu];
	sched->cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)

@ -1366,7 +1428,7 @@
		return -1;
	}

	sched_in = machine__findnew_thread(machine, -1, next_pid);
	sched_in = map__findnew_thread(sched, machine, -1, next_pid);
	if (sched_in == NULL)
		return -1;

@ -1400,26 +1462,52 @@
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
	for (i = 0; i < cpus_nr; i++) {
		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
		struct thread *curr_thread = sched->curr_thread[cpu];
		const char *pid_color = color;
		const char *cpu_color = color;

		if (curr_thread && thread__has_color(curr_thread))
			pid_color = COLOR_PIDS;

		if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
			continue;

		if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
			cpu_color = COLOR_CPUS;

		if (cpu != this_cpu)
			printf(" ");
			color_fprintf(stdout, cpu_color, " ");
		else
			printf("*");
			color_fprintf(stdout, cpu_color, "*");

		if (sched->curr_thread[cpu])
			printf("%2s ", sched->curr_thread[cpu]->shortname);
			color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
		else
			printf(" ");
			color_fprintf(stdout, color, " ");
	}

	printf(" %12.6f secs ", (double)timestamp/1e9);
	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
		goto out;

	color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp/1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
		const char *pid_color = color;

		if (thread__has_color(sched_in))
			pid_color = COLOR_PIDS;

		color_fprintf(stdout, pid_color, "%s => %s:%d",
			sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
	} else {
		printf("\n");
	}

	if (sched->map.comp && new_cpu)
		color_fprintf(stdout, color, " (CPU %d)", this_cpu);

out:
	color_fprintf(stdout, color, "\n");

	thread__put(sched_in);

	return 0;
@ -1675,9 +1763,75 @@ static int perf_sched__lat(struct perf_sched *sched)
	return 0;
}

static int setup_map_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	sched->max_cpu  = sysconf(_SC_NPROCESSORS_CONF);

	if (sched->map.comp) {
		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
		if (!sched->map.comp_cpus)
			return -1;
	}

	if (!sched->map.cpus_str)
		return 0;

	map = cpu_map__new(sched->map.cpus_str);
	if (!map) {
		pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
		return -1;
	}

	sched->map.cpus = map;
	return 0;
}

static int setup_color_pids(struct perf_sched *sched)
{
	struct thread_map *map;

	if (!sched->map.color_pids_str)
		return 0;

	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
	if (!map) {
		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
		return -1;
	}

	sched->map.color_pids = map;
	return 0;
}

static int setup_color_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	if (!sched->map.color_cpus_str)
		return 0;

	map = cpu_map__new(sched->map.color_cpus_str);
	if (!map) {
		pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str);
		return -1;
	}

	sched->map.color_cpus = map;
	return 0;
}

static int perf_sched__map(struct perf_sched *sched)
{
	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
	if (setup_map_cpus(sched))
		return -1;

	if (setup_color_pids(sched))
		return -1;

	if (setup_color_cpus(sched))
		return -1;

	setup_pager();
	if (perf_sched__read_events(sched))
@ -1831,6 +1985,17 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
		    "dump raw trace in ASCII"),
	OPT_END()
	};
	const struct option map_options[] = {
	OPT_BOOLEAN(0, "compact", &sched.map.comp,
		    "map output in compact mode"),
	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
		   "highlight given pids in map"),
	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
		   "highlight given CPUs in map"),
	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
		   "display given CPUs in map"),
	OPT_END()
	};
	const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL

@ -1839,6 +2004,10 @@
	"perf sched replay [<options>]",
	NULL
	};
	const char * const map_usage[] = {
	"perf sched map [<options>]",
	NULL
	};
	const char *const sched_subcommands[] = { "record", "latency", "map",
						  "replay", "script", NULL };
	const char *sched_usage[] = {

@ -1887,6 +2056,11 @@
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__lat(&sched);
	} else if (!strcmp(argv[0], "map")) {
		if (argc) {
			argc = parse_options(argc, argv, map_options, map_usage, 0);
			if (argc)
				usage_with_options(map_usage, map_options);
		}
		sched.tp_handler = &map_ops;
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__map(&sched);
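
The new --cpus/--color-cpus/--color-pids strings are handed to the existing map constructors, so they accept the usual list syntaxes. A sketch (the literal argument strings are illustrative):

/* "0-3,8" style CPU lists and comma-separated TID lists both parse;
 * either constructor returns NULL on a parse failure. */
static int example_parse_maps(void)
{
	struct cpu_map *cpus = cpu_map__new("0-3,8");
	struct thread_map *pids = thread_map__new_by_tid_str("1234,5678");

	return (cpus && pids) ? 0 : -1;
}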
@ -22,6 +22,7 @@
#include "util/thread_map.h"
#include "util/stat.h"
#include <linux/bitmap.h>
#include <linux/stringify.h>
#include "asm/bug.h"
#include "util/mem-events.h"

@ -317,19 +318,19 @@ static void set_print_ip_opts(struct perf_event_attr *attr)

	output[type].print_ip_opts = 0;
	if (PRINT_FIELD(IP))
		output[type].print_ip_opts |= PRINT_IP_OPT_IP;
		output[type].print_ip_opts |= EVSEL__PRINT_IP;

	if (PRINT_FIELD(SYM))
		output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
		output[type].print_ip_opts |= EVSEL__PRINT_SYM;

	if (PRINT_FIELD(DSO))
		output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
		output[type].print_ip_opts |= EVSEL__PRINT_DSO;

	if (PRINT_FIELD(SYMOFFSET))
		output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
		output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;

	if (PRINT_FIELD(SRCLINE))
		output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE;
		output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
}

/*

@ -569,18 +570,23 @@ static void print_sample_bts(struct perf_sample *sample,
	/* print branch_from information */
	if (PRINT_FIELD(IP)) {
		unsigned int print_opts = output[attr->type].print_ip_opts;
		struct callchain_cursor *cursor = NULL;

		if (symbol_conf.use_callchain && sample->callchain) {
			printf("\n");
		} else {
			printf(" ");
			if (print_opts & PRINT_IP_OPT_SRCLINE) {
		if (symbol_conf.use_callchain && sample->callchain &&
		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
					      sample, NULL, NULL, scripting_max_stack) == 0)
			cursor = &callchain_cursor;

		if (cursor == NULL) {
			putchar(' ');
			if (print_opts & EVSEL__PRINT_SRCLINE) {
				print_srcline_last = true;
				print_opts &= ~PRINT_IP_OPT_SRCLINE;
				print_opts &= ~EVSEL__PRINT_SRCLINE;
			}
		}
		perf_evsel__print_ip(evsel, sample, al, print_opts,
				     scripting_max_stack);
		} else
			putchar('\n');

		sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout);
	}

	/* print branch_to information */

@ -783,14 +789,15 @@ static void process_event(struct perf_script *script,
		printf("%16" PRIu64, sample->weight);

	if (PRINT_FIELD(IP)) {
		if (!symbol_conf.use_callchain)
			printf(" ");
		else
			printf("\n");
		struct callchain_cursor *cursor = NULL;

		perf_evsel__print_ip(evsel, sample, al,
				     output[attr->type].print_ip_opts,
				     scripting_max_stack);
		if (symbol_conf.use_callchain && sample->callchain &&
		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
					      sample, NULL, NULL, scripting_max_stack) == 0)
			cursor = &callchain_cursor;

		putchar(cursor ? '\n' : ' ');
		sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
	}

	if (PRINT_FIELD(IREGS))
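
Both call sites pass the same argument pattern, which implies a replacement helper along these lines (parameter names and the return type are assumptions; only the argument order is taken from the calls above):

void sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
			 int left_alignment, unsigned int print_opts,
			 struct callchain_cursor *cursor, FILE *fp);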
@ -1959,6 +1966,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
			.exit		 = perf_event__process_exit,
			.fork		 = perf_event__process_fork,
			.attr		 = process_attr,
			.event_update	 = perf_event__process_event_update,
			.tracing_data	 = perf_event__process_tracing_data,
			.build_id	 = perf_event__process_build_id,
			.id_index	 = perf_event__process_id_index,

@ -2020,6 +2028,10 @@
		   "only consider symbols in these pids"),
	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
		   "only consider symbols in these tids"),
	OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_BOOLEAN('I', "show-info", &show_full_info,
		    "display extended information from perf.data file"),
	OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,

@ -2055,6 +2067,8 @@
		NULL
	};

	scripting_max_stack = sysctl_perf_event_max_stack;

	setup_scripting();

	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,

@ -298,6 +298,14 @@ static int read_counter(struct perf_evsel *counter)
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
						perf_evsel__name(counter),
						cpu,
						count->val, count->ena, count->run);
			}
		}
	}

@ -688,7 +688,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
	struct hist_entry *he = iter->he;
	struct perf_evsel *evsel = iter->evsel;

	if (sort__has_sym && single)
	if (perf_hpp_list.sym && single)
		perf_top__record_precise_ip(top, he, evsel->idx, al->addr);

	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,

@ -886,7 +886,7 @@ static int perf_top__start_counters(struct perf_top *top)
	struct perf_evlist *evlist = top->evlist;
	struct record_opts *opts = &top->record_opts;

	perf_evlist__config(evlist, opts);
	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each(evlist, counter) {
try_again:

@ -917,15 +917,15 @@ static int perf_top__start_counters(struct perf_top *top)
	return -1;
}

static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
static int callchain_param__setup_sample_type(struct callchain_param *callchain)
{
	if (!sort__has_sym) {
		if (symbol_conf.use_callchain) {
	if (!perf_hpp_list.sym) {
		if (callchain->enabled) {
			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
			return -EINVAL;
		}
	} else if (callchain_param.mode != CHAIN_NONE) {
		if (callchain_register_param(&callchain_param) < 0) {
	} else if (callchain->mode != CHAIN_NONE) {
		if (callchain_register_param(callchain) < 0) {
			ui__error("Can't register callchain params.\n");
			return -EINVAL;
		}

@ -952,7 +952,7 @@ static int __cmd_top(struct perf_top *top)
		goto out_delete;
	}

	ret = perf_top__setup_sample_type(top);
	ret = callchain_param__setup_sample_type(&callchain_param);
	if (ret)
		goto out_delete;

@ -962,7 +962,7 @@ static int __cmd_top(struct perf_top *top)
	machine__synthesize_threads(&top->session->machines.host, &opts->target,
				    top->evlist->threads, false, opts->proc_map_timeout);

	if (sort__has_socket) {
	if (perf_hpp_list.socket) {
		ret = perf_env__read_cpu_topology_map(&perf_env);
		if (ret < 0)
			goto out_err_cpu_topo;

@ -1045,18 +1045,17 @@ callchain_opt(const struct option *opt, const char *arg, int unset)
static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
	struct record_opts *record = (struct record_opts *)opt->value;
	struct callchain_param *callchain = opt->value;

	record->callgraph_set = true;
	callchain_param.enabled = !unset;
	callchain_param.record_mode = CALLCHAIN_FP;
	callchain->enabled = !unset;
	callchain->record_mode = CALLCHAIN_FP;

	/*
	 * --no-call-graph
	 */
	if (unset) {
		symbol_conf.use_callchain = false;
		callchain_param.record_mode = CALLCHAIN_NONE;
		callchain->record_mode = CALLCHAIN_NONE;
		return 0;
	}

@ -1104,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
			},
			.proc_map_timeout    = 500,
		},
		.max_stack	     = PERF_MAX_STACK_DEPTH,
		.max_stack	     = sysctl_perf_event_max_stack,
		.sym_pcnt_filter     = 5,
	};
	struct record_opts *opts = &top.record_opts;

@ -1162,17 +1161,17 @@
		   "output field(s): overhead, period, sample plus all of sort keys"),
	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
		    "Show a column with the number of samples"),
	OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording and display",
			   &callchain_opt),
	OPT_CALLBACK(0, "call-graph", &top.record_opts,
	OPT_CALLBACK(0, "call-graph", &callchain_param,
		     "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
		     top_callchain_help, &parse_callchain_opt),
	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
		    "Accumulate callchains of children and show total overhead as well"),
	OPT_INTEGER(0, "max-stack", &top.max_stack,
		    "Set the maximum stack depth when parsing the callchain. "
		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
		   "ignore callees of these functions in call graphs",
		   report_parse_ignore_callees_opt),

@ -1256,7 +1255,7 @@

	sort__mode = SORT_MODE__TOP;
	/* display thread wants entries to be collapsed in a different tree */
	sort__need_collapse = 1;
	perf_hpp_list.need_collapse = 1;

	if (top.use_stdio)
		use_browser = 0;

@ -1312,7 +1311,7 @@

	top.sym_evsel = perf_evlist__first(top.evlist);

	if (!symbol_conf.use_callchain) {
	if (!callchain_param.enabled) {
		symbol_conf.cumulate_callchain = false;
		perf_hpp__cancel_cumulate();
	}

File diff suppressed because it is too large
@ -27,7 +27,7 @@ NO_PERF_REGS := 1
ifeq ($(ARCH),x86)
  $(call detected,CONFIG_X86)
  ifeq (${IS_64_BIT}, 1)
    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
    ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
    LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
    $(call detected,CONFIG_X86_64)

@ -295,9 +295,6 @@ ifndef NO_LIBELF
    CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
  endif

# include ARCH specific config
-include $(src-perf)/arch/$(ARCH)/Makefile

ifndef NO_DWARF
  ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
    msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);

@@ -92,6 +92,22 @@ static int get_e_machine(struct jitheader *hdr)
 	return ret;
 }
 
+static int use_arch_timestamp;
+
+static inline uint64_t
+get_arch_timestamp(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((uint64_t)high) << 32;
+#else
+	return 0;
+#endif
+}
+
 #define NSEC_PER_SEC 1000000000
 static int perf_clk_id = CLOCK_MONOTONIC;
 
@@ -107,6 +123,9 @@ perf_get_timestamp(void)
 	struct timespec ts;
 	int ret;
 
+	if (use_arch_timestamp)
+		return get_arch_timestamp();
+
 	ret = clock_gettime(perf_clk_id, &ts);
 	if (ret)
 		return 0;
@@ -203,6 +222,17 @@ perf_close_marker_file(void)
 	munmap(marker_addr, pgsz);
 }
 
+static void
+init_arch_timestamp(void)
+{
+	char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
+
+	if (!str || !*str || !strcmp(str, "0"))
+		return;
+
+	use_arch_timestamp = 1;
+}
+
 void *jvmti_open(void)
 {
 	int pad_cnt;
@@ -211,11 +241,17 @@ void *jvmti_open(void)
 	int fd;
 	FILE *fp;
 
+	init_arch_timestamp();
+
 	/*
 	 * check if clockid is supported
 	 */
-	if (!perf_get_timestamp())
-		warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+	if (!perf_get_timestamp()) {
+		if (use_arch_timestamp)
+			warnx("jvmti: arch timestamp not supported");
+		else
+			warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+	}
 
 	memset(&header, 0, sizeof(header));
 
@@ -263,6 +299,9 @@ void *jvmti_open(void)
 
 	header.timestamp = perf_get_timestamp();
 
+	if (use_arch_timestamp)
+		header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
 	if (!fwrite(&header, sizeof(header), 1, fp)) {
 		warn("jvmti: cannot write dumpfile header");
 		goto error;
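The jvmti agent hunks add an opt-in TSC clock source: when JITDUMP_USE_ARCH_TIMESTAMP is set to anything other than "0", jitdump records are stamped with rdtsc instead of clock_gettime(CLOCK_MONOTONIC), and JITDUMP_FLAGS_ARCH_TIMESTAMP is set in the file header so later processing can tell which clock was used. A self-contained sketch comparing the two sources (assumes gcc/clang inline asm on x86; elsewhere the TSC path reports 0, as in the hunk):

#include <stdio.h>
#include <stdint.h>
#include <time.h>

/* Read the x86 time-stamp counter; rdtsc returns the low half in
 * EAX and the high half in EDX, recombined below. */
static uint64_t tsc_now(void)
{
#if defined(__i386__) || defined(__x86_64__)
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));
	return low | ((uint64_t)high) << 32;
#else
	return 0;	/* no TSC: caller falls back to clock_gettime() */
#endif
}

/* The agent's default source, folded to one u64 nanosecond value. */
static uint64_t mono_ns(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts))
		return 0;
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
	printf("tsc=%llu mono_ns=%llu\n",
	       (unsigned long long)tsc_now(),
	       (unsigned long long)mono_ns());
	return 0;
}

In use, enabling the new path is just a matter of exporting JITDUMP_USE_ARCH_TIMESTAMP=1 before starting the JVM with the agent loaded (the agent library's name varies by build, so it is omitted here).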
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <pthread.h>
 #include <stdlib.h>
@@ -308,9 +309,11 @@ static int handle_alias(int *argcp, const char ***argv)
 	if (*argcp > 1) {
 		struct strbuf buf;
 
-		strbuf_init(&buf, PATH_MAX);
-		strbuf_addstr(&buf, alias_string);
-		sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
+		if (strbuf_init(&buf, PATH_MAX) < 0 ||
+		    strbuf_addstr(&buf, alias_string) < 0 ||
+		    sq_quote_argv(&buf, (*argv) + 1,
+				  PATH_MAX) < 0)
+			die("Failed to allocate memory.");
 		free(alias_string);
 		alias_string = buf.buf;
 	}
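The handle_alias() hunk stops ignoring strbuf failures: every fallible call now feeds one short-circuiting || chain, so the first error aborts via die() instead of silently continuing with a broken buffer. A minimal sketch of the same pattern with a toy buffer type (buf_init and buf_addstr are illustrative stand-ins, not perf's actual strbuf API):

#include <err.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	char *data;
	size_t len, cap;
};

/* Both helpers report failure with -1, mirroring the strbuf calls. */
static int buf_init(struct buf *b, size_t hint)
{
	b->data = malloc(hint + 1);
	if (!b->data)
		return -1;
	b->data[0] = '\0';
	b->len = 0;
	b->cap = hint + 1;
	return 0;
}

static int buf_addstr(struct buf *b, const char *s)
{
	size_t n = strlen(s);

	if (b->len + n + 1 > b->cap)
		return -1;	/* a real strbuf would grow instead */
	memcpy(b->data + b->len, s, n + 1);
	b->len += n;
	return 0;
}

int main(void)
{
	struct buf b;

	/* || short-circuits, so the chain stops at the first failure
	 * and exactly one error path covers all three calls. */
	if (buf_init(&b, 64) < 0 ||
	    buf_addstr(&b, "perf ") < 0 ||
	    buf_addstr(&b, "record -g") < 0)
		errx(1, "Failed to allocate memory.");

	free(b.data);
	return 0;
}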
@@ -533,6 +536,7 @@ int main(int argc, const char **argv)
 {
 	const char *cmd;
 	char sbuf[STRERR_BUFSIZE];
+	int value;
 
 	/* libsubcmd init */
 	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -542,6 +546,9 @@ int main(int argc, const char **argv)
 	page_size = sysconf(_SC_PAGE_SIZE);
 	cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
 
+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+		sysctl_perf_event_max_stack = value;
+
 	cmd = extract_argv0_path(argv[0]);
 	if (!cmd)
 		cmd = "perf-help";
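main() now seeds sysctl_perf_event_max_stack from the kernel.perf_event_max_stack sysctl at startup, using the sysctl__read_int() helper that comes with the newly included <api/fs/fs.h>. Stripped of the helper, the operation is just parsing one integer out of procfs; a standalone sketch (fopen/fscanf stand in for the helper; the 127 fallback mirrors the documented PERF_MAX_STACK_DEPTH default):

#include <stdio.h>

int main(void)
{
	int max_stack = 127;	/* compiled-in default if the read fails */
	FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "r");

	if (f) {
		if (fscanf(f, "%d", &max_stack) != 1)
			max_stack = 127;
		fclose(f);
	}
	printf("perf_event_max_stack = %d\n", max_stack);
	return 0;
}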
@@ -549,6 +556,7 @@
 	srandom(time(NULL));
 
 	perf_config(perf_default_config, NULL);
+	set_buildid_dir(NULL);
 
 	/* get debugfs/tracefs mount point from /proc/mounts */
 	tracing_path_mount();
@@ -572,7 +580,6 @@
 	}
 	if (!prefixcmp(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
-		set_buildid_dir(NULL);
 		setup_path();
 		argv[0] = "trace";
 		return cmd_trace(argc, argv, NULL);
@@ -587,7 +594,6 @@
 	argc--;
 	handle_options(&argv, &argc, NULL);
 	commit_pager_choice();
-	set_buildid_dir(NULL);
 
 	if (argc > 0) {
 		if (!prefixcmp(argv[0], "--"))
@@ -52,7 +52,6 @@ struct record_opts {
 	bool sample_weight;
 	bool sample_time;
 	bool sample_time_set;
-	bool callgraph_set;
 	bool period;
 	bool running_time;
 	bool full_auxtrace;
@@ -34,10 +34,9 @@ import datetime
 #
 # ubuntu:
 #
-# $ sudo apt-get install postgresql
+# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
 # $ sudo su - postgres
-# $ createuser <your user id here>
-# Shall the new role be a superuser? (y/n) y
+# $ createuser -s <your user id here>
 #
 # An example of using this script with Intel PT:
 #
@@ -224,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 
 perf_db_export_mode = True
 perf_db_export_calls = False
+perf_db_export_callchains = False
+
 
 def usage():
-	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
+	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
 	print >> sys.stderr, "where: columns 'all' or 'branches'"
-	print >> sys.stderr, " calls 'calls' => create calls table"
+	print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
+	print >> sys.stderr, " callchains 'callchains' => create call_paths table"
 	raise Exception("Too few arguments")
 
 if (len(sys.argv) < 2):
@@ -246,9 +248,11 @@ if columns not in ("all", "branches"):
 
 branches = (columns == "branches")
 
-if (len(sys.argv) >= 4):
-	if (sys.argv[3] == "calls"):
+for i in range(3,len(sys.argv)):
+	if (sys.argv[i] == "calls"):
 		perf_db_export_calls = True
+	elif (sys.argv[i] == "callchains"):
+		perf_db_export_callchains = True
 	else:
 		usage()
 
@@ -359,14 +363,16 @@ else:
 		'transaction bigint,'
 		'data_src bigint,'
 		'branch_type integer,'
-		'in_tx boolean)')
+		'in_tx boolean,'
+		'call_path_id bigint)')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	do_query(query, 'CREATE TABLE call_paths ('
 		'id bigint NOT NULL,'
 		'parent_id bigint,'
 		'symbol_id bigint,'
 		'ip bigint)')
+if perf_db_export_calls:
 	do_query(query, 'CREATE TABLE calls ('
 		'id bigint NOT NULL,'
 		'thread_id bigint,'
@@ -428,7 +434,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS '
 		'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
 	' FROM comm_threads')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	do_query(query, 'CREATE VIEW call_paths_view AS '
 		'SELECT '
 			'c.id,'
@@ -444,6 +450,7 @@ if perf_db_export_calls:
 			'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
 			'(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
 		' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
 	do_query(query, 'CREATE VIEW calls_view AS '
 		'SELECT '
 			'calls.id,'
@@ -541,8 +548,9 @@ dso_file = open_output_file("dso_table.bin")
 symbol_file = open_output_file("symbol_table.bin")
 branch_type_file = open_output_file("branch_type_table.bin")
 sample_file = open_output_file("sample_table.bin")
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	call_path_file = open_output_file("call_path_table.bin")
+if perf_db_export_calls:
 	call_file = open_output_file("call_table.bin")
 
 def trace_begin():
@@ -554,8 +562,8 @@ def trace_begin():
 	comm_table(0, "unknown")
 	dso_table(0, 0, "unknown", "unknown", "")
 	symbol_table(0, 0, 0, 0, 0, "unknown")
-	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-	if perf_db_export_calls:
+	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+	if perf_db_export_calls or perf_db_export_callchains:
 		call_path_table(0, 0, 0, 0)
 
 unhandled_count = 0
@@ -571,8 +579,9 @@ def trace_end():
 	copy_output_file(symbol_file, "symbols")
 	copy_output_file(branch_type_file, "branch_types")
 	copy_output_file(sample_file, "samples")
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		copy_output_file(call_path_file, "call_paths")
+	if perf_db_export_calls:
 		copy_output_file(call_file, "calls")
 
 	print datetime.datetime.today(), "Removing intermediate files..."
@@ -585,8 +594,9 @@ def trace_end():
 	remove_output_file(symbol_file)
 	remove_output_file(branch_type_file)
 	remove_output_file(sample_file)
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		remove_output_file(call_path_file)
+	if perf_db_export_calls:
 		remove_output_file(call_file)
 	os.rmdir(output_dir_name)
 	print datetime.datetime.today(), "Adding primary keys"
@@ -599,8 +609,9 @@ def trace_end():
 	do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
+	if perf_db_export_calls:
 		do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
 
 	print datetime.datetime.today(), "Adding foreign keys"
@@ -623,10 +634,11 @@ def trace_end():
 		'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
 		'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
 		'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		do_query(query, 'ALTER TABLE call_paths '
			'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),'
			'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)')
+	if perf_db_export_calls:
 		do_query(query, 'ALTER TABLE calls '
 			'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
 			'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
@@ -694,11 +706,11 @@ def branch_type_table(branch_type, name, *x):
 	value = struct.pack(fmt, 2, 4, branch_type, n, name)
 	branch_type_file.write(value)
 
-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
 	if branches:
-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
 	else:
-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
 	sample_file.write(value)
 
 def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
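The sample_table() changes are mechanical but worth decoding: each row gains one (length, value) pair for call_path_id, so the leading field count in the pack string is bumped from 17 to 18 (branch rows) and 21 to 22 (full rows), with "iq" (int32 length of 8, int64 value) appended to the format. The "!hiq..." layout is PostgreSQL's COPY BINARY tuple framing: a big-endian int16 field count, then per field an int32 byte length followed by the payload. A small C sketch of that framing for two bigint columns (assumes a little-endian host; hton64 is hand-rolled because no standard 64-bit byte-swap helper exists):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Swap a 64-bit value to big-endian on a little-endian host. */
static uint64_t hton64(uint64_t v)
{
	return ((uint64_t)htonl(v & 0xffffffffu) << 32) | htonl(v >> 32);
}

/* One bigint field: int32 length (8) + 8 payload bytes, all big-endian. */
static size_t pack_bigint(unsigned char *p, uint64_t v)
{
	uint32_t len = htonl(8);
	uint64_t be = hton64(v);

	memcpy(p, &len, 4);
	memcpy(p + 4, &be, 8);
	return 12;
}

int main(void)
{
	unsigned char row[64];
	uint16_t nfields = htons(2);	/* the script writes 18 or 22 here */
	size_t off = 0;

	memcpy(row + off, &nfields, 2);
	off += 2;
	off += pack_bigint(row + off, 1);	/* e.g. sample id */
	off += pack_bigint(row + off, 42);	/* e.g. call_path_id */
	printf("encoded %zu bytes\n", off);	/* 26 */
	return 0;
}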
@@ -37,6 +37,8 @@ perf-y += topology.o
 perf-y += cpumap.o
 perf-y += stat.o
 perf-y += event_update.o
+perf-y += event-times.o
+perf-y += backward-ring-buffer.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
Some files were not shown because too many files have changed in this diff