mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 10:25:26 +07:00
878068ea27
Starting from Icelake, XMM registers can be collected in PEBS record. But current code only output the pt_regs. Add a new struct x86_perf_regs for both pt_regs and xmm_regs. The xmm_regs will be used later to keep a pointer to PEBS record which has XMM information. XMM registers are 128 bit. To simplify the code, they are handled like two different registers, which means setting two bits in the register bitmap. This also allows only sampling the lower 64bit bits in XMM. The index of XMM registers starts from 32. There are 16 XMM registers. So all reserved space for regs are used. Remove REG_RESERVED. Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86 regs including both GPRs and XMM. Add REG_NOSUPPORT for 32bit to exclude unsupported registers. Previous platforms can not collect XMM information in PEBS record. Adding pebs_no_xmm_regs to indicate the unsupported platforms. The common code still validates the supported registers. However, it cannot check model specific registers, e.g. XMM. Add extra check in x86_pmu_hw_config() to reject invalid config of regs_user and regs_intr. The regs_user never supports XMM collection. The regs_intr only supports XMM collection when sampling PEBS event on icelake and later platforms. Originally-by: Andi Kleen <ak@linux.intel.com> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Cc: acme@kernel.org Cc: jolsa@kernel.org Link: https://lkml.kernel.org/r/20190402194509.2832-3-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
193 lines
5.3 KiB
C
193 lines
5.3 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/bug.h>
|
|
#include <linux/stddef.h>
|
|
#include <asm/perf_regs.h>
|
|
#include <asm/ptrace.h>
|
|
|
|
#ifdef CONFIG_X86_32
|
|
#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
|
|
#else
|
|
#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
|
|
#endif
|
|
|
|
#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
|
|
|
|
static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
|
|
PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
|
|
PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
|
|
PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
|
|
PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
|
|
PT_REGS_OFFSET(PERF_REG_X86_SI, si),
|
|
PT_REGS_OFFSET(PERF_REG_X86_DI, di),
|
|
PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
|
|
PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
|
|
PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
|
|
PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
|
|
PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
|
|
PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
|
|
#ifdef CONFIG_X86_32
|
|
PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
|
|
PT_REGS_OFFSET(PERF_REG_X86_ES, es),
|
|
PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
|
|
PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
|
|
#else
|
|
/*
|
|
* The pt_regs struct does not store
|
|
* ds, es, fs, gs in 64 bit mode.
|
|
*/
|
|
(unsigned int) -1,
|
|
(unsigned int) -1,
|
|
(unsigned int) -1,
|
|
(unsigned int) -1,
|
|
#endif
|
|
#ifdef CONFIG_X86_64
|
|
PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
|
|
PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
|
|
PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
|
|
PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
|
|
PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
|
|
PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
|
|
PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
|
|
PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
|
|
#endif
|
|
};
|
|
|
|
u64 perf_reg_value(struct pt_regs *regs, int idx)
|
|
{
|
|
struct x86_perf_regs *perf_regs;
|
|
|
|
if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
|
|
perf_regs = container_of(regs, struct x86_perf_regs, regs);
|
|
if (!perf_regs->xmm_regs)
|
|
return 0;
|
|
return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
|
|
}
|
|
|
|
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
|
|
return 0;
|
|
|
|
return regs_get_register(regs, pt_regs_offset[idx]);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
|
|
(1ULL << PERF_REG_X86_R9) | \
|
|
(1ULL << PERF_REG_X86_R10) | \
|
|
(1ULL << PERF_REG_X86_R11) | \
|
|
(1ULL << PERF_REG_X86_R12) | \
|
|
(1ULL << PERF_REG_X86_R13) | \
|
|
(1ULL << PERF_REG_X86_R14) | \
|
|
(1ULL << PERF_REG_X86_R15))
|
|
|
|
int perf_reg_validate(u64 mask)
|
|
{
|
|
if (!mask || (mask & REG_NOSUPPORT))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
u64 perf_reg_abi(struct task_struct *task)
|
|
{
|
|
return PERF_SAMPLE_REGS_ABI_32;
|
|
}
|
|
|
|
void perf_get_regs_user(struct perf_regs *regs_user,
|
|
struct pt_regs *regs,
|
|
struct pt_regs *regs_user_copy)
|
|
{
|
|
regs_user->regs = task_pt_regs(current);
|
|
regs_user->abi = perf_reg_abi(current);
|
|
}
|
|
#else /* CONFIG_X86_64 */
|
|
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
|
|
(1ULL << PERF_REG_X86_ES) | \
|
|
(1ULL << PERF_REG_X86_FS) | \
|
|
(1ULL << PERF_REG_X86_GS))
|
|
|
|
int perf_reg_validate(u64 mask)
|
|
{
|
|
if (!mask || (mask & REG_NOSUPPORT))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
u64 perf_reg_abi(struct task_struct *task)
|
|
{
|
|
if (test_tsk_thread_flag(task, TIF_IA32))
|
|
return PERF_SAMPLE_REGS_ABI_32;
|
|
else
|
|
return PERF_SAMPLE_REGS_ABI_64;
|
|
}
|
|
|
|
void perf_get_regs_user(struct perf_regs *regs_user,
|
|
struct pt_regs *regs,
|
|
struct pt_regs *regs_user_copy)
|
|
{
|
|
struct pt_regs *user_regs = task_pt_regs(current);
|
|
|
|
/*
|
|
* If we're in an NMI that interrupted task_pt_regs setup, then
|
|
* we can't sample user regs at all. This check isn't really
|
|
* sufficient, though, as we could be in an NMI inside an interrupt
|
|
* that happened during task_pt_regs setup.
|
|
*/
|
|
if (regs->sp > (unsigned long)&user_regs->r11 &&
|
|
regs->sp <= (unsigned long)(user_regs + 1)) {
|
|
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
|
|
regs_user->regs = NULL;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* These registers are always saved on 64-bit syscall entry.
|
|
* On 32-bit entry points, they are saved too except r8..r11.
|
|
*/
|
|
regs_user_copy->ip = user_regs->ip;
|
|
regs_user_copy->ax = user_regs->ax;
|
|
regs_user_copy->cx = user_regs->cx;
|
|
regs_user_copy->dx = user_regs->dx;
|
|
regs_user_copy->si = user_regs->si;
|
|
regs_user_copy->di = user_regs->di;
|
|
regs_user_copy->r8 = user_regs->r8;
|
|
regs_user_copy->r9 = user_regs->r9;
|
|
regs_user_copy->r10 = user_regs->r10;
|
|
regs_user_copy->r11 = user_regs->r11;
|
|
regs_user_copy->orig_ax = user_regs->orig_ax;
|
|
regs_user_copy->flags = user_regs->flags;
|
|
regs_user_copy->sp = user_regs->sp;
|
|
regs_user_copy->cs = user_regs->cs;
|
|
regs_user_copy->ss = user_regs->ss;
|
|
/*
|
|
* Store user space frame-pointer value on sample
|
|
* to facilitate stack unwinding for cases when
|
|
* user space executable code has such support
|
|
* enabled at compile time:
|
|
*/
|
|
regs_user_copy->bp = user_regs->bp;
|
|
|
|
regs_user_copy->bx = -1;
|
|
regs_user_copy->r12 = -1;
|
|
regs_user_copy->r13 = -1;
|
|
regs_user_copy->r14 = -1;
|
|
regs_user_copy->r15 = -1;
|
|
/*
|
|
* For this to be at all useful, we need a reasonable guess for
|
|
* the ABI. Be careful: we're in NMI context, and we're
|
|
* considering current to be the current task, so we should
|
|
* be careful not to look at any other percpu variables that might
|
|
* change during context switches.
|
|
*/
|
|
regs_user->abi = user_64bit_mode(user_regs) ?
|
|
PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
|
|
|
|
regs_user->regs = regs_user_copy;
|
|
}
|
|
#endif /* CONFIG_X86_32 */
|