mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-26 03:45:40 +07:00
cd99ddbea2
In TLB miss handlers, updating the perf counter is only useful when performing a perf analysis. As it has a noticeable overhead, let's only do it when needed. In order to do so, the exit of the miss handlers will be patched when starting/stopping 'perf': the first register restore instruction of each exit point will be replaced by a jump to the counting code. Once this is done, CONFIG_PPC_8xx_PERF_EVENT becomes useless as this feature doesn't add any overhead. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
218 lines
5.4 KiB
C
218 lines
5.4 KiB
C
/*
|
|
* Performance event support - PPC 8xx
|
|
*
|
|
* Copyright 2016 Christophe Leroy, CS Systemes d'Information
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/hardirq.h>
|
|
#include <asm/pmc.h>
|
|
#include <asm/machdep.h>
|
|
#include <asm/firmware.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/code-patching.h>
|
|
|
|
#define PERF_8xx_ID_CPU_CYCLES 1
|
|
#define PERF_8xx_ID_HW_INSTRUCTIONS 2
|
|
#define PERF_8xx_ID_ITLB_LOAD_MISS 3
|
|
#define PERF_8xx_ID_DTLB_LOAD_MISS 4
|
|
|
|
#define C(x) PERF_COUNT_HW_CACHE_##x
|
|
#define DTLB_LOAD_MISS (C(DTLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16))
|
|
#define ITLB_LOAD_MISS (C(ITLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16))
|
|
|
|
extern unsigned long itlb_miss_counter, dtlb_miss_counter;
|
|
extern atomic_t instruction_counter;
|
|
extern unsigned int itlb_miss_perf, dtlb_miss_perf;
|
|
extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
|
|
extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
|
|
|
|
static atomic_t insn_ctr_ref;
|
|
static atomic_t itlb_miss_ref;
|
|
static atomic_t dtlb_miss_ref;
|
|
|
|
static s64 get_insn_ctr(void)
|
|
{
|
|
int ctr;
|
|
unsigned long counta;
|
|
|
|
do {
|
|
ctr = atomic_read(&instruction_counter);
|
|
counta = mfspr(SPRN_COUNTA);
|
|
} while (ctr != atomic_read(&instruction_counter));
|
|
|
|
return ((s64)ctr << 16) | (counta >> 16);
|
|
}
|
|
|
|
static int event_type(struct perf_event *event)
|
|
{
|
|
switch (event->attr.type) {
|
|
case PERF_TYPE_HARDWARE:
|
|
if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES)
|
|
return PERF_8xx_ID_CPU_CYCLES;
|
|
if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS)
|
|
return PERF_8xx_ID_HW_INSTRUCTIONS;
|
|
break;
|
|
case PERF_TYPE_HW_CACHE:
|
|
if (event->attr.config == ITLB_LOAD_MISS)
|
|
return PERF_8xx_ID_ITLB_LOAD_MISS;
|
|
if (event->attr.config == DTLB_LOAD_MISS)
|
|
return PERF_8xx_ID_DTLB_LOAD_MISS;
|
|
break;
|
|
case PERF_TYPE_RAW:
|
|
break;
|
|
default:
|
|
return -ENOENT;
|
|
}
|
|
return -EOPNOTSUPP;
|
|
}
|
|
|
|
static int mpc8xx_pmu_event_init(struct perf_event *event)
|
|
{
|
|
int type = event_type(event);
|
|
|
|
if (type < 0)
|
|
return type;
|
|
return 0;
|
|
}
|
|
|
|
static int mpc8xx_pmu_add(struct perf_event *event, int flags)
|
|
{
|
|
int type = event_type(event);
|
|
s64 val = 0;
|
|
|
|
if (type < 0)
|
|
return type;
|
|
|
|
switch (type) {
|
|
case PERF_8xx_ID_CPU_CYCLES:
|
|
val = get_tb();
|
|
break;
|
|
case PERF_8xx_ID_HW_INSTRUCTIONS:
|
|
if (atomic_inc_return(&insn_ctr_ref) == 1)
|
|
mtspr(SPRN_ICTRL, 0xc0080007);
|
|
val = get_insn_ctr();
|
|
break;
|
|
case PERF_8xx_ID_ITLB_LOAD_MISS:
|
|
if (atomic_inc_return(&itlb_miss_ref) == 1) {
|
|
unsigned long target = (unsigned long)&itlb_miss_perf;
|
|
|
|
patch_branch(&itlb_miss_exit_1, target, 0);
|
|
#ifndef CONFIG_PIN_TLB_TEXT
|
|
patch_branch(&itlb_miss_exit_2, target, 0);
|
|
#endif
|
|
}
|
|
val = itlb_miss_counter;
|
|
break;
|
|
case PERF_8xx_ID_DTLB_LOAD_MISS:
|
|
if (atomic_inc_return(&dtlb_miss_ref) == 1) {
|
|
unsigned long target = (unsigned long)&dtlb_miss_perf;
|
|
|
|
patch_branch(&dtlb_miss_exit_1, target, 0);
|
|
patch_branch(&dtlb_miss_exit_2, target, 0);
|
|
patch_branch(&dtlb_miss_exit_3, target, 0);
|
|
}
|
|
val = dtlb_miss_counter;
|
|
break;
|
|
}
|
|
local64_set(&event->hw.prev_count, val);
|
|
return 0;
|
|
}
|
|
|
|
static void mpc8xx_pmu_read(struct perf_event *event)
|
|
{
|
|
int type = event_type(event);
|
|
s64 prev, val = 0, delta = 0;
|
|
|
|
if (type < 0)
|
|
return;
|
|
|
|
do {
|
|
prev = local64_read(&event->hw.prev_count);
|
|
switch (type) {
|
|
case PERF_8xx_ID_CPU_CYCLES:
|
|
val = get_tb();
|
|
delta = 16 * (val - prev);
|
|
break;
|
|
case PERF_8xx_ID_HW_INSTRUCTIONS:
|
|
val = get_insn_ctr();
|
|
delta = prev - val;
|
|
if (delta < 0)
|
|
delta += 0x1000000000000LL;
|
|
break;
|
|
case PERF_8xx_ID_ITLB_LOAD_MISS:
|
|
val = itlb_miss_counter;
|
|
delta = (s64)((s32)val - (s32)prev);
|
|
break;
|
|
case PERF_8xx_ID_DTLB_LOAD_MISS:
|
|
val = dtlb_miss_counter;
|
|
delta = (s64)((s32)val - (s32)prev);
|
|
break;
|
|
}
|
|
} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
|
|
|
|
local64_add(delta, &event->count);
|
|
}
|
|
|
|
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
|
|
{
|
|
/* mfspr r10, SPRN_SPRG_SCRATCH0 */
|
|
unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
|
|
__PPC_SPR(SPRN_SPRG_SCRATCH0);
|
|
|
|
mpc8xx_pmu_read(event);
|
|
|
|
/* If it was the last user, stop counting to avoid useles overhead */
|
|
switch (event_type(event)) {
|
|
case PERF_8xx_ID_CPU_CYCLES:
|
|
break;
|
|
case PERF_8xx_ID_HW_INSTRUCTIONS:
|
|
if (atomic_dec_return(&insn_ctr_ref) == 0)
|
|
mtspr(SPRN_ICTRL, 7);
|
|
break;
|
|
case PERF_8xx_ID_ITLB_LOAD_MISS:
|
|
if (atomic_dec_return(&itlb_miss_ref) == 0) {
|
|
patch_instruction(&itlb_miss_exit_1, insn);
|
|
#ifndef CONFIG_PIN_TLB_TEXT
|
|
patch_instruction(&itlb_miss_exit_2, insn);
|
|
#endif
|
|
}
|
|
break;
|
|
case PERF_8xx_ID_DTLB_LOAD_MISS:
|
|
if (atomic_dec_return(&dtlb_miss_ref) == 0) {
|
|
patch_instruction(&dtlb_miss_exit_1, insn);
|
|
patch_instruction(&dtlb_miss_exit_2, insn);
|
|
patch_instruction(&dtlb_miss_exit_3, insn);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
static struct pmu mpc8xx_pmu = {
|
|
.event_init = mpc8xx_pmu_event_init,
|
|
.add = mpc8xx_pmu_add,
|
|
.del = mpc8xx_pmu_del,
|
|
.read = mpc8xx_pmu_read,
|
|
.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
|
|
PERF_PMU_CAP_NO_NMI,
|
|
};
|
|
|
|
static int init_mpc8xx_pmu(void)
|
|
{
|
|
mtspr(SPRN_ICTRL, 7);
|
|
mtspr(SPRN_CMPA, 0);
|
|
mtspr(SPRN_COUNTA, 0xffff);
|
|
|
|
return perf_pmu_register(&mpc8xx_pmu, "cpu", PERF_TYPE_RAW);
|
|
}
|
|
|
|
early_initcall(init_mpc8xx_pmu);
|