mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
146f76cc84
Commit 07da1ffaa1
("KVM: arm64: Remove host_cpu_context member from vcpu structure") has, by removing the host CPU context pointer, exposed that kvm_vcpu_pmu_restore_guest is called in preemptible contexts: [ 266.932442] BUG: using smp_processor_id() in preemptible [00000000] code: qemu-system-aar/779 [ 266.939721] caller is debug_smp_processor_id+0x20/0x30 [ 266.944157] CPU: 2 PID: 779 Comm: qemu-system-aar Tainted: G E 5.8.0-rc3-00015-g8d4aa58b2fe3 #1374 [ 266.954268] Hardware name: amlogic w400/w400, BIOS 2020.04 05/22/2020 [ 266.960640] Call trace: [ 266.963064] dump_backtrace+0x0/0x1e0 [ 266.966679] show_stack+0x20/0x30 [ 266.969959] dump_stack+0xe4/0x154 [ 266.973338] check_preemption_disabled+0xf8/0x108 [ 266.977978] debug_smp_processor_id+0x20/0x30 [ 266.982307] kvm_vcpu_pmu_restore_guest+0x2c/0x68 [ 266.986949] access_pmcr+0xf8/0x128 [ 266.990399] perform_access+0x8c/0x250 [ 266.994108] kvm_handle_sys_reg+0x10c/0x2f8 [ 266.998247] handle_exit+0x78/0x200 [ 267.001697] kvm_arch_vcpu_ioctl_run+0x2ac/0xab8 Note that the bug was always there, it is only the switch to using percpu accessors that made it obvious. The fix is to wrap these accesses in a preempt-disabled section, so that we sample a coherent context on trap from the guest. Fixes: 435e53fb5e
("arm64: KVM: Enable VHE support for :G/:H perf event modifiers") Cc: Andrew Murray <amurray@thegoodpenguin.co.uk> Signed-off-by: Marc Zyngier <maz@kernel.org>
203 lines
5.0 KiB
C
203 lines
5.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright 2019 Arm Limited
|
|
* Author: Andrew Murray <Andrew.Murray@arm.com>
|
|
*/
|
|
#include <linux/kvm_host.h>
|
|
#include <linux/perf_event.h>
|
|
#include <asm/kvm_hyp.h>
|
|
|
|
/*
|
|
* Given the perf event attributes and system type, determine
|
|
* if we are going to need to switch counters at guest entry/exit.
|
|
*/
|
|
static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
|
|
{
|
|
/**
|
|
* With VHE the guest kernel runs at EL1 and the host at EL2,
|
|
* where user (EL0) is excluded then we have no reason to switch
|
|
* counters.
|
|
*/
|
|
if (has_vhe() && attr->exclude_user)
|
|
return false;
|
|
|
|
/* Only switch if attributes are different */
|
|
return (attr->exclude_host != attr->exclude_guest);
|
|
}
|
|
|
|
/*
|
|
* Add events to track that we may want to switch at guest entry/exit
|
|
* time.
|
|
*/
|
|
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
|
|
{
|
|
struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data);
|
|
|
|
if (!kvm_pmu_switch_needed(attr))
|
|
return;
|
|
|
|
if (!attr->exclude_host)
|
|
ctx->pmu_events.events_host |= set;
|
|
if (!attr->exclude_guest)
|
|
ctx->pmu_events.events_guest |= set;
|
|
}
|
|
|
|
/*
|
|
* Stop tracking events
|
|
*/
|
|
void kvm_clr_pmu_events(u32 clr)
|
|
{
|
|
struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data);
|
|
|
|
ctx->pmu_events.events_host &= ~clr;
|
|
ctx->pmu_events.events_guest &= ~clr;
|
|
}
|
|
|
|
/* Switch case returning the value read from pmevtyper<n>_el0. */
#define PMEVTYPER_READ_CASE(n)					\
	case n:							\
		return read_sysreg(pmevtyper##n##_el0)

/*
 * Switch case writing to pmevtyper<n>_el0. The value written is the
 * identifier 'val', which must be in scope where the macro is expanded.
 */
#define PMEVTYPER_WRITE_CASE(n)					\
	case n:							\
		write_sysreg(val, pmevtyper##n##_el0);		\
		break

/* One READ or WRITE case (selected by 'op') for each index 0 to 30. */
#define PMEVTYPER_CASES(op)					\
	PMEVTYPER_##op##_CASE(0);				\
	PMEVTYPER_##op##_CASE(1);				\
	PMEVTYPER_##op##_CASE(2);				\
	PMEVTYPER_##op##_CASE(3);				\
	PMEVTYPER_##op##_CASE(4);				\
	PMEVTYPER_##op##_CASE(5);				\
	PMEVTYPER_##op##_CASE(6);				\
	PMEVTYPER_##op##_CASE(7);				\
	PMEVTYPER_##op##_CASE(8);				\
	PMEVTYPER_##op##_CASE(9);				\
	PMEVTYPER_##op##_CASE(10);				\
	PMEVTYPER_##op##_CASE(11);				\
	PMEVTYPER_##op##_CASE(12);				\
	PMEVTYPER_##op##_CASE(13);				\
	PMEVTYPER_##op##_CASE(14);				\
	PMEVTYPER_##op##_CASE(15);				\
	PMEVTYPER_##op##_CASE(16);				\
	PMEVTYPER_##op##_CASE(17);				\
	PMEVTYPER_##op##_CASE(18);				\
	PMEVTYPER_##op##_CASE(19);				\
	PMEVTYPER_##op##_CASE(20);				\
	PMEVTYPER_##op##_CASE(21);				\
	PMEVTYPER_##op##_CASE(22);				\
	PMEVTYPER_##op##_CASE(23);				\
	PMEVTYPER_##op##_CASE(24);				\
	PMEVTYPER_##op##_CASE(25);				\
	PMEVTYPER_##op##_CASE(26);				\
	PMEVTYPER_##op##_CASE(27);				\
	PMEVTYPER_##op##_CASE(28);				\
	PMEVTYPER_##op##_CASE(29);				\
	PMEVTYPER_##op##_CASE(30)
|
|
|
|
/*
|
|
* Read a value direct from PMEVTYPER<idx> where idx is 0-30
|
|
* or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
|
|
*/
|
|
static u64 kvm_vcpu_pmu_read_evtype_direct(int idx)
|
|
{
|
|
switch (idx) {
|
|
PMEVTYPER_CASES(READ);
|
|
case ARMV8_PMU_CYCLE_IDX:
|
|
return read_sysreg(pmccfiltr_el0);
|
|
default:
|
|
WARN_ON(1);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Write a value direct to PMEVTYPER<idx> where idx is 0-30
|
|
* or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
|
|
*/
|
|
static void kvm_vcpu_pmu_write_evtype_direct(int idx, u32 val)
|
|
{
|
|
switch (idx) {
|
|
PMEVTYPER_CASES(WRITE);
|
|
case ARMV8_PMU_CYCLE_IDX:
|
|
write_sysreg(val, pmccfiltr_el0);
|
|
break;
|
|
default:
|
|
WARN_ON(1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Modify ARMv8 PMU events to include EL0 counting
|
|
*/
|
|
static void kvm_vcpu_pmu_enable_el0(unsigned long events)
|
|
{
|
|
u64 typer;
|
|
u32 counter;
|
|
|
|
for_each_set_bit(counter, &events, 32) {
|
|
typer = kvm_vcpu_pmu_read_evtype_direct(counter);
|
|
typer &= ~ARMV8_PMU_EXCLUDE_EL0;
|
|
kvm_vcpu_pmu_write_evtype_direct(counter, typer);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Modify ARMv8 PMU events to exclude EL0 counting
|
|
*/
|
|
static void kvm_vcpu_pmu_disable_el0(unsigned long events)
|
|
{
|
|
u64 typer;
|
|
u32 counter;
|
|
|
|
for_each_set_bit(counter, &events, 32) {
|
|
typer = kvm_vcpu_pmu_read_evtype_direct(counter);
|
|
typer |= ARMV8_PMU_EXCLUDE_EL0;
|
|
kvm_vcpu_pmu_write_evtype_direct(counter, typer);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* On VHE ensure that only guest events have EL0 counting enabled.
|
|
* This is called from both vcpu_{load,put} and the sysreg handling.
|
|
* Since the latter is preemptible, special care must be taken to
|
|
* disable preemption.
|
|
*/
|
|
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct kvm_host_data *host;
|
|
u32 events_guest, events_host;
|
|
|
|
if (!has_vhe())
|
|
return;
|
|
|
|
preempt_disable();
|
|
host = this_cpu_ptr(&kvm_host_data);
|
|
events_guest = host->pmu_events.events_guest;
|
|
events_host = host->pmu_events.events_host;
|
|
|
|
kvm_vcpu_pmu_enable_el0(events_guest);
|
|
kvm_vcpu_pmu_disable_el0(events_host);
|
|
preempt_enable();
|
|
}
|
|
|
|
/*
|
|
* On VHE ensure that only host events have EL0 counting enabled
|
|
*/
|
|
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct kvm_host_data *host;
|
|
u32 events_guest, events_host;
|
|
|
|
if (!has_vhe())
|
|
return;
|
|
|
|
host = this_cpu_ptr(&kvm_host_data);
|
|
events_guest = host->pmu_events.events_guest;
|
|
events_host = host->pmu_events.events_host;
|
|
|
|
kvm_vcpu_pmu_enable_el0(events_host);
|
|
kvm_vcpu_pmu_disable_el0(events_guest);
|
|
}
|