perf/x86/kvm: Avoid unnecessary work in guest filtering

KVM added a workaround for PEBS events leaking into guests with
commit:

  26a4f3c08d ("perf/x86: disable PEBS on a guest entry.")

This uses the VT entry/exit list to add an extra disable of the
PEBS_ENABLE MSR.

Intel also added a fix for this issue to microcode updates on
Haswell/Broadwell/Skylake.

It turns out using the MSR entry/exit list makes VM exits
significantly slower. The list is only needed for disabling
PEBS, because the GLOBAL_CTRL change gets optimized by
KVM into changing the VMCS.

Check for the microcode updates that have the microcode
fix for leaking PEBS, and disable the extra entry/exit list
entry for PEBS_ENABLE. In addition we always clear the
GLOBAL_CTRL for the PEBS counter while running in the guest,
which is enough to make them never fire at the wrong
side of the host/guest transition.

The overhead for VM exits with the filtering active with the patch is
reduced from 8% to 4%.

The microcode patch has already been merged into future platforms.
This patch is a one-off thing, so a quirk is used here.

For other old platforms which don't have the microcode patch and quirks,
the extra disable of the PEBS_ENABLE MSR is still required.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: bp@alien8.de
Link: https://lkml.kernel.org/r/1549319013-4522-2-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Andi Kleen 2019-02-04 14:23:30 -08:00 committed by Ingo Molnar
parent f26d9db21b
commit 9b545c04ab
3 changed files with 75 additions and 16 deletions

View File

@ -18,6 +18,7 @@
#include <asm/hardirq.h> #include <asm/hardirq.h>
#include <asm/intel-family.h> #include <asm/intel-family.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include "../perf_event.h" #include "../perf_event.h"
@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
/* if (x86_pmu.flags & PMU_FL_PEBS_ALL)
* If PMU counter has PEBS enabled it is not enough to disable counter arr[0].guest &= ~cpuc->pebs_enabled;
* on a guest entry since PEBS memory write can overshoot guest entry else
* and corrupt guest memory. Disabling PEBS solves the problem. arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
*/ *nr = 1;
arr[1].msr = MSR_IA32_PEBS_ENABLE;
arr[1].host = cpuc->pebs_enabled; if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
arr[1].guest = 0; /*
* If PMU counter has PEBS enabled it is not enough to
* disable counter on a guest entry since PEBS memory
* write can overshoot guest entry and corrupt guest
* memory. Disabling PEBS solves the problem.
*
* Don't do this if the CPU already enforces it.
*/
arr[1].msr = MSR_IA32_PEBS_ENABLE;
arr[1].host = cpuc->pebs_enabled;
arr[1].guest = 0;
*nr = 2;
}
*nr = 2;
return arr; return arr;
} }
@ -3739,6 +3751,47 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL; x86_pmu.pebs_constraints = NULL;
} }
/*
 * Microcode revision table consulted by intel_check_pebs_isolation() through
 * x86_cpu_has_min_microcode_rev(). It covers Haswell, Broadwell, Skylake and
 * Kaby Lake models; the zero-filled final entry terminates the list.
 *
 * NOTE(review): the INTEL_CPU_DESC() arguments appear to be (model, stepping,
 * minimum microcode revision), with the revision being the first one that
 * carries the fix for PEBS leaking into guests -- confirm against the macro
 * definition in <asm/cpu_device_id.h>.
 */
static const struct x86_cpu_desc isolation_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e),
INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e),
{}
};
/*
 * Recompute x86_pmu.pebs_no_isolation from the microcode table.
 *
 * The flag is cleared when x86_cpu_has_min_microcode_rev() accepts the
 * isolation_ucodes table, and set otherwise.
 */
static void intel_check_pebs_isolation(void)
{
	if (x86_cpu_has_min_microcode_rev(isolation_ucodes))
		x86_pmu.pebs_no_isolation = 0;
	else
		x86_pmu.pebs_no_isolation = 1;
}
/*
 * Init-time quirk: install intel_check_pebs_isolation() as the
 * x86_pmu.check_microcode callback and evaluate it once right away.
 */
static __init void intel_pebs_isolation_quirk(void)
{
/* An already-installed callback would be overwritten below; warn once. */
WARN_ON_ONCE(x86_pmu.check_microcode);
/* NOTE(review): presumably re-run after a microcode update -- confirm against the check_microcode callers. */
x86_pmu.check_microcode = intel_check_pebs_isolation;
/* Set the initial pebs_no_isolation value now. */
intel_check_pebs_isolation();
}
static int intel_snb_pebs_broken(int cpu) static int intel_snb_pebs_broken(int cpu)
{ {
u32 rev = UINT_MAX; /* default to broken for unknown models */ u32 rev = UINT_MAX; /* default to broken for unknown models */
@ -4431,6 +4484,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_HASWELL_ULT: case INTEL_FAM6_HASWELL_ULT:
case INTEL_FAM6_HASWELL_GT3E: case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug); x86_add_quirk(intel_ht_bug);
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true; x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@ -4462,6 +4516,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_BROADWELL_XEON_D: case INTEL_FAM6_BROADWELL_XEON_D:
case INTEL_FAM6_BROADWELL_GT3E: case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_BROADWELL_X:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true; x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@ -4524,6 +4579,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP: case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true; x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

View File

@ -1628,6 +1628,8 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
if (x86_pmu.version <= 4)
x86_pmu.pebs_no_isolation = 1;
if (x86_pmu.pebs) { if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
int format = x86_pmu.intel_cap.pebs_format; int format = x86_pmu.intel_cap.pebs_format;

View File

@ -601,13 +601,14 @@ struct x86_pmu {
/* /*
* Intel DebugStore bits * Intel DebugStore bits
*/ */
unsigned int bts :1, unsigned int bts :1,
bts_active :1, bts_active :1,
pebs :1, pebs :1,
pebs_active :1, pebs_active :1,
pebs_broken :1, pebs_broken :1,
pebs_prec_dist :1, pebs_prec_dist :1,
pebs_no_tlb :1; pebs_no_tlb :1,
pebs_no_isolation :1;
int pebs_record_size; int pebs_record_size;
int pebs_buffer_size; int pebs_buffer_size;
void (*drain_pebs)(struct pt_regs *regs); void (*drain_pebs)(struct pt_regs *regs);