mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 20:16:17 +07:00
752272f16d
- Data abort report and injection - Steal time support - GICv4 performance improvements - vgic ITS emulation fixes - Simplify FWB handling - Enable halt polling counters - Make the emulated timer PREEMPT_RT compliant s390: - Small fixes and cleanups - selftest improvements - yield improvements PPC: - Add capability to tell userspace whether we can single-step the guest. - Improve the allocation of XIVE virtual processor IDs - Rewrite interrupt synthesis code to deliver interrupts in virtual mode when appropriate. - Minor cleanups and improvements. x86: - XSAVES support for AMD - more accurate report of nested guest TSC to the nested hypervisor - retpoline optimizations - support for nested 5-level page tables - PMU virtualization optimizations, and improved support for nested PMU virtualization - correct latching of INITs for nested virtualization - IOAPIC optimization - TSX_CTRL virtualization for more TAA happiness - improved allocation and flushing of SEV ASIDs - many bugfixes and cleanups -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQEcBAABAgAGBQJd27PMAAoJEL/70l94x66DspsH+gPc6YWtKJFJH58Zj8NrNh6y t0FwDFcvUa51+m4jaY4L5Y8+zqu1dZFnPPhFGqNWpxrjCEvE/glQJv3BiUX06Seh aYUHNymGoYCTJOHaaGhV+NlgQaDuZOCOkIsOLAPehyFd1KojwB+FRC0xmO6aROPw 9yQgYrKuK1UUn5HwxBNrMS4+Xv+2iKv/9sTnq1G4W2qX2NZQg84LVPg1zIdkCh3D 3GOvoCBEk3ivQqjmdE7rP/InPr0XvW0b6TFhchIk8J6jEIQFHsmOUefiTvTxsIHV OKAZwvyeYPrYHA/aDZpaBmY2aR0ydfKDUQcviNIJoF1vOktGs0hvl3VbsmG8QCg= =OSI1 -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull KVM updates from Paolo Bonzini: "ARM: - data abort report and injection - steal time support - GICv4 performance improvements - vgic ITS emulation fixes - simplify FWB handling - enable halt polling counters - make the emulated timer PREEMPT_RT compliant s390: - small fixes and cleanups - selftest improvements - yield improvements PPC: - add capability to tell userspace whether we can single-step the guest - improve the 
allocation of XIVE virtual processor IDs - rewrite interrupt synthesis code to deliver interrupts in virtual mode when appropriate. - minor cleanups and improvements. x86: - XSAVES support for AMD - more accurate report of nested guest TSC to the nested hypervisor - retpoline optimizations - support for nested 5-level page tables - PMU virtualization optimizations, and improved support for nested PMU virtualization - correct latching of INITs for nested virtualization - IOAPIC optimization - TSX_CTRL virtualization for more TAA happiness - improved allocation and flushing of SEV ASIDs - many bugfixes and cleanups" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (127 commits) kvm: nVMX: Relax guest IA32_FEATURE_CONTROL constraints KVM: x86: Grab KVM's srcu lock when setting nested state KVM: x86: Open code shared_msr_update() in its only caller KVM: Fix jump label out_free_* in kvm_init() KVM: x86: Remove a spurious export of a static function KVM: x86: create mmu/ subdirectory KVM: nVMX: Remove unnecessary TLB flushes on L1<->L2 switches when L1 use apic-access-page KVM: x86: remove set but not used variable 'called' KVM: nVMX: Do not mark vmcs02->apic_access_page as dirty when unpinning KVM: vmx: use MSR_IA32_TSX_CTRL to hard-disable TSX on guest that lack it KVM: vmx: implement MSR_IA32_TSX_CTRL disable RTM functionality KVM: x86: implement MSR_IA32_TSX_CTRL effect on CPUID KVM: x86: do not modify masked bits of shared MSRs KVM: x86: fix presentation of TSX feature in ARCH_CAPABILITIES KVM: PPC: Book3S HV: XIVE: Fix potential page leak on error path KVM: PPC: Book3S HV: XIVE: Free previous EQ page when setting up a new one KVM: nVMX: Assume TLB entries of L1 and L2 are tagged differently if L0 use EPT KVM: x86: Unexport kvm_vcpu_reload_apic_access_page() KVM: nVMX: add CR4_LA57 bit to nested CR4_FIXED1 KVM: nVMX: Use semi-colon instead of comma for exit-handlers initialization ...
717 lines
22 KiB
C
717 lines
22 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2012,2013 - ARM Ltd
|
|
* Author: Marc Zyngier <marc.zyngier@arm.com>
|
|
*
|
|
* Derived from arch/arm/include/asm/kvm_host.h:
|
|
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
|
|
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
|
|
*/
|
|
|
|
#ifndef __ARM64_KVM_HOST_H__
|
|
#define __ARM64_KVM_HOST_H__
|
|
|
|
#include <linux/bitmap.h>
|
|
#include <linux/types.h>
|
|
#include <linux/jump_label.h>
|
|
#include <linux/kvm_types.h>
|
|
#include <linux/percpu.h>
|
|
#include <asm/arch_gicv3.h>
|
|
#include <asm/barrier.h>
|
|
#include <asm/cpufeature.h>
|
|
#include <asm/cputype.h>
|
|
#include <asm/daifflags.h>
|
|
#include <asm/fpsimd.h>
|
|
#include <asm/kvm.h>
|
|
#include <asm/kvm_asm.h>
|
|
#include <asm/kvm_mmio.h>
|
|
#include <asm/thread_info.h>
|
|
|
|
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
|
|
|
|
#define KVM_USER_MEM_SLOTS 512
|
|
#define KVM_HALT_POLL_NS_DEFAULT 500000
|
|
|
|
#include <kvm/arm_vgic.h>
|
|
#include <kvm/arm_arch_timer.h>
|
|
#include <kvm/arm_pmu.h>
|
|
|
|
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
|
|
|
|
#define KVM_VCPU_MAX_FEATURES 7
|
|
|
|
#define KVM_REQ_SLEEP \
|
|
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
|
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
|
|
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
|
|
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
|
|
|
|
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
|
|
|
|
extern unsigned int kvm_sve_max_vl;
|
|
int kvm_arm_init_sve(void);
|
|
|
|
int __attribute_const__ kvm_target_cpu(void);
|
|
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
|
|
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
|
|
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
|
|
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
|
|
|
|
struct kvm_vmid {
|
|
/* The VMID generation used for the virt. memory system */
|
|
u64 vmid_gen;
|
|
u32 vmid;
|
|
};
|
|
|
|
struct kvm_arch {
|
|
struct kvm_vmid vmid;
|
|
|
|
/* stage2 entry level table */
|
|
pgd_t *pgd;
|
|
phys_addr_t pgd_phys;
|
|
|
|
/* VTCR_EL2 value for this VM */
|
|
u64 vtcr;
|
|
|
|
/* The last vcpu id that ran on each physical CPU */
|
|
int __percpu *last_vcpu_ran;
|
|
|
|
/* The maximum number of vCPUs depends on the used GIC model */
|
|
int max_vcpus;
|
|
|
|
/* Interrupt controller */
|
|
struct vgic_dist vgic;
|
|
|
|
/* Mandated version of PSCI */
|
|
u32 psci_version;
|
|
|
|
/*
|
|
* If we encounter a data abort without valid instruction syndrome
|
|
* information, report this to user space. User space can (and
|
|
* should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
|
|
* supported.
|
|
*/
|
|
bool return_nisv_io_abort_to_user;
|
|
};
|
|
|
|
/* Capacity, in objects, of a struct kvm_mmu_memory_cache */
#define KVM_NR_MEM_OBJS		40

/*
 * Allocation failures are not wanted inside the mmu code, so enough
 * memory for a single page fault is preallocated into this cache.
 */
struct kvm_mmu_memory_cache {
	/* presumably the number of valid entries in objects[] - TODO confirm */
	int	nobjs;
	/* Preallocated objects, at most KVM_NR_MEM_OBJS of them */
	void	*objects[KVM_NR_MEM_OBJS];
};
|
|
|
|
struct kvm_vcpu_fault_info {
|
|
u32 esr_el2; /* Hyp Syndrom Register */
|
|
u64 far_el2; /* Hyp Fault Address Register */
|
|
u64 hpfar_el2; /* Hyp IPA Fault Address Register */
|
|
u64 disr_el1; /* Deferred [SError] Status Register */
|
|
};
|
|
|
|
/*
 * Indices of the system registers kept in a vcpu context.
 *
 * Index 0 is reserved as an invalid value.
 * The order should be kept in sync with the save/restore code.
 */
enum vcpu_sysreg {
	__INVALID_SYSREG__ = 0,	/* Reserved: never a valid register index */

	MPIDR_EL1,		/* MultiProcessor Affinity Register */
	CSSELR_EL1,		/* Cache Size Selection Register */
	SCTLR_EL1,		/* System Control Register */
	ACTLR_EL1,		/* Auxiliary Control Register */
	CPACR_EL1,		/* Coprocessor Access Control */
	ZCR_EL1,		/* SVE Control */
	TTBR0_EL1,		/* Translation Table Base Register 0 */
	TTBR1_EL1,		/* Translation Table Base Register 1 */
	TCR_EL1,		/* Translation Control Register */
	ESR_EL1,		/* Exception Syndrome Register */
	AFSR0_EL1,		/* Auxiliary Fault Status Register 0 */
	AFSR1_EL1,		/* Auxiliary Fault Status Register 1 */
	FAR_EL1,		/* Fault Address Register */
	MAIR_EL1,		/* Memory Attribute Indirection Register */
	VBAR_EL1,		/* Vector Base Address Register */
	CONTEXTIDR_EL1,		/* Context ID Register */
	TPIDR_EL0,		/* Thread ID, User R/W */
	TPIDRRO_EL0,		/* Thread ID, User R/O */
	TPIDR_EL1,		/* Thread ID, Privileged */
	AMAIR_EL1,		/* Aux Memory Attribute Indirection Register */
	CNTKCTL_EL1,		/* Timer Control Register (EL1) */
	PAR_EL1,		/* Physical Address Register */
	MDSCR_EL1,		/* Monitor Debug System Control Register */
	MDCCINT_EL1,		/* Monitor Debug Comms Channel Interrupt Enable Reg */
	DISR_EL1,		/* Deferred Interrupt Status Register */

	/* Performance Monitors Registers */
	PMCR_EL0,		/* Control Register */
	PMSELR_EL0,		/* Event Counter Selection Register */
	PMEVCNTR0_EL0,		/* Event Counter Register (0-30) */
	PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
	PMCCNTR_EL0,		/* Cycle Counter Register */
	PMEVTYPER0_EL0,		/* Event Type Register (0-30) */
	PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
	PMCCFILTR_EL0,		/* Cycle Count Filter Register */
	PMCNTENSET_EL0,		/* Count Enable Set Register */
	PMINTENSET_EL1,		/* Interrupt Enable Set Register */
	PMOVSSET_EL0,		/* Overflow Flag Status Set Register */
	PMSWINC_EL0,		/* Software Increment Register */
	PMUSERENR_EL0,		/* User Enable Register */

	/* Pointer Authentication Registers, in strictly increasing order */
	APIAKEYLO_EL1,
	APIAKEYHI_EL1,
	APIBKEYLO_EL1,
	APIBKEYHI_EL1,
	APDAKEYLO_EL1,
	APDAKEYHI_EL1,
	APDBKEYLO_EL1,
	APDBKEYHI_EL1,
	APGAKEYLO_EL1,
	APGAKEYHI_EL1,

	/* 32bit specific registers; these stay at the end of the range */
	DACR32_EL2,		/* Domain Access Control Register */
	IFSR32_EL2,		/* Instruction Fault Status Register */
	FPEXC32_EL2,		/* Floating-Point Exception Control Register */
	DBGVCR32_EL2,		/* Debug Vector Catch Register */

	NR_SYS_REGS		/* Number of entries - nothing after this line! */
};
|
|
|
|
/*
 * 32bit mapping
 *
 * Each 32bit register index is twice the index of the 64bit register
 * that backs it; a "+ 1" selects the following 32bit slot.
 */

/* CP15 registers */
#define c0_MPIDR	(MPIDR_EL1 * 2)		/* MultiProcessor ID Register */
#define c0_CSSELR	(CSSELR_EL1 * 2)	/* Cache Size Selection Register */
#define c1_SCTLR	(SCTLR_EL1 * 2)		/* System Control Register */
#define c1_ACTLR	(ACTLR_EL1 * 2)		/* Auxiliary Control Register */
#define c1_CPACR	(CPACR_EL1 * 2)		/* Coprocessor Access Control */
#define c2_TTBR0	(TTBR0_EL1 * 2)		/* Translation Table Base Register 0 */
#define c2_TTBR0_high	(c2_TTBR0 + 1)		/* TTBR0 top 32 bits */
#define c2_TTBR1	(TTBR1_EL1 * 2)		/* Translation Table Base Register 1 */
#define c2_TTBR1_high	(c2_TTBR1 + 1)		/* TTBR1 top 32 bits */
#define c2_TTBCR	(TCR_EL1 * 2)		/* Translation Table Base Control R. */
#define c3_DACR		(DACR32_EL2 * 2)	/* Domain Access Control Register */
#define c5_DFSR		(ESR_EL1 * 2)		/* Data Fault Status Register */
#define c5_IFSR		(IFSR32_EL2 * 2)	/* Instruction Fault Status Register */
#define c5_ADFSR	(AFSR0_EL1 * 2)		/* Auxiliary Data Fault Status R */
#define c5_AIFSR	(AFSR1_EL1 * 2)		/* Auxiliary Instr Fault Status R */
#define c6_DFAR		(FAR_EL1 * 2)		/* Data Fault Address Register */
#define c6_IFAR		(c6_DFAR + 1)		/* Instruction Fault Address Register */
#define c7_PAR		(PAR_EL1 * 2)		/* Physical Address Register */
#define c7_PAR_high	(c7_PAR + 1)		/* PAR top 32 bits */
#define c10_PRRR	(MAIR_EL1 * 2)		/* Primary Region Remap Register */
#define c10_NMRR	(c10_PRRR + 1)		/* Normal Memory Remap Register */
#define c12_VBAR	(VBAR_EL1 * 2)		/* Vector Base Address Register */
#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
#define c13_TID_URW	(TPIDR_EL0 * 2)		/* Thread ID, User R/W */
#define c13_TID_URO	(TPIDRRO_EL0 * 2)	/* Thread ID, User R/O */
#define c13_TID_PRIV	(TPIDR_EL1 * 2)		/* Thread ID, Privileged */
#define c10_AMAIR0	(AMAIR_EL1 * 2)		/* Aux Memory Attr Indirection Reg */
#define c10_AMAIR1	(c10_AMAIR0 + 1)	/* Aux Memory Attr Indirection Reg */
#define c14_CNTKCTL	(CNTKCTL_EL1 * 2)	/* Timer Control Register (PL1) */

/*
 * CP14 registers
 *
 * NOTE(review): DBGBCR0_EL1, DBGBVR0_EL1, DBGWCR0_EL1 and DBGWVR0_EL1
 * are not enumerators of vcpu_sysreg as visible in this header, so the
 * macros built on them only expand correctly if those names come from
 * somewhere else - confirm whether they are still used.
 */
#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)

/* Number of 32bit slots: two per 64bit system register */
#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
|
|
|
|
struct kvm_cpu_context {
|
|
struct kvm_regs gp_regs;
|
|
union {
|
|
u64 sys_regs[NR_SYS_REGS];
|
|
u32 copro[NR_COPRO_REGS];
|
|
};
|
|
|
|
struct kvm_vcpu *__hyp_running_vcpu;
|
|
};
|
|
|
|
struct kvm_pmu_events {
|
|
u32 events_host;
|
|
u32 events_guest;
|
|
};
|
|
|
|
struct kvm_host_data {
|
|
struct kvm_cpu_context host_ctxt;
|
|
struct kvm_pmu_events pmu_events;
|
|
};
|
|
|
|
typedef struct kvm_host_data kvm_host_data_t;
|
|
|
|
struct vcpu_reset_state {
|
|
unsigned long pc;
|
|
unsigned long r0;
|
|
bool be;
|
|
bool reset;
|
|
};
|
|
|
|
struct kvm_vcpu_arch {
|
|
struct kvm_cpu_context ctxt;
|
|
void *sve_state;
|
|
unsigned int sve_max_vl;
|
|
|
|
/* HYP configuration */
|
|
u64 hcr_el2;
|
|
u32 mdcr_el2;
|
|
|
|
/* Exception Information */
|
|
struct kvm_vcpu_fault_info fault;
|
|
|
|
/* State of various workarounds, see kvm_asm.h for bit assignment */
|
|
u64 workaround_flags;
|
|
|
|
/* Miscellaneous vcpu state flags */
|
|
u64 flags;
|
|
|
|
/*
|
|
* We maintain more than a single set of debug registers to support
|
|
* debugging the guest from the host and to maintain separate host and
|
|
* guest state during world switches. vcpu_debug_state are the debug
|
|
* registers of the vcpu as the guest sees them. host_debug_state are
|
|
* the host registers which are saved and restored during
|
|
* world switches. external_debug_state contains the debug
|
|
* values we want to debug the guest. This is set via the
|
|
* KVM_SET_GUEST_DEBUG ioctl.
|
|
*
|
|
* debug_ptr points to the set of debug registers that should be loaded
|
|
* onto the hardware when running the guest.
|
|
*/
|
|
struct kvm_guest_debug_arch *debug_ptr;
|
|
struct kvm_guest_debug_arch vcpu_debug_state;
|
|
struct kvm_guest_debug_arch external_debug_state;
|
|
|
|
/* Pointer to host CPU context */
|
|
struct kvm_cpu_context *host_cpu_context;
|
|
|
|
struct thread_info *host_thread_info; /* hyp VA */
|
|
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
|
|
|
|
struct {
|
|
/* {Break,watch}point registers */
|
|
struct kvm_guest_debug_arch regs;
|
|
/* Statistical profiling extension */
|
|
u64 pmscr_el1;
|
|
} host_debug_state;
|
|
|
|
/* VGIC state */
|
|
struct vgic_cpu vgic_cpu;
|
|
struct arch_timer_cpu timer_cpu;
|
|
struct kvm_pmu pmu;
|
|
|
|
/*
|
|
* Anything that is not used directly from assembly code goes
|
|
* here.
|
|
*/
|
|
|
|
/*
|
|
* Guest registers we preserve during guest debugging.
|
|
*
|
|
* These shadow registers are updated by the kvm_handle_sys_reg
|
|
* trap handler if the guest accesses or updates them while we
|
|
* are using guest debug.
|
|
*/
|
|
struct {
|
|
u32 mdscr_el1;
|
|
} guest_debug_preserved;
|
|
|
|
/* vcpu power-off state */
|
|
bool power_off;
|
|
|
|
/* Don't run the guest (internal implementation need) */
|
|
bool pause;
|
|
|
|
/* IO related fields */
|
|
struct kvm_decode mmio_decode;
|
|
|
|
/* Cache some mmu pages needed inside spinlock regions */
|
|
struct kvm_mmu_memory_cache mmu_page_cache;
|
|
|
|
/* Target CPU and feature flags */
|
|
int target;
|
|
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
|
|
|
|
/* Detect first run of a vcpu */
|
|
bool has_run_once;
|
|
|
|
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
|
|
u64 vsesr_el2;
|
|
|
|
/* Additional reset state */
|
|
struct vcpu_reset_state reset_state;
|
|
|
|
/* True when deferrable sysregs are loaded on the physical CPU,
|
|
* see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
|
|
bool sysregs_loaded_on_cpu;
|
|
|
|
/* Guest PV state */
|
|
struct {
|
|
u64 steal;
|
|
u64 last_steal;
|
|
gpa_t base;
|
|
} steal;
|
|
};
|
|
|
|
/*
 * Address of the vcpu's SVE FFR, for use with sve_{save,load}_state():
 * the byte offset sve_ffr_offset(sve_max_vl) into the sve_state buffer.
 */
#define vcpu_sve_pffr(vcpu)						\
	((void *)((char *)((vcpu)->arch.sve_state) +			\
		  sve_ffr_offset((vcpu)->arch.sve_max_vl)))
|
|
|
|
/*
 * Size of the vcpu's SVE register state, as a size_t.
 *
 * Evaluates to SVE_SIG_REGS_SIZE() of the vector quadword count derived
 * from the vcpu's sve_max_vl. If sve_max_vl is not a valid vector
 * length, a warning is raised and the result is 0.
 */
#define vcpu_sve_state_size(vcpu) ({					\
	size_t __sve_bytes = 0;						\
	unsigned int __sve_vq;						\
									\
	if (!WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) {		\
		__sve_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl);	\
		__sve_bytes = SVE_SIG_REGS_SIZE(__sve_vq);		\
	}								\
									\
	__sve_bytes;							\
})
|
|
|
|
/*
 * vcpu_arch flags field values.
 *
 * These are deliberately plain int constants: with a 64bit flags word,
 * "flags &= ~FLAG" then sign-extends the mask and leaves the upper
 * 32 bits untouched.
 */
#define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
#define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
#define KVM_ARM64_HOST_SVE_IN_USE	(1 << 3) /* backup for host TIF_SVE */
#define KVM_ARM64_HOST_SVE_ENABLED	(1 << 4) /* SVE enabled for EL0 */
#define KVM_ARM64_GUEST_HAS_SVE		(1 << 5) /* SVE exposed to guest */
#define KVM_ARM64_VCPU_SVE_FINALIZED	(1 << 6) /* SVE config completed */
#define KVM_ARM64_GUEST_HAS_PTRAUTH	(1 << 7) /* PTRAUTH exposed to guest */
|
|
|
|
/* True if the system supports SVE and SVE is flagged for this vcpu */
#define vcpu_has_sve(vcpu)						\
	(system_supports_sve() &&					\
	 ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))

/*
 * True if the system supports address or generic authentication and
 * pointer authentication is flagged for this vcpu
 */
#define vcpu_has_ptrauth(vcpu)						\
	((system_supports_address_auth() ||				\
	  system_supports_generic_auth()) &&				\
	 ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH))

/* Address of the general purpose registers in the vcpu's context */
#define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
|
|
|
|
/*
 * Memory backed copy of a system register.
 *
 * Use this only when the in-memory version is really what is wanted,
 * rather than the value most recently accessed by a running VCPU -
 * for example for userspace access, or for system registers that are
 * only ever emulated and never context switched.
 */
#define __vcpu_sys_reg(v, r)	((v)->arch.ctxt.sys_regs[(r)])
|
|
|
|
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
|
|
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
|
|
|
|
/*
 * CP14 and CP15 are backed by the same system registers, hence both
 * accessors index the same array.
 */
#define vcpu_cp14(v, r)		((v)->arch.ctxt.copro[(r)])
#define vcpu_cp15(v, r)		((v)->arch.ctxt.copro[(r)])
|
|
|
|
struct kvm_vm_stat {
|
|
ulong remote_tlb_flush;
|
|
};
|
|
|
|
struct kvm_vcpu_stat {
|
|
u64 halt_successful_poll;
|
|
u64 halt_attempted_poll;
|
|
u64 halt_poll_invalid;
|
|
u64 halt_wakeup;
|
|
u64 hvc_exit_stat;
|
|
u64 wfe_exit_stat;
|
|
u64 wfi_exit_stat;
|
|
u64 mmio_exit_user;
|
|
u64 mmio_exit_kernel;
|
|
u64 exits;
|
|
};
|
|
|
|
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
|
|
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
|
|
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
|
|
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
|
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
|
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
|
struct kvm_vcpu_events *events);
|
|
|
|
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
|
struct kvm_vcpu_events *events);
|
|
|
|
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
|
int kvm_unmap_hva_range(struct kvm *kvm,
|
|
unsigned long start, unsigned long end);
|
|
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
|
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
|
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
|
|
|
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
|
|
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
|
|
void kvm_arm_halt_guest(struct kvm *kvm);
|
|
void kvm_arm_resume_guest(struct kvm *kvm);
|
|
|
|
u64 __kvm_call_hyp(void *hypfn, ...);
|
|
|
|
/*
 * Call hyp function f, discarding any result.
 *
 * Without VHE the call goes through __kvm_call_hyp(). With VHE, f is
 * called directly and followed by an isb(); the isb() is there to
 * guarantee the same behaviour as on !VHE, where the eret to EL1 acts
 * as a context synchronization event.
 */
#define kvm_call_hyp(f, ...)						\
	do {								\
		if (!has_vhe()) {					\
			__kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__);	\
		} else {						\
			f(__VA_ARGS__);					\
			isb();						\
		}							\
	} while (0)
|
|
|
|
/*
 * Call hyp function f and evaluate to its return value.
 *
 * With VHE, f is called directly and followed by an isb(); otherwise
 * the call goes through __kvm_call_hyp(). The isb() mirrors the
 * context synchronization that the non-VHE path gets from the eret
 * to EL1.
 *
 * The temporary uses a reserved-style name so that an argument which
 * is itself called "ret" at the call site is not captured by the
 * macro's own local.
 */
#define kvm_call_hyp_ret(f, ...)					\
	({								\
		typeof(f(__VA_ARGS__)) __kvm_hyp_ret;			\
									\
		if (has_vhe()) {					\
			__kvm_hyp_ret = f(__VA_ARGS__);			\
			isb();						\
		} else {						\
			__kvm_hyp_ret = __kvm_call_hyp(kvm_ksym_ref(f),	\
						       ##__VA_ARGS__);	\
		}							\
									\
		__kvm_hyp_ret;						\
	})
|
|
|
|
void force_vm_exit(const cpumask_t *mask);
|
|
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
|
|
|
|
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
|
int exception_index);
|
|
void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
|
int exception_index);
|
|
|
|
int kvm_perf_init(void);
|
|
int kvm_perf_teardown(void);
|
|
|
|
long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
|
|
gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
|
|
void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
|
|
|
|
int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
|
|
struct kvm_device_attr *attr);
|
|
int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
|
|
struct kvm_device_attr *attr);
|
|
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
|
|
struct kvm_device_attr *attr);
|
|
|
|
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
|
|
{
|
|
vcpu_arch->steal.base = GPA_INVALID;
|
|
}
|
|
|
|
static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
|
|
{
|
|
return (vcpu_arch->steal.base != GPA_INVALID);
|
|
}
|
|
|
|
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
|
|
|
|
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
|
|
|
|
DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
|
|
|
|
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
|
|
{
|
|
/* The host's MPIDR is immutable, so let's set it up at boot time */
|
|
cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
|
|
}
|
|
|
|
void __kvm_enable_ssbs(void);
|
|
|
|
/*
 * Initialise HYP mode on the calling CPU.
 *
 * @pgd_ptr:       passed to the hyp init call, cast to a pointer
 * @hyp_stack_ptr: passed through to the hyp init call
 * @vector_ptr:    passed through to the hyp init call
 */
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
				       unsigned long hyp_stack_ptr,
				       unsigned long vector_ptr)
{
	/*
	 * The raw per-cpu offset, computed without a translation from
	 * the kernel's mapping to the linear mapping. It ends up in
	 * tpidr_el2 so that adr_l can be used to access per-cpu
	 * variables in EL2.
	 */
	u64 this_cpu_data = (u64)this_cpu_ptr(&kvm_host_data);
	u64 data_sym = (u64)kvm_ksym_ref(kvm_host_data);
	u64 tpidr_el2 = this_cpu_data - data_sym;

	/*
	 * Run the initialization code and switch to the full blown HYP
	 * code. Something has gone very wrong if the cpucaps have not
	 * been finalized by now: hyp would crash and burn as soon as it
	 * used any cpus_have_const_cap() wrapper.
	 */
	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);

	/*
	 * Disabling SSBD on a non-VHE system requires SSBS to be
	 * enabled at EL2; nothing more to do in any other case.
	 */
	if (has_vhe())
		return;
	if (!this_cpu_has_cap(ARM64_SSBS))
		return;
	if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_DISABLE)
		return;

	kvm_call_hyp(__kvm_enable_ssbs);
}
|
|
|
|
static inline bool kvm_arch_requires_vhe(void)
|
|
{
|
|
/*
|
|
* The Arm architecture specifies that implementation of SVE
|
|
* requires VHE also to be implemented. The KVM code for arm64
|
|
* relies on this when SVE is present:
|
|
*/
|
|
if (system_supports_sve())
|
|
return true;
|
|
|
|
/* Some implementations have defects that confine them to VHE */
|
|
if (cpus_have_cap(ARM64_WORKAROUND_1165522))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
|
|
|
|
/* The following arch hooks are intentionally empty in this header. */
static inline void kvm_arch_hardware_unsetup(void)
{
}

static inline void kvm_arch_sync_events(struct kvm *kvm)
{
}

static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
}

static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
}
|
|
|
|
void kvm_arm_init_debug(void);
|
|
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
|
|
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
|
|
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
|
|
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
|
|
struct kvm_device_attr *attr);
|
|
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
|
|
struct kvm_device_attr *attr);
|
|
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
|
|
struct kvm_device_attr *attr);
|
|
|
|
/* Intentionally empty: this header defines no work for this hook. */
static inline void __cpu_init_stage2(void)
{
}
|
|
|
|
/* Guest/host FPSIMD coordination helpers */
|
|
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
|
|
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
|
|
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
|
|
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
|
|
|
|
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
|
|
{
|
|
return (!has_vhe() && attr->exclude_host);
|
|
}
|
|
|
|
#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
|
|
static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
|
{
|
|
return kvm_arch_vcpu_run_map_fp(vcpu);
|
|
}
|
|
|
|
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
|
|
void kvm_clr_pmu_events(u32 clr);
|
|
|
|
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
|
|
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
|
|
#else
|
|
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
|
|
static inline void kvm_clr_pmu_events(u32 clr) {}
|
|
#endif
|
|
|
|
/* Mask exceptions and synchronise the PMR before entering the guest. */
static inline void kvm_arm_vhe_guest_enter(void)
{
	local_daif_mask();

	/*
	 * If IRQs were left masked via PMR on guest entry, the GIC would
	 * not signal interrupts of lower priority to the CPU, and guest
	 * exceptions would be the only way out. Naturally, that is to be
	 * avoided.
	 *
	 * local_daif_mask() already sets GIC_PRIO_PSR_I_SET; all that is
	 * left is a dsb to ensure the redistributor is forwarding EL2
	 * IRQs to the CPU.
	 */
	pmr_sync();
}
|
|
|
|
static inline void kvm_arm_vhe_guest_exit(void)
|
|
{
|
|
/*
|
|
* local_daif_restore() takes care to properly restore PSTATE.DAIF
|
|
* and the GIC PMR if the host is using IRQ priorities.
|
|
*/
|
|
local_daif_restore(DAIF_PROCCTX_NOIRQ);
|
|
|
|
/*
|
|
* When we exit from the guest we change a number of CPU configuration
|
|
* parameters, such as traps. Make sure these changes take effect
|
|
* before running the host or additional guests.
|
|
*/
|
|
isb();
|
|
}
|
|
|
|
/*
 * Branch predictor hardening states, as returned by
 * kvm_arm_harden_branch_predictor(). The negative value is
 * parenthesized so that it stays a single operand wherever the macro
 * is expanded.
 */
#define KVM_BP_HARDEN_UNKNOWN		(-1)
#define KVM_BP_HARDEN_WA_NEEDED		0
#define KVM_BP_HARDEN_NOT_REQUIRED	1
|
|
|
|
static inline int kvm_arm_harden_branch_predictor(void)
|
|
{
|
|
switch (get_spectre_v2_workaround_state()) {
|
|
case ARM64_BP_HARDEN_WA_NEEDED:
|
|
return KVM_BP_HARDEN_WA_NEEDED;
|
|
case ARM64_BP_HARDEN_NOT_REQUIRED:
|
|
return KVM_BP_HARDEN_NOT_REQUIRED;
|
|
case ARM64_BP_HARDEN_UNKNOWN:
|
|
default:
|
|
return KVM_BP_HARDEN_UNKNOWN;
|
|
}
|
|
}
|
|
|
|
/*
 * SSBD states, as returned by kvm_arm_have_ssbd(). The negative value
 * is parenthesized so that it stays a single operand wherever the
 * macro is expanded.
 */
#define KVM_SSBD_UNKNOWN		(-1)
#define KVM_SSBD_FORCE_DISABLE		0
#define KVM_SSBD_KERNEL			1
#define KVM_SSBD_FORCE_ENABLE		2
#define KVM_SSBD_MITIGATED		3
|
|
|
|
static inline int kvm_arm_have_ssbd(void)
|
|
{
|
|
switch (arm64_get_ssbd_state()) {
|
|
case ARM64_SSBD_FORCE_DISABLE:
|
|
return KVM_SSBD_FORCE_DISABLE;
|
|
case ARM64_SSBD_KERNEL:
|
|
return KVM_SSBD_KERNEL;
|
|
case ARM64_SSBD_FORCE_ENABLE:
|
|
return KVM_SSBD_FORCE_ENABLE;
|
|
case ARM64_SSBD_MITIGATED:
|
|
return KVM_SSBD_MITIGATED;
|
|
case ARM64_SSBD_UNKNOWN:
|
|
default:
|
|
return KVM_SSBD_UNKNOWN;
|
|
}
|
|
}
|
|
|
|
void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
|
|
void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
|
|
|
|
void kvm_set_ipa_limit(void);
|
|
|
|
#define __KVM_HAVE_ARCH_VM_ALLOC
|
|
struct kvm *kvm_arch_alloc_vm(void);
|
|
void kvm_arch_free_vm(struct kvm *kvm);
|
|
|
|
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
|
|
|
|
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
|
|
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
|
|
|
/* Non-zero once KVM_ARM64_VCPU_SVE_FINALIZED is set in the vcpu's flags */
#define kvm_arm_vcpu_sve_finalized(vcpu)				\
	((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
|
|
|
|
#endif /* __ARM64_KVM_HOST_H__ */
|