s390: implement diag318

x86:
 * Report last CPU for debugging
 * Emulate smaller MAXPHYADDR in the guest than in the host
 * .noinstr and tracing fixes from Thomas
 * nested SVM page table switching optimization and fixes
 
 Generic:
 * Unify shadow MMU cache data structures across architectures
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAl8pC+oUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroNcOwgAjomqtEqQNlp7DdZT7VyyklzbxX1/
 ud7v+oOJ8K4sFlf64lSthjPo3N9rzZCcw+yOXmuyuITngXOGc3tzIwXpCzpLtuQ1
 WO1Ql3B/2dCi3lP5OMmsO1UAZqy9pKLg1dfeYUPk48P5+p7d/NPmk+Em5kIYzKm5
 JsaHfCp2EEXomwmljNJ8PQ1vTjIQSSzlgYUBZxmCkaaX7zbEUMtxAQCStHmt8B84
 33LczwXBm3viSWrzsoBV37I70+tseugiSGsCfUyupXOvq55d6D9FCqtCb45Hn4Vh
 Ik8ggKdalsk/reiGEwNw1/3nr6mRMkHSbl+Mhc4waOIFf9dn0urgQgOaDg==
 =YVx0
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "s390:
   - implement diag318

  x86:
   - Report last CPU for debugging
   - Emulate smaller MAXPHYADDR in the guest than in the host
   - .noinstr and tracing fixes from Thomas
   - nested SVM page table switching optimization and fixes

  Generic:
   - Unify shadow MMU cache data structures across architectures"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (127 commits)
  KVM: SVM: Fix sev_pin_memory() error handling
  KVM: LAPIC: Set the TDCR settable bits
  KVM: x86: Specify max TDP level via kvm_configure_mmu()
  KVM: x86/mmu: Rename max_page_level to max_huge_page_level
  KVM: x86: Dynamically calculate TDP level from max level and MAXPHYADDR
  KVM: VMX: Remove temporary WARN on expected vs. actual EPTP level mismatch
  KVM: x86: Pull the PGD's level from the MMU instead of recalculating it
  KVM: VMX: Make vmx_load_mmu_pgd() static
  KVM: x86/mmu: Add separate helper for shadow NPT root page role calc
  KVM: VMX: Drop a duplicate declaration of construct_eptp()
  KVM: nSVM: Correctly set the shadow NPT root level in its MMU role
  KVM: Using macros instead of magic values
  MIPS: KVM: Fix build error caused by 'kvm_run' cleanup
  KVM: nSVM: remove nonsensical EXITINFO1 adjustment on nested NPF
  KVM: x86: Add a capability for GUEST_MAXPHYADDR < HOST_MAXPHYADDR support
  KVM: VMX: optimize #PF injection when MAXPHYADDR does not match
  KVM: VMX: Add guest physical address check in EPT violation and misconfig
  KVM: VMX: introduce vmx_need_pf_intercept
  KVM: x86: update exception bitmap on CPUID changes
  KVM: x86: rename update_bp_intercept to update_exception_bitmap
  ...
This commit is contained in:
Linus Torvalds 2020-08-06 12:59:31 -07:00
commit 921d2597ab
71 changed files with 1640 additions and 1219 deletions

View File

@ -5804,8 +5804,9 @@
panic() code such as dumping handler.
xen_nopvspin [X86,XEN]
Disables the ticketlock slowpath using Xen PV
optimizations.
Disables the qspinlock slowpath using Xen PV optimizations.
This parameter is obsoleted by the "nopvspin" parameter, which
has an equivalent effect on the XEN platform.
xen_nopv [X86]
Disables the PV optimizations forcing the HVM guest to
@ -5831,6 +5832,11 @@
as a generic guest with no PV drivers. Currently supports
XEN HVM, KVM, HYPER_V and VMWARE guests.
nopvspin [X86,XEN,KVM]
Disables the qspinlock slow path using PV optimizations
which allow the hypervisor to 'idle' the guest on lock
contention.
xirc2ps_cs= [NET,PCMCIA]
Format:
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]

View File

@ -669,6 +669,10 @@ MSRs that have been set successfully.
Defines the vcpu responses to the cpuid instruction. Applications
should use the KVM_SET_CPUID2 ioctl if available.
Note, when this IOCTL fails, KVM gives no guarantee that the previous valid
CPUID configuration (if there is one) has not been corrupted. Userspace can
retrieve a copy of the resulting CPUID configuration through KVM_GET_CPUID2
if needed.
::
struct kvm_cpuid_entry {
@ -4795,6 +4799,7 @@ hardware_exit_reason.
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
__u32 cpu; /* if KVM_LAST_CPU */
} fail_entry;
If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due

View File

@ -27,12 +27,12 @@ struct kvm_sys_reg_target_table {
void kvm_register_target_sys_reg_table(unsigned int target,
struct kvm_sys_reg_target_table *table);
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
#define kvm_coproc_table_init kvm_sys_reg_table_init
void kvm_sys_reg_table_init(void);

View File

@ -97,17 +97,6 @@ struct kvm_arch {
bool return_nisv_io_abort_to_user;
};
/* Number of slots in the objects[] array of struct kvm_mmu_memory_cache. */
#define KVM_NR_MEM_OBJS 40
/*
 * We don't want allocation failures within the mmu code, so we preallocate
 * enough memory for a single page fault in a cache.
 */
struct kvm_mmu_memory_cache {
/*
 * Count of entries held in objects[].
 * NOTE(review): presumably the live entries are objects[0..nobjs-1] --
 * confirm against the code that fills and drains the cache.
 */
int nobjs;
/* Pointers to the preallocated objects; at most KVM_NR_MEM_OBJS of them. */
void *objects[KVM_NR_MEM_OBJS];
};
struct kvm_vcpu_fault_info {
u32 esr_el2; /* Hyp Syndrom Register */
u64 far_el2; /* Hyp Fault Address Register */
@ -486,18 +475,15 @@ u64 __kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa);
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);
int kvm_perf_init(void);
int kvm_perf_teardown(void);

View File

@ -139,7 +139,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);

View File

@ -0,0 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM64_KVM_TYPES_H
#define _ASM_ARM64_KVM_TYPES_H
/*
 * arm64's value for the per-architecture memory-cache object count.
 * NOTE(review): presumably sizes the object array of a generic
 * struct kvm_mmu_memory_cache defined outside this header -- confirm at the
 * point of use.
 */
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
#endif /* _ASM_ARM64_KVM_TYPES_H */

View File

@ -270,6 +270,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@ -658,7 +660,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return ret;
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, run);
ret = kvm_handle_mmio_return(vcpu);
if (ret)
return ret;
}
@ -810,11 +812,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, run, ret);
handle_exit_early(vcpu, ret);
preempt_enable();
ret = handle_exit(vcpu, run, ret);
ret = handle_exit(vcpu, ret);
}
/* Tell userspace about in-kernel device output levels */

View File

@ -25,7 +25,7 @@
#define CREATE_TRACE_POINTS
#include "trace_handle_exit.h"
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
typedef int (*exit_handle_fn)(struct kvm_vcpu *);
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
{
@ -33,7 +33,7 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
kvm_inject_vabt(vcpu);
}
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_hvc(struct kvm_vcpu *vcpu)
{
int ret;
@ -50,7 +50,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu)
{
/*
* "If an SMC instruction executed at Non-secure EL1 is
@ -69,7 +69,7 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Guest access to FP/ASIMD registers are routed to this handler only
* when the system doesn't support FP/ASIMD.
*/
static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
@ -87,7 +87,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
*/
static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
@ -109,16 +109,16 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
* kvm_handle_guest_debug - handle a debug exception instruction
*
* @vcpu: the vcpu pointer
* @run: access to the kvm_run structure for results
*
* We route all debug exceptions through the same handler. If both the
* guest and host are using the same debug facilities it will be up to
* userspace to re-inject the correct exception for guest delivery.
*
* @return: 0 (while setting run->exit_reason), -1 for error
* @return: 0 (while setting vcpu->run->exit_reason), -1 for error
*/
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 hsr = kvm_vcpu_get_hsr(vcpu);
int ret = 0;
@ -144,7 +144,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
{
u32 hsr = kvm_vcpu_get_hsr(vcpu);
@ -155,7 +155,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_sve(struct kvm_vcpu *vcpu)
{
/* Until SVE is supported for guests: */
kvm_inject_undefined(vcpu);
@ -167,7 +167,7 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
* a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all
* that we can do is give the guest an UNDEF.
*/
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
@ -212,7 +212,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
* KVM_EXIT_DEBUG, otherwise userspace needs to complete its
* emulation first.
*/
static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
{
int handled;
@ -227,7 +227,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
exit_handle_fn exit_handler;
exit_handler = kvm_get_exit_handler(vcpu);
handled = exit_handler(vcpu, run);
handled = exit_handler(vcpu);
}
return handled;
@ -237,9 +237,10 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
* proper exit to userspace.
*/
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index)
int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
struct kvm_run *run = vcpu->run;
if (ARM_SERROR_PENDING(exception_index)) {
u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
@ -265,7 +266,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
case ARM_EXCEPTION_EL1_SERROR:
return 1;
case ARM_EXCEPTION_TRAP:
return handle_trap_exceptions(vcpu, run);
return handle_trap_exceptions(vcpu);
case ARM_EXCEPTION_HYP_GONE:
/*
* EL2 has been reset to the hyp-stub. This happens when a guest
@ -289,8 +290,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
/* For exit types that need handling before we can be preempted */
void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index)
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
{
if (ARM_SERROR_PENDING(exception_index)) {
if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {

View File

@ -77,9 +77,8 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
* or in-kernel IO emulation
*
* @vcpu: The VCPU pointer
* @run: The VCPU run struct containing the mmio data
*/
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
{
unsigned long data;
unsigned int len;
@ -92,6 +91,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->mmio_needed = 0;
if (!kvm_vcpu_dabt_iswrite(vcpu)) {
struct kvm_run *run = vcpu->run;
len = kvm_vcpu_dabt_get_as(vcpu);
data = kvm_mmio_read_buf(run->mmio.data, len);
@ -119,9 +120,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
struct kvm_run *run = vcpu->run;
unsigned long data;
unsigned long rt;
int ret;
@ -188,7 +189,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (!is_write)
memcpy(run->mmio.data, data_buf, len);
vcpu->stat.mmio_exit_kernel++;
kvm_handle_mmio_return(vcpu, run);
kvm_handle_mmio_return(vcpu);
return 1;
}

View File

@ -124,38 +124,6 @@ static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
put_page(virt_to_page(pudp));
}
/*
 * Refill @cache with free pages.
 *
 * If the cache already holds at least @min objects nothing is allocated.
 * Otherwise pages are allocated with GFP_PGTABLE_USER until the cache holds
 * @max objects. @max must not exceed KVM_NR_MEM_OBJS (enforced by BUG_ON).
 *
 * Returns 0 on success, or -ENOMEM as soon as a page allocation fails; pages
 * added before the failure stay in the cache.
 */
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	BUG_ON(max > KVM_NR_MEM_OBJS);

	/* Enough objects are already available for the caller. */
	if (cache->nobjs >= min)
		return 0;

	for (; cache->nobjs < max; cache->nobjs++) {
		void *new_page = (void *)__get_free_page(GFP_PGTABLE_USER);

		if (!new_page)
			return -ENOMEM;
		/* The slot is only counted once it has been filled. */
		cache->objects[cache->nobjs] = new_page;
	}

	return 0;
}
/*
 * Release every page still held in @mc, leaving the cache empty
 * (mc->nobjs == 0). Entries are freed from the last stored one downwards.
 */
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs != 0) {
		void *held_page;

		mc->nobjs--;
		held_page = mc->objects[mc->nobjs];
		free_page((unsigned long)held_page);
	}
}
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
void *p;
BUG_ON(!mc || !mc->nobjs);
p = mc->objects[--mc->nobjs];
return p;
}
static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL);
@ -1132,7 +1100,7 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
if (stage2_pgd_none(kvm, *pgd)) {
if (!cache)
return NULL;
p4d = mmu_memory_cache_alloc(cache);
p4d = kvm_mmu_memory_cache_alloc(cache);
stage2_pgd_populate(kvm, pgd, p4d);
get_page(virt_to_page(pgd));
}
@ -1150,7 +1118,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
if (stage2_p4d_none(kvm, *p4d)) {
if (!cache)
return NULL;
pud = mmu_memory_cache_alloc(cache);
pud = kvm_mmu_memory_cache_alloc(cache);
stage2_p4d_populate(kvm, p4d, pud);
get_page(virt_to_page(p4d));
}
@ -1171,7 +1139,7 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return NULL;
pmd = mmu_memory_cache_alloc(cache);
pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
@ -1377,7 +1345,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
pmd = mmu_memory_cache_alloc(cache);
pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
@ -1402,7 +1370,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
pte = mmu_memory_cache_alloc(cache);
pte = kvm_mmu_memory_cache_alloc(cache);
kvm_pmd_populate(pmd, pte);
get_page(virt_to_page(pmd));
}
@ -1469,7 +1437,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t addr, end;
int ret = 0;
unsigned long pfn;
struct kvm_mmu_memory_cache cache = { 0, };
struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(pa);
@ -1480,9 +1448,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (writable)
pte = kvm_s2pte_mkwrite(pte);
ret = mmu_topup_memory_cache(&cache,
kvm_mmu_cache_min_pages(kvm),
KVM_NR_MEM_OBJS);
ret = kvm_mmu_topup_memory_cache(&cache,
kvm_mmu_cache_min_pages(kvm));
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
@ -1496,7 +1463,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
}
out:
mmu_free_memory_cache(&cache);
kvm_mmu_free_memory_cache(&cache);
return ret;
}
@ -1882,8 +1849,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mmap_read_unlock(current->mm);
/* We need minimum second+third level pages */
ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
KVM_NR_MEM_OBJS);
ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm));
if (ret)
return ret;
@ -2050,7 +2016,6 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
* @run: the kvm_run structure
*
* Any abort that gets to the host is almost guaranteed to be caused by a
* missing second stage translation table entry, which can mean that either the
@ -2059,7 +2024,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
* space. The distinction is based on the IPA causing the fault and whether this
* memory region has been registered as standard RAM by user space.
*/
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
unsigned long fault_status;
phys_addr_t fault_ipa;
@ -2138,7 +2103,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
* of the page size.
*/
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
ret = io_mem_abort(vcpu, run, fault_ipa);
ret = io_mem_abort(vcpu, fault_ipa);
goto out_unlock;
}
@ -2307,7 +2272,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
phys_addr_t kvm_mmu_get_httbr(void)

View File

@ -2156,7 +2156,7 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
@ -2335,7 +2335,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
return 1;
}
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
{
const struct sys_reg_desc *target_specific;
size_t num;
@ -2346,7 +2346,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
target_specific, num);
}
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
{
const struct sys_reg_desc *target_specific;
size_t num;
@ -2357,14 +2357,14 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
target_specific, num);
}
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
{
return kvm_handle_cp_64(vcpu,
cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
NULL, 0);
}
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
{
return kvm_handle_cp_32(vcpu,
cp14_regs, ARRAY_SIZE(cp14_regs),
@ -2416,9 +2416,8 @@ static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
/**
* kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
{
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_hsr(vcpu);

View File

@ -2182,6 +2182,7 @@ endchoice
config KVM_GUEST
bool "KVM Guest Kernel"
depends on CPU_MIPS32_R2
depends on BROKEN_ON_SMP
help
Select this option if building a guest kernel for KVM (Trap & Emulate)

View File

@ -335,17 +335,6 @@ struct kvm_mips_tlb {
long tlb_lo[2];
};
/* Number of slots in the objects[] array of struct kvm_mmu_memory_cache. */
#define KVM_NR_MEM_OBJS 4
/*
 * We don't want allocation failures within the mmu code, so we preallocate
 * enough memory for a single page fault in a cache.
 */
struct kvm_mmu_memory_cache {
/*
 * Count of entries held in objects[].
 * NOTE(review): presumably the live entries are objects[0..nobjs-1] --
 * confirm against the MIPS code that fills and drains the cache.
 */
int nobjs;
/* Pointers to the preallocated objects; at most KVM_NR_MEM_OBJS of them. */
void *objects[KVM_NR_MEM_OBJS];
};
#define KVM_MIPS_AUX_FPU 0x1
#define KVM_MIPS_AUX_MSA 0x2
@ -854,8 +843,8 @@ struct kvm_mips_callbacks {
const struct kvm_one_reg *reg, s64 v);
int (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_put)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*vcpu_reenter)(struct kvm_run *run, struct kvm_vcpu *vcpu);
int (*vcpu_run)(struct kvm_vcpu *vcpu);
void (*vcpu_reenter)(struct kvm_vcpu *vcpu);
};
extern struct kvm_mips_callbacks *kvm_mips_callbacks;
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@ -910,7 +899,6 @@ extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu,
bool write_fault);
@ -1021,83 +1009,67 @@ static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *vcpu)
extern enum emulation_result kvm_mips_emulate_inst(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_syscall(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_handle_ri(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run);
extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu);
u32 kvm_mips_read_count(struct kvm_vcpu *vcpu);
void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count);
@ -1126,26 +1098,21 @@ static inline void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) {}
enum emulation_result kvm_mips_check_privilege(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
u32 *opc,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
u32 *opc,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
/* COP0 */

View File

@ -0,0 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_MIPS_KVM_TYPES_H
#define _ASM_MIPS_KVM_TYPES_H
/*
 * MIPS's value for the per-architecture memory-cache object count.
 * NOTE(review): presumably sizes the object array of a generic
 * struct kvm_mmu_memory_cache defined outside this header -- confirm at the
 * point of use.
 */
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 4
#endif /* _ASM_MIPS_KVM_TYPES_H */

View File

@ -1,31 +0,0 @@
KVM/MIPS Trap & Emulate Release Notes
=====================================
(1) KVM/MIPS should support MIPS32R2 and beyond. It has been tested on the following platforms:
Malta Board with FPGA based 34K
Sigma Designs TangoX board with a 24K based 8654 SoC.
Malta Board with 74K @ 1GHz
(2) Both Guest kernel and Guest Userspace execute in UM.
Guest User address space: 0x00000000 -> 0x40000000
Guest Kernel Unmapped: 0x40000000 -> 0x60000000
Guest Kernel Mapped: 0x60000000 -> 0x80000000
Guest Usermode virtual memory is limited to 1GB.
(3) 16K Page Sizes: Both Host Kernel and Guest Kernel should have the same page size, currently at least 16K.
Note that due to cache aliasing issues, 4K page sizes are NOT supported.
(4) No HugeTLB Support
Both the host kernel and Guest kernel should have the page size set to 16K.
This will be implemented in a future release.
(5) KVM/MIPS does not have support for SMP Guests
Linux-3.7-rc2 based SMP guest hangs due to the following code sequence in the generated TLB handlers:
LL/TLBP/SC. Since the TLBP instruction causes a trap the reservation gets cleared
when we ERET back to the guest. This causes the guest to hang in an infinite loop.
This will be fixed in a future release.
(6) Use Host FPU
Currently KVM/MIPS emulates a 24K CPU without an FPU.
This will be fixed in a future release.

View File

@ -37,10 +37,11 @@ choice
config KVM_MIPS_TE
bool "Trap & Emulate"
depends on CPU_MIPS32_R2
help
Use trap and emulate to virtualize 32-bit guests in user mode. This
does not require any special hardware Virtualization support beyond
standard MIPS32/64 r2 or later, but it does require the guest kernel
standard MIPS32 r2 or later, but it does require the guest kernel
to be configured with CONFIG_KVM_GUEST=y so that it resides in the
user address segment.

View File

@ -1262,7 +1262,6 @@ unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu)
enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
u32 *opc, u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -1597,12 +1596,12 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
int r;
enum emulation_result er;
u32 rt;
struct kvm_run *run = vcpu->run;
void *data = run->mmio.data;
unsigned int imme;
unsigned long curr_pc;
@ -1863,7 +1862,7 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
vcpu->arch.gprs[rt], *(u64 *)data);
break;
default:
kvm_err("Godson Exteneded GS-Store not yet supported (inst=0x%08x)\n",
kvm_err("Godson Extended GS-Store not yet supported (inst=0x%08x)\n",
inst.word);
break;
}
@ -1896,9 +1895,9 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
}
enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
u32 cause, struct kvm_run *run,
struct kvm_vcpu *vcpu)
u32 cause, struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
int r;
enum emulation_result er;
unsigned long curr_pc;
@ -2107,7 +2106,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
vcpu->mmio_needed = 30; /* signed */
break;
default:
kvm_err("Godson Exteneded GS-Load for float not yet supported (inst=0x%08x)\n",
kvm_err("Godson Extended GS-Load for float not yet supported (inst=0x%08x)\n",
inst.word);
break;
}
@ -2128,7 +2127,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
run->mmio.phys_addr, run->mmio.len, run->mmio.data);
if (!r) {
kvm_mips_complete_mmio_load(vcpu, run);
kvm_mips_complete_mmio_load(vcpu);
vcpu->mmio_needed = 0;
return EMULATE_DONE;
}
@ -2140,7 +2139,6 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
unsigned long curr_pc,
unsigned long addr,
struct kvm_run *run,
struct kvm_vcpu *vcpu,
u32 cause)
{
@ -2168,13 +2166,13 @@ static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
/* no matching guest TLB */
vcpu->arch.host_cp0_badvaddr = addr;
vcpu->arch.pc = curr_pc;
kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu);
kvm_mips_emulate_tlbmiss_ld(cause, NULL, vcpu);
return EMULATE_EXCEPT;
case KVM_MIPS_TLBINV:
/* invalid matching guest TLB */
vcpu->arch.host_cp0_badvaddr = addr;
vcpu->arch.pc = curr_pc;
kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu);
kvm_mips_emulate_tlbinv_ld(cause, NULL, vcpu);
return EMULATE_EXCEPT;
default:
break;
@ -2184,7 +2182,6 @@ static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
u32 *opc, u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
enum emulation_result er = EMULATE_DONE;
@ -2274,7 +2271,7 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
* guest's behalf.
*/
er = kvm_mips_guest_cache_op(protected_writeback_dcache_line,
curr_pc, va, run, vcpu, cause);
curr_pc, va, vcpu, cause);
if (er != EMULATE_DONE)
goto done;
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
@ -2287,11 +2284,11 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
} else if (op_inst == Hit_Invalidate_I) {
/* Perform the icache synchronisation on the guest's behalf */
er = kvm_mips_guest_cache_op(protected_writeback_dcache_line,
curr_pc, va, run, vcpu, cause);
curr_pc, va, vcpu, cause);
if (er != EMULATE_DONE)
goto done;
er = kvm_mips_guest_cache_op(protected_flush_icache_line,
curr_pc, va, run, vcpu, cause);
curr_pc, va, vcpu, cause);
if (er != EMULATE_DONE)
goto done;
@ -2317,7 +2314,6 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
}
enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
union mips_instruction inst;
@ -2333,14 +2329,14 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
switch (inst.r_format.opcode) {
case cop0_op:
er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu);
er = kvm_mips_emulate_CP0(inst, opc, cause, vcpu);
break;
#ifndef CONFIG_CPU_MIPSR6
case cache_op:
++vcpu->stat.cache_exits;
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
er = kvm_mips_emulate_cache(inst, opc, cause, run, vcpu);
er = kvm_mips_emulate_cache(inst, opc, cause, vcpu);
break;
#else
case spec3_op:
@ -2348,7 +2344,7 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
case cache6_op:
++vcpu->stat.cache_exits;
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
er = kvm_mips_emulate_cache(inst, opc, cause, run,
er = kvm_mips_emulate_cache(inst, opc, cause,
vcpu);
break;
default:
@ -2388,7 +2384,6 @@ long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu)
enum emulation_result kvm_mips_emulate_syscall(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2423,7 +2418,6 @@ enum emulation_result kvm_mips_emulate_syscall(u32 cause,
enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2467,7 +2461,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2509,7 +2502,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2551,7 +2543,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2592,7 +2583,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2632,7 +2622,6 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2661,7 +2650,6 @@ enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2696,7 +2684,6 @@ enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2731,7 +2718,6 @@ enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2766,7 +2752,6 @@ enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2801,7 +2786,6 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2836,7 +2820,6 @@ enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2870,7 +2853,6 @@ enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
}
enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -2959,12 +2941,12 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
* branch target), and pass the RI exception to the guest OS.
*/
vcpu->arch.pc = curr_pc;
return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
return kvm_mips_emulate_ri_exc(cause, opc, vcpu);
}
enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
enum emulation_result er = EMULATE_DONE;
@ -3107,7 +3089,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
static enum emulation_result kvm_mips_emulate_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
@ -3145,7 +3126,6 @@ static enum emulation_result kvm_mips_emulate_exc(u32 cause,
enum emulation_result kvm_mips_check_privilege(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
enum emulation_result er = EMULATE_DONE;
@ -3227,7 +3207,7 @@ enum emulation_result kvm_mips_check_privilege(u32 cause,
}
if (er == EMULATE_PRIV_FAIL)
kvm_mips_emulate_exc(cause, opc, run, vcpu);
kvm_mips_emulate_exc(cause, opc, vcpu);
return er;
}
@ -3241,7 +3221,6 @@ enum emulation_result kvm_mips_check_privilege(u32 cause,
*/
enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu,
bool write_fault)
{
@ -3265,9 +3244,9 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
KVM_ENTRYHI_ASID));
if (index < 0) {
if (exccode == EXCCODE_TLBL) {
er = kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu);
er = kvm_mips_emulate_tlbmiss_ld(cause, opc, vcpu);
} else if (exccode == EXCCODE_TLBS) {
er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu);
er = kvm_mips_emulate_tlbmiss_st(cause, opc, vcpu);
} else {
kvm_err("%s: invalid exc code: %d\n", __func__,
exccode);
@ -3282,10 +3261,10 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
*/
if (!TLB_IS_VALID(*tlb, va)) {
if (exccode == EXCCODE_TLBL) {
er = kvm_mips_emulate_tlbinv_ld(cause, opc, run,
er = kvm_mips_emulate_tlbinv_ld(cause, opc,
vcpu);
} else if (exccode == EXCCODE_TLBS) {
er = kvm_mips_emulate_tlbinv_st(cause, opc, run,
er = kvm_mips_emulate_tlbinv_st(cause, opc,
vcpu);
} else {
kvm_err("%s: invalid exc code: %d\n", __func__,

View File

@ -450,7 +450,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
int r = -EINTR;
vcpu_load(vcpu);
@ -459,11 +458,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (vcpu->mmio_needed) {
if (!vcpu->mmio_is_write)
kvm_mips_complete_mmio_load(vcpu, run);
kvm_mips_complete_mmio_load(vcpu);
vcpu->mmio_needed = 0;
}
if (run->immediate_exit)
if (vcpu->run->immediate_exit)
goto out;
lose_fpu(1);
@ -480,7 +479,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
r = kvm_mips_callbacks->vcpu_run(run, vcpu);
r = kvm_mips_callbacks->vcpu_run(vcpu);
trace_kvm_out(vcpu);
guest_exit_irqoff();
@ -1236,7 +1235,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
* end up causing an exception to be delivered to the Guest
* Kernel
*/
er = kvm_mips_check_privilege(cause, opc, run, vcpu);
er = kvm_mips_check_privilege(cause, opc, vcpu);
if (er == EMULATE_PRIV_FAIL) {
goto skip_emul;
} else if (er == EMULATE_FAIL) {
@ -1385,7 +1384,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
kvm_mips_callbacks->vcpu_reenter(run, vcpu);
kvm_mips_callbacks->vcpu_reenter(vcpu);
/*
* If FPU / MSA are enabled (i.e. the guest's FPU / MSA context

View File

@ -25,41 +25,9 @@
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif
/*
 * Refill @cache with free pages.
 *
 * If the cache already holds at least @min objects nothing is allocated.
 * Otherwise pages are allocated with GFP_KERNEL until the cache holds @max
 * objects. @max must not exceed KVM_NR_MEM_OBJS.
 *
 * Returns 0 on success, or -ENOMEM if a page allocation fails; pages that
 * were already added before the failure remain in the cache.
 */
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	BUG_ON(max > KVM_NR_MEM_OBJS);

	/* Enough objects on hand already; no allocation needed. */
	if (cache->nobjs >= min)
		return 0;

	for (; cache->nobjs < max; cache->nobjs++) {
		void *obj = (void *)__get_free_page(GFP_KERNEL);

		/* Bail out before the count is bumped for this slot. */
		if (!obj)
			return -ENOMEM;
		cache->objects[cache->nobjs] = obj;
	}

	return 0;
}
/*
 * Release every page still held in @mc, walking from the last stored
 * object down to the first, and leave the cache empty (nobjs == 0).
 */
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs != 0) {
		mc->nobjs--;
		free_page((unsigned long)mc->objects[mc->nobjs]);
	}
}
/*
 * Take the most recently stored object out of @mc and return it.
 *
 * The cache must be non-NULL and non-empty; this is enforced with BUG_ON()
 * rather than reported to the caller.
 */
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	BUG_ON(!mc || !mc->nobjs);

	return mc->objects[--mc->nobjs];
}
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
/**
@ -153,7 +121,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
if (!cache)
return NULL;
new_pmd = mmu_memory_cache_alloc(cache);
new_pmd = kvm_mmu_memory_cache_alloc(cache);
pmd_init((unsigned long)new_pmd,
(unsigned long)invalid_pte_table);
pud_populate(NULL, pud, new_pmd);
@ -164,7 +132,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
if (!cache)
return NULL;
new_pte = mmu_memory_cache_alloc(cache);
new_pte = kvm_mmu_memory_cache_alloc(cache);
clear_page(new_pte);
pmd_populate_kernel(NULL, pmd, new_pte);
}
@ -711,8 +679,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
goto out;
/* We need a minimum of cached pages ready for page table creation */
err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (err)
goto out;
@ -796,8 +763,7 @@ static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu,
int ret;
/* We need a minimum of cached pages ready for page table creation */
ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
ret = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (ret)
return NULL;

View File

@ -67,7 +67,6 @@ static int kvm_trap_emul_no_handler(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
@ -81,14 +80,14 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
* Unusable/no FPU in guest:
* deliver guest COP1 Unusable Exception
*/
er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
er = kvm_mips_emulate_fpu_exc(cause, opc, vcpu);
} else {
/* Restore FPU state */
kvm_own_fpu(vcpu);
er = EMULATE_DONE;
}
} else {
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
er = kvm_mips_emulate_inst(cause, opc, vcpu);
}
switch (er) {
@ -97,12 +96,12 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
break;
case EMULATE_FAIL:
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
break;
case EMULATE_WAIT:
run->exit_reason = KVM_EXIT_INTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
ret = RESUME_HOST;
break;
@ -116,8 +115,7 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
return ret;
}
static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run,
struct kvm_vcpu *vcpu)
static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_vcpu *vcpu)
{
enum emulation_result er;
union mips_instruction inst;
@ -125,7 +123,7 @@ static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run,
/* A code fetch fault doesn't count as an MMIO */
if (kvm_is_ifetch_fault(&vcpu->arch)) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
@ -134,23 +132,22 @@ static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run,
opc += 1;
err = kvm_get_badinstr(opc, vcpu, &inst.word);
if (err) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
/* Emulate the load */
er = kvm_mips_emulate_load(inst, cause, run, vcpu);
er = kvm_mips_emulate_load(inst, cause, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Emulate load from MMIO space failed\n");
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
} else {
run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
}
return RESUME_HOST;
}
static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_run *run,
struct kvm_vcpu *vcpu)
static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_vcpu *vcpu)
{
enum emulation_result er;
union mips_instruction inst;
@ -161,34 +158,33 @@ static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_run *run,
opc += 1;
err = kvm_get_badinstr(opc, vcpu, &inst.word);
if (err) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
/* Emulate the store */
er = kvm_mips_emulate_store(inst, cause, run, vcpu);
er = kvm_mips_emulate_store(inst, cause, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Emulate store to MMIO space failed\n");
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
} else {
run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
}
return RESUME_HOST;
}
static int kvm_mips_bad_access(u32 cause, u32 *opc, struct kvm_run *run,
static int kvm_mips_bad_access(u32 cause, u32 *opc,
struct kvm_vcpu *vcpu, bool store)
{
if (store)
return kvm_mips_bad_store(cause, opc, run, vcpu);
return kvm_mips_bad_store(cause, opc, vcpu);
else
return kvm_mips_bad_load(cause, opc, run, vcpu);
return kvm_mips_bad_load(cause, opc, vcpu);
}
static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
u32 cause = vcpu->arch.host_cp0_cause;
@ -212,12 +208,12 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
* They would indicate stale host TLB entries.
*/
if (unlikely(index < 0)) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
tlb = vcpu->arch.guest_tlb + index;
if (unlikely(!TLB_IS_VALID(*tlb, badvaddr))) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
@ -226,23 +222,23 @@ static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
* exception. Relay that on to the guest so it can handle it.
*/
if (!TLB_IS_DIRTY(*tlb, badvaddr)) {
kvm_mips_emulate_tlbmod(cause, opc, run, vcpu);
kvm_mips_emulate_tlbmod(cause, opc, vcpu);
return RESUME_GUEST;
}
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, badvaddr,
true))
/* Not writable, needs handling as MMIO */
return kvm_mips_bad_store(cause, opc, run, vcpu);
return kvm_mips_bad_store(cause, opc, vcpu);
return RESUME_GUEST;
} else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) {
if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, true) < 0)
/* Not writable, needs handling as MMIO */
return kvm_mips_bad_store(cause, opc, run, vcpu);
return kvm_mips_bad_store(cause, opc, vcpu);
return RESUME_GUEST;
} else {
/* host kernel addresses are all handled as MMIO */
return kvm_mips_bad_store(cause, opc, run, vcpu);
return kvm_mips_bad_store(cause, opc, vcpu);
}
}
@ -276,7 +272,7 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
* into the shadow host TLB
*/
er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu, store);
er = kvm_mips_handle_tlbmiss(cause, opc, vcpu, store);
if (er == EMULATE_DONE)
ret = RESUME_GUEST;
else {
@ -289,14 +285,14 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
* not expect to ever get them
*/
if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, store) < 0)
ret = kvm_mips_bad_access(cause, opc, run, vcpu, store);
ret = kvm_mips_bad_access(cause, opc, vcpu, store);
} else if (KVM_GUEST_KERNEL_MODE(vcpu)
&& (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
/*
* With EVA we may get a TLB exception instead of an address
* error when the guest performs MMIO to KSeg1 addresses.
*/
ret = kvm_mips_bad_access(cause, opc, run, vcpu, store);
ret = kvm_mips_bad_access(cause, opc, vcpu, store);
} else {
kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n",
store ? "ST" : "LD", cause, opc, badvaddr);
@ -320,7 +316,6 @@ static int kvm_trap_emul_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
u32 cause = vcpu->arch.host_cp0_cause;
@ -328,11 +323,11 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
if (KVM_GUEST_KERNEL_MODE(vcpu)
&& (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
ret = kvm_mips_bad_store(cause, opc, run, vcpu);
ret = kvm_mips_bad_store(cause, opc, vcpu);
} else {
kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -340,18 +335,17 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
u32 cause = vcpu->arch.host_cp0_cause;
int ret = RESUME_GUEST;
if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) {
ret = kvm_mips_bad_load(cause, opc, run, vcpu);
ret = kvm_mips_bad_load(cause, opc, vcpu);
} else {
kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -359,17 +353,16 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_syscall(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
er = kvm_mips_emulate_syscall(cause, opc, run, vcpu);
er = kvm_mips_emulate_syscall(cause, opc, vcpu);
if (er == EMULATE_DONE)
ret = RESUME_GUEST;
else {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -377,17 +370,16 @@ static int kvm_trap_emul_handle_syscall(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_res_inst(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
er = kvm_mips_handle_ri(cause, opc, run, vcpu);
er = kvm_mips_handle_ri(cause, opc, vcpu);
if (er == EMULATE_DONE)
ret = RESUME_GUEST;
else {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -395,17 +387,16 @@ static int kvm_trap_emul_handle_res_inst(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
er = kvm_mips_emulate_bp_exc(cause, opc, run, vcpu);
er = kvm_mips_emulate_bp_exc(cause, opc, vcpu);
if (er == EMULATE_DONE)
ret = RESUME_GUEST;
else {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -413,17 +404,16 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *)vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu);
er = kvm_mips_emulate_trap_exc(cause, opc, vcpu);
if (er == EMULATE_DONE) {
ret = RESUME_GUEST;
} else {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -431,17 +421,16 @@ static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *)vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu);
er = kvm_mips_emulate_msafpe_exc(cause, opc, vcpu);
if (er == EMULATE_DONE) {
ret = RESUME_GUEST;
} else {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -449,17 +438,16 @@ static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *)vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu);
er = kvm_mips_emulate_fpe_exc(cause, opc, vcpu);
if (er == EMULATE_DONE) {
ret = RESUME_GUEST;
} else {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
return ret;
@ -474,7 +462,6 @@ static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
@ -486,10 +473,10 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
* No MSA in guest, or FPU enabled and not in FR=1 mode,
* guest reserved instruction exception
*/
er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
er = kvm_mips_emulate_ri_exc(cause, opc, vcpu);
} else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) {
/* MSA disabled by guest, guest MSA disabled exception */
er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu);
er = kvm_mips_emulate_msadis_exc(cause, opc, vcpu);
} else {
/* Restore MSA/FPU state */
kvm_own_msa(vcpu);
@ -502,7 +489,7 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
break;
case EMULATE_FAIL:
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
break;
@ -1184,8 +1171,7 @@ void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu)
local_irq_enable();
}
static void kvm_trap_emul_vcpu_reenter(struct kvm_run *run,
struct kvm_vcpu *vcpu)
static void kvm_trap_emul_vcpu_reenter(struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
@ -1228,7 +1214,7 @@ static void kvm_trap_emul_vcpu_reenter(struct kvm_run *run,
check_mmu_context(mm);
}
static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
static int kvm_trap_emul_vcpu_run(struct kvm_vcpu *vcpu)
{
int cpu = smp_processor_id();
int r;
@ -1237,7 +1223,7 @@ static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvm_mips_deliver_interrupts(vcpu,
kvm_read_c0_guest_cause(vcpu->arch.cop0));
kvm_trap_emul_vcpu_reenter(run, vcpu);
kvm_trap_emul_vcpu_reenter(vcpu);
/*
* We use user accessors to access guest memory, but we don't want to
@ -1255,7 +1241,7 @@ static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
*/
kvm_mips_suspend_mm(cpu);
r = vcpu->arch.vcpu_run(run, vcpu);
r = vcpu->arch.vcpu_run(vcpu->run, vcpu);
/* We may have migrated while handling guest exits */
cpu = smp_processor_id();

View File

@ -874,7 +874,6 @@ static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val)
static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
u32 *opc, u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@ -1074,7 +1073,6 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
u32 *opc, u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
enum emulation_result er = EMULATE_DONE;
@ -1217,7 +1215,6 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
{
enum emulation_result er = EMULATE_DONE;
struct kvm_vcpu_arch *arch = &vcpu->arch;
struct kvm_run *run = vcpu->run;
union mips_instruction inst;
int rd, rt, sel;
int err;
@ -1233,12 +1230,12 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
switch (inst.r_format.opcode) {
case cop0_op:
er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu);
er = kvm_vz_gpsi_cop0(inst, opc, cause, vcpu);
break;
#ifndef CONFIG_CPU_MIPSR6
case cache_op:
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
er = kvm_vz_gpsi_cache(inst, opc, cause, vcpu);
break;
#endif
#ifdef CONFIG_CPU_LOONGSON64
@ -1251,7 +1248,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
#ifdef CONFIG_CPU_MIPSR6
case cache6_op:
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
er = kvm_vz_gpsi_cache(inst, opc, cause, vcpu);
break;
#endif
case rdhwr_op:
@ -1553,7 +1550,6 @@ static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu)
*/
static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_FAIL;
int ret = RESUME_GUEST;
@ -1581,7 +1577,7 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
break;
case EMULATE_FAIL:
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
break;
@ -1600,8 +1596,6 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
*/
static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
/*
* If MSA not present or not exposed to guest or FR=0, the MSA operation
* should have been treated as a reserved instruction!
@ -1612,7 +1606,7 @@ static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
(read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 ||
!(read_gc0_config5() & MIPS_CONF5_MSAEN) ||
vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
@ -1648,7 +1642,7 @@ static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
}
/* Treat as MMIO */
er = kvm_mips_emulate_load(inst, cause, run, vcpu);
er = kvm_mips_emulate_load(inst, cause, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n",
opc, badvaddr);
@ -1695,7 +1689,7 @@ static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
}
/* Treat as MMIO */
er = kvm_mips_emulate_store(inst, cause, run, vcpu);
er = kvm_mips_emulate_store(inst, cause, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n",
opc, badvaddr);
@ -3242,7 +3236,7 @@ static void kvm_vz_flush_shadow_memslot(struct kvm *kvm,
kvm_vz_flush_shadow_all(kvm);
}
static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu)
static void kvm_vz_vcpu_reenter(struct kvm_vcpu *vcpu)
{
int cpu = smp_processor_id();
int preserve_guest_tlb;
@ -3258,7 +3252,7 @@ static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvm_vz_vcpu_load_wired(vcpu);
}
static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
static int kvm_vz_vcpu_run(struct kvm_vcpu *vcpu)
{
int cpu = smp_processor_id();
int r;
@ -3271,7 +3265,7 @@ static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvm_vz_vcpu_load_tlb(vcpu, cpu);
kvm_vz_vcpu_load_wired(vcpu);
r = vcpu->arch.vcpu_run(run, vcpu);
r = vcpu->arch.vcpu_run(vcpu->run, vcpu);
kvm_vz_vcpu_save_wired(vcpu);

View File

@ -4,6 +4,7 @@ generated-y += syscall_table_64.h
generated-y += syscall_table_c32.h
generated-y += syscall_table_spu.h
generic-y += export.h
generic-y += kvm_types.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += vtime.h

View File

@ -6,5 +6,6 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += export.h
generic-y += kvm_types.h
generic-y += local64.h
generic-y += mcs_spinlock.h

View File

@ -298,10 +298,8 @@ struct diag26c_mac_resp {
union diag318_info {
unsigned long val;
struct {
unsigned int cpnc : 8;
unsigned int cpvc_linux : 24;
unsigned char cpvc_distro[3];
unsigned char zero;
unsigned long cpnc : 8;
unsigned long cpvc : 56;
};
};

View File

@ -260,7 +260,8 @@ struct kvm_s390_sie_block {
__u32 scaol; /* 0x0064 */
__u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u8 cpnc; /* 0x006a */
__u8 reserved6b; /* 0x006b */
__u32 todpr; /* 0x006c */
#define GISA_FORMAT1 0x00000001
__u32 gd; /* 0x0070 */
@ -745,6 +746,7 @@ struct kvm_vcpu_arch {
bool gs_enabled;
bool skey_enabled;
struct kvm_s390_pv_vcpu pv;
union diag318_info diag318_info;
};
struct kvm_vm_stat {

View File

@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
@ -264,7 +266,8 @@ struct kvm_sync_regs {
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
__u8 padding2[192]; /* sdnx needs to be 256byte aligned */
__u64 diag318; /* diagnose 0x318 info */
__u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {

View File

@ -1021,8 +1021,7 @@ static void __init setup_control_program_code(void)
{
union diag318_info diag318_info = {
.cpnc = CPNC_LINUX,
.cpvc_linux = 0,
.cpvc_distro = {0},
.cpvc = 0,
};
if (!sclp.has_diag318)

View File

@ -545,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS_MIGRATION:
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
@ -3267,7 +3268,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
KVM_SYNC_ACRS |
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
KVM_SYNC_PFAULT |
KVM_SYNC_DIAG318;
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
@ -3562,6 +3564,7 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->pp = 0;
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->todpr = 0;
vcpu->arch.sie_block->cpnc = 0;
}
}
@ -3579,6 +3582,7 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
regs->etoken = 0;
regs->etoken_extension = 0;
regs->diag318 = 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@ -3954,33 +3958,31 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
hva_t hva;
struct kvm_arch_async_pf arch;
int rc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
return 0;
return false;
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
return 0;
return false;
if (psw_extint_disabled(vcpu))
return 0;
return false;
if (kvm_s390_vcpu_has_irq(vcpu, 0))
return 0;
return false;
if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
return 0;
return false;
if (!vcpu->arch.gmap->pfault_enabled)
return 0;
return false;
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
hva += current->thread.gmap_addr & ~PAGE_MASK;
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
return 0;
return false;
rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
return rc;
return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@ -4175,8 +4177,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
@ -4196,6 +4199,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
}
/*
* If userspace sets the riccb (e.g. after migration) to a valid state,
* we should enable RI here instead of doing the lazy enablement.
@ -4242,8 +4249,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void sync_regs(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
@ -4272,7 +4281,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Sync fmt2 only data */
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
sync_regs_fmt2(vcpu, kvm_run);
sync_regs_fmt2(vcpu);
} else {
/*
* In several places we have to modify our internal view to
@ -4291,12 +4300,15 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->kvm_dirty_regs = 0;
}
static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
if (MACHINE_HAS_GS) {
__ctl_set_bit(2, 4);
if (vcpu->arch.gs_enabled)
@ -4312,8 +4324,10 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void store_regs(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
@ -4332,7 +4346,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
store_regs_fmt2(vcpu, kvm_run);
store_regs_fmt2(vcpu);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
@ -4370,7 +4384,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
goto out;
}
sync_regs(vcpu, kvm_run);
sync_regs(vcpu);
enable_cpu_timer_accounting(vcpu);
might_fault();
@ -4392,7 +4406,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
store_regs(vcpu);
kvm_sigset_deactivate(vcpu);

View File

@ -548,6 +548,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
scb_s->hpid = HPID_VSIE;
scb_s->cpnc = scb_o->cpnc;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);

View File

@ -803,6 +803,7 @@ config KVM_GUEST
depends on PARAVIRT
select PARAVIRT_CLOCK
select ARCH_CPUIDLE_HALTPOLL
select X86_HV_CALLBACK_VECTOR
default y
help
This option enables various optimizations for running under the KVM

View File

@ -67,12 +67,12 @@ static inline void kvm_set_cpu_l1tf_flush_l1d(void)
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
}
static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
{
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}

View File

@ -632,6 +632,10 @@ DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback);
#endif
#ifdef CONFIG_KVM_GUEST
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt);
#endif
#undef X86_TRAP_OTHER
#endif

View File

@ -193,8 +193,6 @@ struct x86_exception;
enum x86_intercept;
enum x86_intercept_stage;
#define KVM_NR_MEM_OBJS 40
#define KVM_NR_DB_REGS 4
#define DR6_BD (1 << 13)
@ -245,15 +243,6 @@ enum x86_intercept_stage;
struct kvm_kernel_irq_routing_entry;
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};
/*
* the pages used as guest page table on soft mmu are tracked by
* kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
@ -322,43 +311,6 @@ struct kvm_rmap_head {
unsigned long val;
};
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
struct list_head lpage_disallowed_link;
bool unsync;
u8 mmu_valid_gen;
bool mmio_cached;
bool lpage_disallowed; /* Can't be replaced by an equiv large page */
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
union kvm_mmu_page_role role;
gfn_t gfn;
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
DECLARE_BITMAP(unsync_child_bitmap, 512);
#ifdef CONFIG_X86_32
/*
* Used out of the mmu-lock to avoid reading spte values while an
* update is in progress; see the comments in __get_spte_lockless().
*/
int clear_spte_count;
#endif
/* Number of writes since the last time traversal visited this page. */
atomic_t write_flooding_count;
};
struct kvm_pio_request {
unsigned long linear_rip;
unsigned long count;
@ -384,6 +336,8 @@ struct kvm_mmu_root_info {
#define KVM_MMU_NUM_PREV_ROOTS 3
struct kvm_mmu_page;
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
@ -580,6 +534,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
unsigned long cr4_guest_rsvd_bits;
unsigned long cr8;
u32 host_pkru;
u32 pkru;
@ -635,7 +590,8 @@ struct kvm_vcpu_arch {
struct kvm_mmu *walk_mmu;
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_shadow_page_cache;
struct kvm_mmu_memory_cache mmu_gfn_array_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
/*
@ -683,7 +639,7 @@ struct kvm_vcpu_arch {
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
int maxphyaddr;
int tdp_level;
int max_tdp_level;
/* emulate context */
@ -827,6 +783,9 @@ struct kvm_vcpu_arch {
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
/* Host CPU on which VM-entry was most recently attempted */
unsigned int last_vmentry_cpu;
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
};
@ -1083,7 +1042,7 @@ struct kvm_x86_ops {
void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
bool (*has_emulated_msr)(u32 index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
@ -1098,7 +1057,7 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@ -1174,10 +1133,10 @@ struct kvm_x86_ops {
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
int pgd_level);
bool (*has_wbinvd_exit)(void);
@ -1220,7 +1179,6 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
@ -1281,6 +1239,7 @@ struct kvm_x86_nested_ops {
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int (*enable_evmcs)(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
@ -1304,7 +1263,7 @@ struct kvm_arch_async_pf {
};
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern struct kvm_x86_ops kvm_x86_ops;
#define __KVM_HAVE_ARCH_VM_ALLOC
@ -1549,20 +1508,8 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
bool skip_mmu_sync);
void kvm_configure_mmu(bool enable_tdp, int tdp_page_level);
static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception)
{
return gpa;
}
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
return (struct kvm_mmu_page *)page_private(page);
}
void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
int tdp_huge_page_level);
static inline u16 kvm_read_ldt(void)
{
@ -1636,7 +1583,15 @@ asmlinkage void kvm_spurious_fault(void);
insn "\n\t" \
"jmp 668f \n\t" \
"667: \n\t" \
"1: \n\t" \
".pushsection .discard.instr_begin \n\t" \
".long 1b - . \n\t" \
".popsection \n\t" \
"call kvm_spurious_fault \n\t" \
"1: \n\t" \
".pushsection .discard.instr_end \n\t" \
".long 1b - . \n\t" \
".popsection \n\t" \
"668: \n\t" \
_ASM_EXTABLE(666b, 667b)

View File

@ -4,6 +4,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@ -18,7 +19,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
#endif /* CONFIG_KVM_GUEST */
#define KVM_HYPERCALL \
ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the

View File

@ -0,0 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_KVM_TYPES_H
#define _ASM_X86_KVM_TYPES_H
/*
 * x86's value for the number of objects held by one KVM memory cache
 * (per the macro name).
 * NOTE(review): presumably sizes the objects[] array of the arch-generic
 * struct kvm_mmu_memory_cache; confirm against the generic kvm_types.h.
 */
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
#endif /* _ASM_X86_KVM_TYPES_H */

View File

@ -32,6 +32,7 @@ extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_init_lock_hash(void);
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
extern bool nopvspin;
#define queued_spin_unlock queued_spin_unlock
/**

View File

@ -7,8 +7,11 @@
* Authors: Anthony Liguori <aliguori@us.ibm.com>
*/
#define pr_fmt(fmt) "kvm-guest: " fmt
#include <linux/context_tracking.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
@ -232,16 +235,11 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
u32 reason = kvm_read_and_reset_apf_flags();
u32 flags = kvm_read_and_reset_apf_flags();
irqentry_state_t state;
switch (reason) {
case KVM_PV_REASON_PAGE_NOT_PRESENT:
case KVM_PV_REASON_PAGE_READY:
break;
default:
if (!flags)
return false;
}
state = irqentry_enter(regs);
instrumentation_begin();
@ -254,13 +252,13 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
panic("Host injected async #PF in interrupt disabled region\n");
if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
if (unlikely(!(user_mode(regs))))
panic("Host injected async #PF in kernel mode\n");
/* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
kvm_async_pf_task_wake(token);
WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
}
instrumentation_end();
@ -268,6 +266,27 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
return true;
}
/*
 * Body of the sysvec_kvm_asyncpf_interrupt system-vector handler.
 *
 * When this CPU's apf_reason.enabled is set, the token stored in
 * apf_reason.token is handed to kvm_async_pf_task_wake(), the token slot is
 * cleared, and 1 is written to MSR_KVM_ASYNC_PF_ACK.
 *
 * NOTE(review): DEFINE_IDTENTRY_SYSVEC() may already wrap this body in its own
 * entry/exit handling; confirm that the explicit irqentry_enter() /
 * irqentry_exit() pair below is actually required and not a double entry.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
{
/* Keep the value returned by set_irq_regs() so it can be put back on exit. */
struct pt_regs *old_regs = set_irq_regs(regs);
u32 token;
irqentry_state_t state;
state = irqentry_enter(regs);
/* Account this event in the irq_hv_callback_count statistic. */
inc_irq_stat(irq_hv_callback_count);
if (__this_cpu_read(apf_reason.enabled)) {
token = __this_cpu_read(apf_reason.token);
kvm_async_pf_task_wake(token);
/*
 * Zero the token slot first, then write the ACK MSR.
 * NOTE(review): presumably the ACK tells the host the slot may be reused,
 * which would make this ordering significant; confirm against the KVM
 * MSR documentation.
 */
__this_cpu_write(apf_reason.token, 0);
wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
}
irqentry_exit(regs, state);
set_irq_regs(old_regs);
}
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
@ -289,8 +308,8 @@ static void kvm_register_steal_time(void)
return;
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
pr_info("kvm-stealtime: cpu %d, msr %llx\n",
cpu, (unsigned long long) slow_virt_to_phys(st));
pr_info("stealtime: cpu %d, msr %llx\n", cpu,
(unsigned long long) slow_virt_to_phys(st));
}
static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@ -311,17 +330,19 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
pa |= KVM_ASYNC_PF_ENABLED;
pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
@ -493,7 +514,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
} else {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
ret);
min = max = apic_id;
ipi_bitmap = 0;
}
@ -503,7 +525,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
if (ipi_bitmap) {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
ret);
}
local_irq_restore(flags);
@ -533,7 +556,7 @@ static void kvm_setup_pv_ipi(void)
{
apic->send_IPI_mask = kvm_send_ipi_mask;
apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
pr_info("KVM setup pv IPIs\n");
pr_info("setup PV IPIs\n");
}
static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
@ -551,13 +574,6 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
}
}
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
if (kvm_para_has_hint(KVM_HINTS_REALTIME))
static_branch_disable(&virt_spin_lock_key);
}
static void __init kvm_smp_prepare_boot_cpu(void)
{
/*
@ -646,19 +662,20 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
static_branch_enable(&kvm_async_pf_enabled);
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
}
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (pv_sched_yield_supported()) {
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
pr_info("KVM setup pv sched yield\n");
pr_info("setup PV sched yield\n");
}
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
pr_err("failed to install cpu hotplug callbacks\n");
#else
sev_map_percpu_data();
kvm_guest_cpu_init();
@ -854,16 +871,36 @@ asm(
*/
void __init kvm_spinlock_init(void)
{
/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
/*
* In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
* advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
* preferred over native qspinlock when vCPU is preempted.
*/
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
pr_info("PV spinlocks disabled, no host support\n");
return;
}
if (kvm_para_has_hint(KVM_HINTS_REALTIME))
return;
/*
* Disable PV spinlocks and use native qspinlock when dedicated pCPUs
* are available.
*/
if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
goto out;
}
/* Don't use the pvqspinlock code if there is only 1 vCPU. */
if (num_possible_cpus() == 1)
return;
if (num_possible_cpus() == 1) {
pr_info("PV spinlocks disabled, single CPU\n");
goto out;
}
if (nopvspin) {
pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
goto out;
}
pr_info("PV spinlocks enabled\n");
__pv_init_lock_hash();
pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
@ -876,6 +913,13 @@ void __init kvm_spinlock_init(void)
pv_ops.lock.vcpu_is_preempted =
PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
}
/*
* When PV spinlock is enabled which is preferred over
* virt_spin_lock(), virt_spin_lock_key's value is meaningless.
* Just disable it anyway.
*/
out:
static_branch_disable(&virt_spin_lock_key);
}
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
@ -895,8 +939,8 @@ static void kvm_enable_host_haltpoll(void *i)
void arch_haltpoll_enable(unsigned int cpu)
{
if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
pr_err_once("kvm: host does not support poll control\n");
pr_err_once("kvm: host upgrade recommended\n");
pr_err_once("host does not support poll control\n");
pr_err_once("host upgrade recommended\n");
return;
}

View File

@ -54,48 +54,9 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
#define F feature_bit
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct kvm_lapic *apic = vcpu->arch.apic;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (!best)
return 0;
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1)
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
cpuid_entry_change(best, X86_FEATURE_APIC,
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
if (apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
else
apic->lapic_timer.timer_mode_mask = 1 << 17;
}
best = kvm_find_cpuid_entry(vcpu, 7, 0);
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
cpuid_entry_change(best, X86_FEATURE_OSPKE,
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (!best) {
vcpu->arch.guest_supported_xcr0 = 0;
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
}
best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
/*
* The existing code assumes virtual address is 48-bit or 57-bit in the
@ -109,6 +70,38 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
return -EINVAL;
}
return 0;
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best) {
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
cpuid_entry_change(best, X86_FEATURE_APIC,
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
}
best = kvm_find_cpuid_entry(vcpu, 7, 0);
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
cpuid_entry_change(best, X86_FEATURE_OSPKE,
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (best)
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
@ -121,14 +114,39 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
}
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
else
apic->lapic_timer.timer_mode_mask = 1 << 17;
kvm_apic_set_version(vcpu);
}
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (!best)
vcpu->arch.guest_supported_xcr0 = 0;
else
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
/* Note, maxphyaddr must be updated before tdp_level. */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
kvm_mmu_reset_context(vcpu);
kvm_pmu_refresh(vcpu);
return 0;
vcpu->arch.cr4_guest_rsvd_bits =
__cr4_reserved_bits(guest_cpuid_has, vcpu);
kvm_x86_ops.update_exception_bitmap(vcpu);
}
static int is_efer_nx(void)
@ -203,10 +221,16 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_entries[i].padding[2] = 0;
}
vcpu->arch.cpuid_nent = cpuid->nent;
r = kvm_check_cpuid(vcpu);
if (r) {
vcpu->arch.cpuid_nent = 0;
kvfree(cpuid_entries);
goto out;
}
cpuid_fix_nx_cap(vcpu);
kvm_apic_set_version(vcpu);
kvm_x86_ops.cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
kvfree(cpuid_entries);
out:
@ -227,9 +251,14 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops.cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
r = kvm_check_cpuid(vcpu);
if (r) {
vcpu->arch.cpuid_nent = 0;
goto out;
}
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
out:
return r;
}
@ -604,7 +633,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
eax.split.bit_width = cap.bit_width_gp;
eax.split.mask_length = cap.events_mask_len;
edx.split.num_counters_fixed = cap.num_counters_fixed;
edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
edx.split.bit_width_fixed = cap.bit_width_fixed;
edx.split.reserved = 0;

View File

@ -9,7 +9,7 @@
extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,

View File

@ -354,7 +354,6 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *feat;
u32 v = APIC_VERSION;
if (!lapic_in_kernel(vcpu))
@ -367,8 +366,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
* version first and level-triggered interrupts never get EOIed in
* IOAPIC.
*/
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
@ -2068,7 +2066,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR: {
uint32_t old_divisor = apic->divide_count;
kvm_lapic_set_reg(apic, APIC_TDCR, val);
kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
update_divide_count(apic);
if (apic->divide_count != old_divisor &&
apic->lapic_timer.period) {
@ -2085,7 +2083,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_SELF_IPI:
if (apic_x2apic_mode(apic)) {
kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
kvm_lapic_reg_write(apic, APIC_ICR,
APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
} else
ret = 1;
break;
@ -2232,7 +2231,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
if (!apic)
return;

View File

@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include "cpuid.h"
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
@ -57,22 +58,14 @@ void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer);
void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
gpa_t nested_cr3);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
kvm->arch.n_used_mmu_pages;
return 0;
}
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE))
@ -97,9 +90,13 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
{
if (VALID_PAGE(vcpu->arch.mmu->root_hpa))
kvm_x86_ops.load_mmu_pgd(vcpu, vcpu->arch.mmu->root_hpa |
kvm_get_active_pcid(vcpu));
u64 root_hpa = vcpu->arch.mmu->root_hpa;
if (!VALID_PAGE(root_hpa))
return;
kvm_x86_ops.load_mmu_pgd(vcpu, root_hpa | kvm_get_active_pcid(vcpu),
vcpu->arch.mmu->shadow_root_level);
}
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
@ -158,6 +155,11 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}
/*
 * Return true when @gpa is greater than or equal to
 * BIT_ULL(cpuid_maxphyaddr(vcpu)), i.e. when it has a bit set at or above
 * the width returned by cpuid_maxphyaddr() for this vCPU.
 */
static inline bool kvm_mmu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
return (gpa >= BIT_ULL(cpuid_maxphyaddr(vcpu)));
}
/*
* Check if a given access (described through the I/D, W/R and U/S bits of a
* page fault error code pfec) causes a permission fault with the given PTE
@ -218,11 +220,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);

View File

@ -18,6 +18,7 @@
#include "irq.h"
#include "ioapic.h"
#include "mmu.h"
#include "mmu_internal.h"
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
@ -91,7 +92,8 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
*/
bool tdp_enabled = false;
static int max_page_level __read_mostly;
static int max_huge_page_level __read_mostly;
static int max_tdp_level __read_mostly;
enum {
AUDIT_PRE_PAGE_FAULT,
@ -515,6 +517,18 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
return likely(kvm_gen == spte_gen);
}
/*
 * Validate a guest physical address against the vCPU's limit.
 *
 * Returns @gpa unchanged when kvm_mmu_is_illegal_gpa() accepts it.
 * Otherwise ORs PFERR_RSVD_MASK into @exception->error_code and returns
 * UNMAPPED_GVA. @exception is dereferenced on the failure path, so it must
 * be a valid pointer there. @access is not used by this implementation.
 */
static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception)
{
/* Check if guest physical address doesn't exceed guest maximum */
if (kvm_mmu_is_illegal_gpa(vcpu, gpa)) {
exception->error_code |= PFERR_RSVD_MASK;
return UNMAPPED_GVA;
}
return gpa;
}
/*
* Sets the shadow PTE masks used by the MMU.
*
@ -676,7 +690,7 @@ union split_spte {
static void count_spte_clear(u64 *sptep, u64 spte)
{
struct kvm_mmu_page *sp = page_header(__pa(sptep));
struct kvm_mmu_page *sp = sptep_to_sp(sptep);
if (is_shadow_present_pte(spte))
return;
@ -760,7 +774,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
*/
static u64 __get_spte_lockless(u64 *sptep)
{
struct kvm_mmu_page *sp = page_header(__pa(sptep));
struct kvm_mmu_page *sp = sptep_to_sp(sptep);
union split_spte spte, *orig = (union split_spte *)sptep;
int count;
@ -1060,94 +1074,40 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
local_irq_enable();
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
struct kmem_cache *base_cache, int min)
{
void *obj;
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT);
if (!obj)
return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = obj;
}
return 0;
}
static int mmu_memory_cache_free_objects(struct kvm_mmu_memory_cache *cache)
{
return cache->nobjs;
}
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
struct kmem_cache *cache)
{
while (mc->nobjs)
kmem_cache_free(cache, mc->objects[--mc->nobjs]);
}
/*
 * Page-granular variant of the cache refill: top @cache up with whole pages
 * obtained from __get_free_page().
 *
 * Nothing is done when at least @min pages are already cached.  Otherwise
 * pages are allocated until cache->objects[] is full; a failed allocation is
 * tolerated once @min pages are held.
 *
 * Returns 0 on success, -ENOMEM if fewer than @min pages could be cached.
 */
static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
				       int min)
{
	if (cache->nobjs >= min)
		return 0;

	for (; cache->nobjs < ARRAY_SIZE(cache->objects); cache->nobjs++) {
		void *new_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);

		if (!new_page)
			return cache->nobjs < min ? -ENOMEM : 0;

		cache->objects[cache->nobjs] = new_page;
	}

	return 0;
}
/*
 * Drain @mc: release every cached page with free_page(), last-in first-out,
 * leaving mc->nobjs at zero.
 */
static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs != 0) {
		mc->nobjs--;
		free_page((unsigned long)mc->objects[mc->nobjs]);
	}
}
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
{
int r;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
pte_list_desc_cache, 8 + PTE_PREFETCH_NUM);
/* 1 rmap, 1 parent PTE per level, and the prefetched rmaps. */
r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
return r;
r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
PT64_ROOT_MAX_LEVEL);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
mmu_page_header_cache, 4);
out:
return r;
return r;
if (maybe_indirect) {
r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache,
PT64_ROOT_MAX_LEVEL);
if (r)
return r;
}
return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
PT64_ROOT_MAX_LEVEL);
}
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache,
pte_list_desc_cache);
mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
mmu_page_header_cache);
}
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
void *p;
BUG_ON(!mc->nobjs);
p = mc->objects[--mc->nobjs];
return p;
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_gfn_array_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
}
static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu)
{
return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache);
return kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache);
}
static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
@ -1415,10 +1375,10 @@ static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn,
static bool rmap_can_add(struct kvm_vcpu *vcpu)
{
struct kvm_mmu_memory_cache *cache;
struct kvm_mmu_memory_cache *mc;
cache = &vcpu->arch.mmu_pte_list_desc_cache;
return mmu_memory_cache_free_objects(cache);
mc = &vcpu->arch.mmu_pte_list_desc_cache;
return kvm_mmu_memory_cache_nr_free_objects(mc);
}
static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
@ -1426,7 +1386,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
struct kvm_mmu_page *sp;
struct kvm_rmap_head *rmap_head;
sp = page_header(__pa(spte));
sp = sptep_to_sp(spte);
kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
return pte_list_add(vcpu, spte, rmap_head);
@ -1438,7 +1398,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
gfn_t gfn;
struct kvm_rmap_head *rmap_head;
sp = page_header(__pa(spte));
sp = sptep_to_sp(spte);
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmap_head = gfn_to_rmap(kvm, gfn, sp);
__pte_list_remove(spte, rmap_head);
@ -1530,7 +1490,7 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
static bool __drop_large_spte(struct kvm *kvm, u64 *sptep)
{
if (is_large_pte(*sptep)) {
WARN_ON(page_header(__pa(sptep))->role.level == PG_LEVEL_4K);
WARN_ON(sptep_to_sp(sptep)->role.level == PG_LEVEL_4K);
drop_spte(kvm, sptep);
--kvm->stat.lpages;
return true;
@ -1542,7 +1502,7 @@ static bool __drop_large_spte(struct kvm *kvm, u64 *sptep)
static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
{
if (__drop_large_spte(vcpu->kvm, sptep)) {
struct kvm_mmu_page *sp = page_header(__pa(sptep));
struct kvm_mmu_page *sp = sptep_to_sp(sptep);
kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
KVM_PAGES_PER_HPAGE(sp->role.level));
@ -1738,21 +1698,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
/**
 * kvm_arch_write_log_dirty - emulate dirty page logging
 * @vcpu: Guest mode vcpu
 * @l2_gpa: guest physical address passed through to the vendor hook
 *
 * Emulate arch specific page modification logging for the
 * nested hypervisor by forwarding to kvm_x86_ops.write_log_dirty, when
 * that hook is provided.
 *
 * Return: the hook's return value, or 0 when no hook is installed.
 */
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa)
{
	if (!kvm_x86_ops.write_log_dirty)
		return 0;

	return kvm_x86_ops.write_log_dirty(vcpu, l2_gpa);
}
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn)
{
@ -2016,7 +1961,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
struct kvm_rmap_head *rmap_head;
struct kvm_mmu_page *sp;
sp = page_header(__pa(spte));
sp = sptep_to_sp(spte);
rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp);
@ -2105,10 +2050,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
{
struct kvm_mmu_page *sp;
sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
if (!direct)
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
sp->gfns = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
/*
@ -2138,7 +2083,7 @@ static void mark_unsync(u64 *spte)
struct kvm_mmu_page *sp;
unsigned int index;
sp = page_header(__pa(spte));
sp = sptep_to_sp(spte);
index = spte - sp->spt;
if (__test_and_set_bit(index, sp->unsync_child_bitmap))
return;
@ -2207,7 +2152,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
continue;
}
child = page_header(ent & PT64_BASE_ADDR_MASK);
child = to_shadow_page(ent & PT64_BASE_ADDR_MASK);
if (child->unsync_children) {
if (mmu_pages_add(pvec, child, i))
@ -2258,15 +2203,14 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);
#define for_each_valid_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
#define for_each_valid_sp(_kvm, _sp, _list) \
hlist_for_each_entry(_sp, _list, hash_link) \
if (is_obsolete_sp((_kvm), (_sp))) { \
} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
for_each_valid_sp(_kvm, _sp, _gfn) \
for_each_valid_sp(_kvm, _sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
static inline bool is_ept_sp(struct kvm_mmu_page *sp)
@ -2464,9 +2408,7 @@ static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
static void clear_sp_write_flooding_count(u64 *spte)
{
struct kvm_mmu_page *sp = page_header(__pa(spte));
__clear_sp_write_flooding_count(sp);
__clear_sp_write_flooding_count(sptep_to_sp(spte));
}
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@ -2476,7 +2418,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
int direct,
unsigned int access)
{
bool direct_mmu = vcpu->arch.mmu->direct_map;
union kvm_mmu_page_role role;
struct hlist_head *sp_list;
unsigned quadrant;
struct kvm_mmu_page *sp;
bool need_sync = false;
@ -2490,13 +2434,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (role.direct)
role.gpte_is_8_bytes = true;
role.access = access;
if (!vcpu->arch.mmu->direct_map
&& vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
if (!direct_mmu && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
for_each_valid_sp(vcpu->kvm, sp, gfn) {
sp_list = &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
for_each_valid_sp(vcpu->kvm, sp, sp_list) {
if (sp->gfn != gfn) {
collisions++;
continue;
@ -2508,6 +2453,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (sp->role.word != role.word)
continue;
if (direct_mmu)
goto trace_get_page;
if (sp->unsync) {
/* The page is good, but __kvm_sync_page might still end
* up zapping it. If so, break in order to rebuild it.
@ -2523,6 +2471,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
__clear_sp_write_flooding_count(sp);
trace_get_page:
trace_kvm_mmu_get_page(sp, false);
goto out;
}
@ -2533,8 +2483,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
sp->gfn = gfn;
sp->role = role;
hlist_add_head(&sp->hash_link,
&vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
hlist_add_head(&sp->hash_link, sp_list);
if (!direct) {
/*
* we should do write protection before syncing pages
@ -2548,7 +2497,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (level > PG_LEVEL_4K && need_sync)
flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
@ -2657,7 +2605,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* so we should update the spte at this point to get
* a new sp with the correct access.
*/
child = page_header(*sptep & PT64_BASE_ADDR_MASK);
child = to_shadow_page(*sptep & PT64_BASE_ADDR_MASK);
if (child->role.access == direct_access)
return;
@ -2679,7 +2627,7 @@ static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
if (is_large_pte(pte))
--kvm->stat.lpages;
} else {
child = page_header(pte & PT64_BASE_ADDR_MASK);
child = to_shadow_page(pte & PT64_BASE_ADDR_MASK);
drop_parent_pte(child, spte);
}
return true;
@ -2757,10 +2705,23 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
if (!sp->root_count) {
/* Count self */
(*nr_zapped)++;
list_move(&sp->link, invalid_list);
/*
* Already invalid pages (previously active roots) are not on
* the active page list. See list_del() in the "else" case of
* !sp->root_count.
*/
if (sp->role.invalid)
list_add(&sp->link, invalid_list);
else
list_move(&sp->link, invalid_list);
kvm_mod_used_mmu_pages(kvm, -1);
} else {
list_move(&sp->link, &kvm->arch.active_mmu_pages);
/*
* Remove the active root from the active page list, the root
* will be explicitly freed when the root_count hits zero.
*/
list_del(&sp->link);
/*
* Obsolete pages cannot be used on any vCPUs, see the comment
@ -2812,33 +2773,60 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
}
}
static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
struct list_head *invalid_list)
static unsigned long kvm_mmu_zap_oldest_mmu_pages(struct kvm *kvm,
unsigned long nr_to_zap)
{
struct kvm_mmu_page *sp;
unsigned long total_zapped = 0;
struct kvm_mmu_page *sp, *tmp;
LIST_HEAD(invalid_list);
bool unstable;
int nr_zapped;
if (list_empty(&kvm->arch.active_mmu_pages))
return false;
return 0;
sp = list_last_entry(&kvm->arch.active_mmu_pages,
struct kvm_mmu_page, link);
return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
restart:
list_for_each_entry_safe(sp, tmp, &kvm->arch.active_mmu_pages, link) {
/*
* Don't zap active root pages, the page itself can't be freed
* and zapping it will just force vCPUs to realloc and reload.
*/
if (sp->root_count)
continue;
unstable = __kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list,
&nr_zapped);
total_zapped += nr_zapped;
if (total_zapped >= nr_to_zap)
break;
if (unstable)
goto restart;
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
kvm->stat.mmu_recycled += total_zapped;
return total_zapped;
}
/*
 * Number of MMU pages that may still be allocated for @kvm: the difference
 * between n_max_mmu_pages and n_used_mmu_pages, clamped so that it never
 * goes below zero.
 */
static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
	if (kvm->arch.n_max_mmu_pages <= kvm->arch.n_used_mmu_pages)
		return 0;

	return kvm->arch.n_max_mmu_pages - kvm->arch.n_used_mmu_pages;
}
static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
{
LIST_HEAD(invalid_list);
unsigned long avail = kvm_mmu_available_pages(vcpu->kvm);
if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
if (likely(avail >= KVM_MIN_FREE_MMU_PAGES))
return 0;
while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
break;
++vcpu->kvm->stat.mmu_recycled;
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail);
if (!kvm_mmu_available_pages(vcpu->kvm))
return -ENOSPC;
@ -2851,17 +2839,12 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
*/
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
spin_lock(&kvm->mmu_lock);
if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
/* Need to free some mmu pages to achieve the goal. */
while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages)
if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list))
break;
kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
goal_nr_mmu_pages);
kvm_mmu_commit_zap_page(kvm, &invalid_list);
goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
}
@ -2999,7 +2982,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
return 0;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
if (sp_ad_disabled(sp))
spte |= SPTE_AD_DISABLED_MASK;
else if (kvm_vcpu_ad_need_write_protect(vcpu))
@ -3102,7 +3085,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
struct kvm_mmu_page *child;
u64 pte = *sptep;
child = page_header(pte & PT64_BASE_ADDR_MASK);
child = to_shadow_page(pte & PT64_BASE_ADDR_MASK);
drop_parent_pte(child, sptep);
flush = true;
} else if (pfn != spte_to_pfn(*sptep)) {
@ -3212,7 +3195,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
{
struct kvm_mmu_page *sp;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
/*
* Without accessed bits, there's no way to distinguish between
@ -3274,7 +3257,7 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
if (!slot)
return PG_LEVEL_4K;
max_level = min(max_level, max_page_level);
max_level = min(max_level, max_huge_page_level);
for ( ; max_level > PG_LEVEL_4K; max_level--) {
linfo = lpage_info_slot(gfn, slot, max_level);
if (!linfo->disallow_lpage)
@ -3520,7 +3503,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (!is_shadow_present_pte(spte))
break;
sp = page_header(__pa(iterator.sptep));
sp = sptep_to_sp(iterator.sptep);
if (!is_last_spte(spte, sp->role.level))
break;
@ -3607,7 +3590,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
if (!VALID_PAGE(*root_hpa))
return;
sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK);
sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK);
--sp->root_count;
if (!sp->root_count && sp->role.invalid)
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
@ -3668,7 +3651,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
{
int ret = 0;
if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
ret = 1;
}
@ -3837,7 +3820,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
sp = to_shadow_page(root);
/*
* Even if another CPU was marking the SP as unsync-ed
@ -3871,7 +3854,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
sp = page_header(root);
sp = to_shadow_page(root);
mmu_sync_children(vcpu, sp);
}
}
@ -4045,8 +4028,8 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
{
struct kvm_arch_async_pf arch;
@ -4108,16 +4091,16 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
r = mmu_topup_memory_caches(vcpu);
if (fast_page_fault(vcpu, gpa, error_code))
return RET_PF_RETRY;
r = mmu_topup_memory_caches(vcpu, false);
if (r)
return r;
if (lpage_disallowed)
max_level = PG_LEVEL_4K;
if (fast_page_fault(vcpu, gpa, error_code))
return RET_PF_RETRY;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
@ -4131,7 +4114,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
r = make_mmu_pages_available(vcpu);
if (r)
goto out_unlock;
r = __direct_map(vcpu, gpa, write, map_writable, max_level, pfn,
prefault, is_tdp && lpage_disallowed);
@ -4156,6 +4140,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len)
{
int r = 1;
u32 flags = vcpu->arch.apf.host_apf_flags;
#ifndef CONFIG_X86_64
/* A 64-bit CR2 should be impossible on 32-bit KVM. */
@ -4164,28 +4149,22 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
#endif
vcpu->arch.l1tf_flush_l1d = true;
switch (vcpu->arch.apf.host_apf_flags) {
default:
if (!flags) {
trace_kvm_page_fault(fault_address, error_code);
if (kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, fault_address);
r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
insn_len);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
} else if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
vcpu->arch.apf.host_apf_flags = 0;
local_irq_disable();
kvm_async_pf_task_wait_schedule(fault_address);
local_irq_enable();
break;
case KVM_PV_REASON_PAGE_READY:
vcpu->arch.apf.host_apf_flags = 0;
local_irq_disable();
kvm_async_pf_task_wake(fault_address);
local_irq_enable();
break;
} else {
WARN_ONCE(1, "Unexpected host async PF flags: %x\n", flags);
}
return r;
}
EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
@ -4227,8 +4206,8 @@ static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
union kvm_mmu_page_role role)
{
return (role.direct || pgd == root->pgd) &&
VALID_PAGE(root->hpa) && page_header(root->hpa) &&
role.word == page_header(root->hpa)->role.word;
VALID_PAGE(root->hpa) && to_shadow_page(root->hpa) &&
role.word == to_shadow_page(root->hpa)->role.word;
}
/*
@ -4277,8 +4256,7 @@ static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd,
*/
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
mmu->root_level >= PT64_ROOT_4LEVEL)
return !mmu_check_root(vcpu, new_pgd >> PAGE_SHIFT) &&
cached_root_available(vcpu, new_pgd, new_role);
return cached_root_available(vcpu, new_pgd, new_role);
return false;
}
@ -4313,7 +4291,7 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
*/
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
__clear_sp_write_flooding_count(page_header(vcpu->arch.mmu->root_hpa));
__clear_sp_write_flooding_count(to_shadow_page(vcpu->arch.mmu->root_hpa));
}
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
@ -4869,13 +4847,22 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
return role;
}
static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
{
/* Use 5-level TDP if and only if it's useful/necessary. */
if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48)
return 4;
return max_tdp_level;
}
static union kvm_mmu_role
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
{
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
role.base.ad_disabled = (shadow_accessed_mask == 0);
role.base.level = vcpu->arch.tdp_level;
role.base.level = kvm_mmu_get_tdp_level(vcpu);
role.base.direct = true;
role.base.gpte_is_8_bytes = true;
@ -4884,7 +4871,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = vcpu->arch.mmu;
struct kvm_mmu *context = &vcpu->arch.root_mmu;
union kvm_mmu_role new_role =
kvm_calc_tdp_mmu_root_page_role(vcpu, false);
@ -4896,7 +4883,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->sync_page = nonpaging_sync_page;
context->invlpg = NULL;
context->update_pte = nonpaging_update_pte;
context->shadow_root_level = vcpu->arch.tdp_level;
context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
context->direct_map = true;
context->get_guest_pgd = get_cr3;
context->get_pdptr = kvm_pdptr_read;
@ -4931,7 +4918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
}
static union kvm_mmu_role
kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu, bool base_only)
{
union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
@ -4939,9 +4926,19 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
!is_write_protection(vcpu);
role.base.smap_andnot_wp = role.ext.cr4_smap &&
!is_write_protection(vcpu);
role.base.direct = !is_paging(vcpu);
role.base.gpte_is_8_bytes = !!is_pae(vcpu);
return role;
}
static union kvm_mmu_role
kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
{
union kvm_mmu_role role =
kvm_calc_shadow_root_page_role_common(vcpu, base_only);
role.base.direct = !is_paging(vcpu);
if (!is_long_mode(vcpu))
role.base.level = PT32E_ROOT_LEVEL;
else if (is_la57_mode(vcpu))
@ -4952,15 +4949,10 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
return role;
}
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer)
static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
u32 cr0, u32 cr4, u32 efer,
union kvm_mmu_role new_role)
{
struct kvm_mmu *context = vcpu->arch.mmu;
union kvm_mmu_role new_role =
kvm_calc_shadow_mmu_root_page_role(vcpu, false);
if (new_role.as_u64 == context->mmu_role.as_u64)
return;
if (!(cr0 & X86_CR0_PG))
nonpaging_init_context(vcpu, context);
else if (efer & EFER_LMA)
@ -4973,7 +4965,43 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer)
context->mmu_role.as_u64 = new_role.as_u64;
reset_shadow_zero_bits_mask(vcpu, context);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
/*
 * (Re)initialise vcpu->arch.root_mmu as a shadow MMU.
 *
 * The role is recomputed from the vCPU state; the context is only rebuilt
 * via shadow_mmu_init_context() when that role differs from the one the
 * context currently carries.
 */
static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer)
{
	union kvm_mmu_role new_role =
		kvm_calc_shadow_mmu_root_page_role(vcpu, false);
	struct kvm_mmu *context = &vcpu->arch.root_mmu;

	/* Role unchanged: the existing context is still valid. */
	if (new_role.as_u64 == context->mmu_role.as_u64)
		return;

	shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
}
/*
 * Compute the MMU role for a shadow NPT root: start from the common shadow
 * root role (full role, not base-only), mark it as not direct, and take the
 * level from kvm_mmu_get_tdp_level().
 */
static union kvm_mmu_role
kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu)
{
	union kvm_mmu_role role;

	role = kvm_calc_shadow_root_page_role_common(vcpu, false);
	role.base.level = kvm_mmu_get_tdp_level(vcpu);
	role.base.direct = false;

	return role;
}
/*
 * Set up vcpu->arch.guest_mmu for shadowing nested page tables.
 *
 * The shadow root level is always refreshed from the newly computed role and
 * the MMU is switched to @nested_cr3 via __kvm_mmu_new_pgd() before the role
 * comparison; the context itself is only re-initialised when the role has
 * actually changed.
 */
void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
			     gpa_t nested_cr3)
{
	union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
	struct kvm_mmu *context = &vcpu->arch.guest_mmu;

	context->shadow_root_level = new_role.base.level;

	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base, false, false);

	/* Same role as before: nothing further to rebuild. */
	if (new_role.as_u64 == context->mmu_role.as_u64)
		return;

	shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu);
static union kvm_mmu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
@ -5007,7 +5035,7 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp)
{
struct kvm_mmu *context = vcpu->arch.mmu;
struct kvm_mmu *context = &vcpu->arch.guest_mmu;
u8 level = vmx_eptp_page_walk_level(new_eptp);
union kvm_mmu_role new_role =
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
@ -5041,7 +5069,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = vcpu->arch.mmu;
struct kvm_mmu *context = &vcpu->arch.root_mmu;
kvm_init_shadow_mmu(vcpu,
kvm_read_cr0_bits(vcpu, X86_CR0_PG),
@ -5151,7 +5179,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
int r;
r = mmu_topup_memory_caches(vcpu);
r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->direct_map);
if (r)
goto out;
r = mmu_alloc_roots(vcpu);
@ -5345,7 +5373,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
* or not since pte prefetch is skipped if it does not have
* enough objects in the cache.
*/
mmu_topup_memory_caches(vcpu);
mmu_topup_memory_caches(vcpu, true);
spin_lock(&vcpu->kvm->mmu_lock);
@ -5553,23 +5581,25 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
}
EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
void kvm_configure_mmu(bool enable_tdp, int tdp_page_level)
void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
int tdp_huge_page_level)
{
tdp_enabled = enable_tdp;
max_tdp_level = tdp_max_root_level;
/*
* max_page_level reflects the capabilities of KVM's MMU irrespective
* max_huge_page_level reflects KVM's MMU capabilities irrespective
* of kernel support, e.g. KVM may be capable of using 1GB pages when
* the kernel is not. But, KVM never creates a page size greater than
* what is used by the kernel for any given HVA, i.e. the kernel's
* capabilities are ultimately consulted by kvm_mmu_hugepage_adjust().
*/
if (tdp_enabled)
max_page_level = tdp_page_level;
max_huge_page_level = tdp_huge_page_level;
else if (boot_cpu_has(X86_FEATURE_GBPAGES))
max_page_level = PG_LEVEL_1G;
max_huge_page_level = PG_LEVEL_1G;
else
max_page_level = PG_LEVEL_2M;
max_huge_page_level = PG_LEVEL_2M;
}
EXPORT_SYMBOL_GPL(kvm_configure_mmu);
@ -5665,7 +5695,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
* SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
* skip allocating the PDP table.
*/
if (tdp_enabled && vcpu->arch.tdp_level > PT32E_ROOT_LEVEL)
if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
return 0;
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
@ -5684,6 +5714,14 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
uint i;
int ret;
vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO;
vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
vcpu->arch.mmu = &vcpu->arch.root_mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
@ -5732,12 +5770,11 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
break;
/*
* Skip invalid pages with a non-zero root count, zapping pages
* with a non-zero root count will never succeed, i.e. the page
* will get thrown back on active_mmu_pages and we'll get stuck
* in an infinite loop.
* Invalid pages should never land back on the list of active
* pages. Skip the bogus page, otherwise we'll get stuck in an
* infinite loop if the page gets put back on the list (again).
*/
if (sp->role.invalid && sp->root_count)
if (WARN_ON(sp->role.invalid))
continue;
/*
@ -5904,7 +5941,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
restart:
for_each_rmap_spte(rmap_head, &iter, sptep) {
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
pfn = spte_to_pfn(*sptep);
/*
@ -6015,7 +6052,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
spin_lock(&kvm->mmu_lock);
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
if (sp->role.invalid && sp->root_count)
if (WARN_ON(sp->role.invalid))
continue;
if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
goto restart;
@ -6092,9 +6129,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
goto unlock;
}
if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
freed++;
kvm_mmu_commit_zap_page(kvm, &invalid_list);
freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
unlock:
spin_unlock(&kvm->mmu_lock);

View File

@ -45,7 +45,7 @@ static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
!is_last_spte(ent[i], level)) {
struct kvm_mmu_page *child;
child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
child = to_shadow_page(ent[i] & PT64_BASE_ADDR_MASK);
__mmu_spte_walk(vcpu, child, fn, level - 1);
}
}
@ -62,7 +62,7 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
sp = to_shadow_page(root);
__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
return;
}
@ -72,7 +72,7 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
sp = page_header(root);
sp = to_shadow_page(root);
__mmu_spte_walk(vcpu, sp, fn, 2);
}
}
@ -97,7 +97,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
kvm_pfn_t pfn;
hpa_t hpa;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
if (sp->unsync) {
if (level != PG_LEVEL_4K) {
@ -132,7 +132,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
struct kvm_memory_slot *slot;
gfn_t gfn;
rev_sp = page_header(__pa(sptep));
rev_sp = sptep_to_sp(sptep);
gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
@ -165,7 +165,7 @@ static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
struct kvm_mmu_page *sp = page_header(__pa(sptep));
struct kvm_mmu_page *sp = sptep_to_sp(sptep);
if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "

View File

@ -0,0 +1,63 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_MMU_INTERNAL_H
#define __KVM_X86_MMU_INTERNAL_H
#include <linux/types.h>
#include <asm/kvm_host.h>
/*
 * Bookkeeping for one shadow page table page.
 *
 * A pointer to this structure is stored in the private field of the struct
 * page backing @spt, which is how to_shadow_page()/sptep_to_sp() find it
 * again from a physical address or an SPTE pointer.
 */
struct kvm_mmu_page {
/* List linkage, e.g. on kvm->arch.active_mmu_pages or a zap invalid_list. */
struct list_head link;
/* Linkage in the kvm->arch.mmu_page_hash bucket selected by the gfn hash. */
struct hlist_node hash_link;
struct list_head lpage_disallowed_link;
bool unsync;
u8 mmu_valid_gen;
bool mmio_cached;
bool lpage_disallowed; /* Can't be replaced by an equiv large page */
/*
 * The following two entries are used to key the shadow page in the
 * hash table.
 */
union kvm_mmu_page_role role;
gfn_t gfn;
/* The shadow page table itself (array of SPTEs). */
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
/* One bit per SPTE index in @spt; set by mark_unsync(). */
DECLARE_BITMAP(unsync_child_bitmap, 512);
#ifdef CONFIG_X86_32
/*
 * Used out of the mmu-lock to avoid reading spte values while an
 * update is in progress; see the comments in __get_spte_lockless().
 */
int clear_spte_count;
#endif
/* Number of writes since the last time traversal visited this page. */
atomic_t write_flooding_count;
};
/*
 * Return the kvm_mmu_page associated with the shadow page table located at
 * host physical address @shadow_page.  The pointer is read back from the
 * private field of the struct page that covers that address.
 */
static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
{
	return (struct kvm_mmu_page *)
		page_private(pfn_to_page(shadow_page >> PAGE_SHIFT));
}
/*
 * Return the kvm_mmu_page for the shadow page table that holds the SPTE
 * pointed to by @sptep, by looking up the SPTE's physical address.
 */
static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
{
	hpa_t spte_pa = __pa(sptep);

	return to_shadow_page(spte_pa);
}
void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
#endif /* __KVM_X86_MMU_INTERNAL_H */

View File

@ -387,7 +387,7 @@ TRACE_EVENT(
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_PATH mmu
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE mmutrace

View File

@ -16,7 +16,7 @@
#include <asm/kvm_page_track.h>
#include "mmu.h"
#include "mmu_internal.h"
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{

View File

@ -260,7 +260,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
!(pte & PT_GUEST_DIRTY_MASK)) {
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
#if PTTYPE == PTTYPE_EPT
if (kvm_arch_write_log_dirty(vcpu, addr))
if (kvm_x86_ops.nested_ops->write_log_dirty(vcpu, addr))
return -EINVAL;
#endif
pte |= PT_GUEST_DIRTY_MASK;
@ -596,7 +596,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
u64 *spte;
int i;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
if (sp->role.level > PG_LEVEL_4K)
return;
@ -789,10 +789,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
/*
* If PFEC.RSVD is set, this is a shadow page fault.
* The bit needs to be cleared before walking guest page tables.
@ -820,6 +816,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
return RET_PF_EMULATE;
}
r = mmu_topup_memory_caches(vcpu, true);
if (r)
return r;
vcpu->arch.write_fault_to_shadow_pgtable = false;
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
@ -866,7 +866,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
if (make_mmu_pages_available(vcpu) < 0)
r = make_mmu_pages_available(vcpu);
if (r)
goto out_unlock;
r = FNAME(fetch)(vcpu, addr, &walker, write_fault, max_level, pfn,
map_writable, prefault, lpage_disallowed);
@ -903,7 +904,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
* No need to check return value here, rmap_can_add() can
* help us to skip pte prefetch later.
*/
mmu_topup_memory_caches(vcpu);
mmu_topup_memory_caches(vcpu, true);
if (!VALID_PAGE(root_hpa)) {
WARN_ON(1);
@ -915,7 +916,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
level = iterator.level;
sptep = iterator.sptep;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
if (is_last_spte(*sptep, level)) {
pt_element_t gpte;
gpa_t pte_gpa;

View File

@ -372,6 +372,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (!pmc)
return 1;
if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
(kvm_x86_ops.get_cpl(vcpu) != 0) &&
(kvm_read_cr0(vcpu) & X86_CR0_PE))
return 1;
*data = pmc_read_counter(pmc) & mask;
return 0;
}

View File

@ -15,6 +15,8 @@
#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
#define MAX_FIXED_COUNTERS 3
struct kvm_event_hw_type_mapping {
u8 eventsel;
u8 unit_mask;

View File

@ -665,7 +665,7 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
} else {
vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
}
mark_dirty(vmcb, VMCB_AVIC);
vmcb_mark_dirty(vmcb, VMCB_AVIC);
svm_set_pi_irte_mode(vcpu, activated);
}

View File

@ -48,13 +48,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
svm->vmcb->control.exit_info_1 |= fault->error_code;
/*
* The present bit is always zero for page structure faults on real
* hardware.
*/
if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
svm->vmcb->control.exit_info_1 &= ~1;
nested_svm_vmexit(svm);
}
@ -87,11 +80,11 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
kvm_init_shadow_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer);
kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer,
svm->nested.ctl.nested_cr3);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level;
reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@ -106,7 +99,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h, *g;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
if (!is_guest_mode(&svm->vcpu))
return;
@ -222,8 +215,9 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
return true;
}
static bool nested_vmcb_checks(struct vmcb *vmcb)
static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb)
{
bool nested_vmcb_lma;
if ((vmcb->save.efer & EFER_SVME) == 0)
return false;
@ -231,6 +225,30 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
(vmcb->save.cr0 & X86_CR0_NW))
return false;
if (!kvm_dr6_valid(vmcb->save.dr6) || !kvm_dr7_valid(vmcb->save.dr7))
return false;
nested_vmcb_lma =
(vmcb->save.efer & EFER_LME) &&
(vmcb->save.cr0 & X86_CR0_PG);
if (!nested_vmcb_lma) {
if (vmcb->save.cr4 & X86_CR4_PAE) {
if (vmcb->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
return false;
} else {
if (vmcb->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
return false;
}
} else {
if (!(vmcb->save.cr4 & X86_CR4_PAE) ||
!(vmcb->save.cr0 & X86_CR0_PE) ||
(vmcb->save.cr3 & MSR_CR3_LONG_RESERVED_MASK))
return false;
}
if (kvm_valid_cr4(&svm->vcpu, vmcb->save.cr4))
return false;
return nested_vmcb_check_controls(&vmcb->control);
}
@ -258,7 +276,7 @@ void sync_nested_vmcb_control(struct vcpu_svm *svm)
/* Only a few fields of int_ctl are written by the processor. */
mask = V_IRQ_MASK | V_TPR_MASK;
if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
is_intercept(svm, INTERCEPT_VINTR)) {
svm_is_intercept(svm, INTERCEPT_VINTR)) {
/*
* In order to request an interrupt window, L0 is usurping
* svm->vmcb->control.int_ctl and possibly setting V_IRQ
@ -310,6 +328,42 @@ static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
nested_vmcb->control.exit_int_info = exit_int_info;
}
/*
 * Report whether SVM_NESTED_CTL_NP_ENABLE is set in the cached nested
 * control area (svm->nested.ctl.nested_ctl).
 */
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
	return (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) != 0;
}
/*
 * Install @cr3 as the vCPU's CR3 on nested vmentry or vmexit.
 *
 * @nested_npt is true when emulating VM-Entry into a guest that has NPT
 * enabled; in that case neither the PDPTR reload nor kvm_mmu_new_pgd() is
 * performed.
 *
 * Returns 0 on success, or -EINVAL when @cr3 has any bit set in the range
 * [cpuid_maxphyaddr(vcpu), 63] or when load_pdptrs() fails.
 */
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_npt)
{
	/* Reject physical-address bits the guest CPUID does not provide. */
	if (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))
		return -EINVAL;

	if (!nested_npt) {
		/*
		 * With PAE paging, reload the PDPTRs only if CR3 or the
		 * PDPTRs themselves changed.
		 */
		if (is_pae_paging(vcpu) &&
		    (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu)) &&
		    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
			return -EINVAL;

		/*
		 * TODO: optimize unconditional TLB flush/MMU sync here and in
		 * kvm_init_shadow_npt_mmu().
		 */
		kvm_mmu_new_pgd(vcpu, cr3, false, false);
	}

	vcpu->arch.cr3 = cr3;
	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	kvm_init_mmu(vcpu, false);

	return 0;
}
static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
{
/* Load the nested guest state */
@ -323,8 +377,6 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_v
svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
@ -342,14 +394,10 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_v
static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
{
const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
if (nested_npt_enabled(svm))
nested_svm_init_mmu_context(&svm->vcpu);
/* Guest paging mode is active - reset mmu */
kvm_mmu_reset_context(&svm->vcpu);
svm_flush_tlb(&svm->vcpu);
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
@ -375,18 +423,27 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
*/
recalc_intercepts(svm);
mark_all_dirty(svm->vmcb);
vmcb_mark_all_dirty(svm->vmcb);
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb)
{
int ret;
svm->nested.vmcb = vmcb_gpa;
load_nested_vmcb_control(svm, &nested_vmcb->control);
nested_prepare_vmcb_save(svm, nested_vmcb);
nested_prepare_vmcb_control(svm);
ret = nested_svm_load_cr3(&svm->vcpu, nested_vmcb->save.cr3,
nested_npt_enabled(svm));
if (ret)
return ret;
svm_set_gif(svm, true);
return 0;
}
int nested_svm_vmrun(struct vcpu_svm *svm)
@ -416,7 +473,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb = map.hva;
if (!nested_vmcb_checks(nested_vmcb)) {
if (!nested_vmcb_checks(svm, nested_vmcb)) {
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
nested_vmcb->control.exit_code_hi = 0;
nested_vmcb->control.exit_info_1 = 0;
@ -464,16 +521,22 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(&hsave->control, &vmcb->control);
svm->nested.nested_run_pending = 1;
enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb);
if (!nested_svm_vmrun_msrpm(svm)) {
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
if (enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb))
goto out_exit_err;
nested_svm_vmexit(svm);
}
if (nested_svm_vmrun_msrpm(svm))
goto out;
out_exit_err:
svm->nested.nested_run_pending = 0;
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
nested_svm_vmexit(svm);
out:
kvm_vcpu_unmap(&svm->vcpu, &map, true);
@ -585,12 +648,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm_set_efer(&svm->vcpu, hsave->save.efer);
svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
svm_set_cr4(&svm->vcpu, hsave->save.cr4);
if (npt_enabled) {
svm->vmcb->save.cr3 = hsave->save.cr3;
svm->vcpu.arch.cr3 = hsave->save.cr3;
} else {
(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
}
kvm_rax_write(&svm->vcpu, hsave->save.rax);
kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
kvm_rip_write(&svm->vcpu, hsave->save.rip);
@ -598,7 +655,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm->vmcb->save.cpl = 0;
svm->vmcb->control.exit_int_info = 0;
mark_all_dirty(svm->vmcb);
vmcb_mark_all_dirty(svm->vmcb);
trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code,
nested_vmcb->control.exit_info_1,
@ -610,8 +667,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_vcpu_unmap(&svm->vcpu, &map, true);
nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
rc = nested_svm_load_cr3(&svm->vcpu, hsave->save.cr3, false);
if (rc)
return 1;
if (npt_enabled)
svm->vmcb->save.cr3 = hsave->save.cr3;
/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it

View File

@ -313,13 +313,15 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
int write)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
unsigned long npages, npinned, size;
unsigned long npages, size;
int npinned;
unsigned long locked, lock_limit;
struct page **pages;
unsigned long first, last;
int ret;
if (ulen == 0 || uaddr + ulen < uaddr)
return NULL;
return ERR_PTR(-EINVAL);
/* Calculate number of pages. */
first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
@ -330,9 +332,12 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
return NULL;
return ERR_PTR(-ENOMEM);
}
if (WARN_ON_ONCE(npages > INT_MAX))
return ERR_PTR(-EINVAL);
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
@ -341,12 +346,13 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
if (!pages)
return NULL;
return ERR_PTR(-ENOMEM);
/* Pin the user virtual address. */
npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
ret = -ENOMEM;
goto err;
}
@ -357,10 +363,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
err:
if (npinned > 0)
release_pages(pages, npinned);
unpin_user_pages(pages, npinned);
kvfree(pages);
return NULL;
return ERR_PTR(ret);
}
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
@ -368,7 +374,7 @@ static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
release_pages(pages, npages);
unpin_user_pages(pages, npages);
kvfree(pages);
sev->pages_locked -= npages;
}
@ -434,8 +440,8 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Lock the user memory. */
inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
if (!inpages) {
ret = -ENOMEM;
if (IS_ERR(inpages)) {
ret = PTR_ERR(inpages);
goto e_free;
}
@ -789,13 +795,13 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
/* lock userspace source and destination page */
src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
if (!src_p)
return -EFAULT;
if (IS_ERR(src_p))
return PTR_ERR(src_p);
dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
if (!dst_p) {
if (IS_ERR(dst_p)) {
sev_unpin_memory(kvm, src_p, n);
return -EFAULT;
return PTR_ERR(dst_p);
}
/*
@ -860,8 +866,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
return -EFAULT;
pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
if (!pages)
return -ENOMEM;
if (IS_ERR(pages))
return PTR_ERR(pages);
/*
* The secret must be copied into contiguous memory region, lets verify
@ -987,8 +993,8 @@ int svm_register_enc_region(struct kvm *kvm,
return -ENOMEM;
region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
if (!region->pages) {
ret = -ENOMEM;
if (IS_ERR(region->pages)) {
ret = PTR_ERR(region->pages);
goto e_free;
}
@ -1180,11 +1186,10 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
* 2) or this VMCB was executed on different host CPU in previous VMRUNs.
*/
if (sd->sev_vmcbs[asid] == svm->vmcb &&
svm->last_cpu == cpu)
svm->vcpu.arch.last_vmentry_cpu == cpu)
return;
svm->last_cpu = cpu;
sd->sev_vmcbs[asid] = svm->vmcb;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
mark_dirty(svm->vmcb, VMCB_ASID);
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}

View File

@ -254,7 +254,7 @@ static inline void invlpga(unsigned long addr, u32 asid)
asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
}
static int get_npt_level(struct kvm_vcpu *vcpu)
static int get_max_npt_level(void)
{
#ifdef CONFIG_X86_64
return PT64_ROOT_4LEVEL;
@ -282,7 +282,7 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
}
svm->vmcb->save.efer = efer | EFER_SVME;
mark_dirty(svm->vmcb, VMCB_CR);
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
}
static int is_external_interrupt(u32 info)
@ -713,7 +713,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
pause_filter_count_max);
if (control->pause_filter_count != old) {
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
trace_kvm_ple_window_update(vcpu->vcpu_id,
control->pause_filter_count, old);
}
@ -731,7 +731,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
pause_filter_count_shrink,
pause_filter_count);
if (control->pause_filter_count != old) {
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
trace_kvm_ple_window_update(vcpu->vcpu_id,
control->pause_filter_count, old);
}
@ -885,7 +885,7 @@ static __init int svm_hardware_setup(void)
if (npt_enabled && !npt)
npt_enabled = false;
kvm_configure_mmu(npt_enabled, PG_LEVEL_1G);
kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
if (nrips) {
@ -924,6 +924,21 @@ static __init int svm_hardware_setup(void)
svm_set_cpu_caps();
/*
* It seems that on AMD processors the PTE's accessed bit is
* being set by the CPU hardware before the NPF vmexit.
* This is not expected behaviour and our tests fail because
* of it.
* A workaround here is to disable support for
* GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
* In this case userspace can know if there is support using
* the KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to
* handle it.
* If future AMD CPU models change the behaviour described above,
* this variable can be changed accordingly.
*/
allow_smaller_maxphyaddr = !npt_enabled;
return 0;
err:
@ -966,7 +981,7 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
return svm->vmcb->control.tsc_offset;
}
@ -1002,38 +1017,38 @@ static void init_vmcb(struct vcpu_svm *svm)
if (enable_vmware_backdoor)
set_exception_intercept(svm, GP_VECTOR);
set_intercept(svm, INTERCEPT_INTR);
set_intercept(svm, INTERCEPT_NMI);
set_intercept(svm, INTERCEPT_SMI);
set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
set_intercept(svm, INTERCEPT_RDPMC);
set_intercept(svm, INTERCEPT_CPUID);
set_intercept(svm, INTERCEPT_INVD);
set_intercept(svm, INTERCEPT_INVLPG);
set_intercept(svm, INTERCEPT_INVLPGA);
set_intercept(svm, INTERCEPT_IOIO_PROT);
set_intercept(svm, INTERCEPT_MSR_PROT);
set_intercept(svm, INTERCEPT_TASK_SWITCH);
set_intercept(svm, INTERCEPT_SHUTDOWN);
set_intercept(svm, INTERCEPT_VMRUN);
set_intercept(svm, INTERCEPT_VMMCALL);
set_intercept(svm, INTERCEPT_VMLOAD);
set_intercept(svm, INTERCEPT_VMSAVE);
set_intercept(svm, INTERCEPT_STGI);
set_intercept(svm, INTERCEPT_CLGI);
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
set_intercept(svm, INTERCEPT_XSETBV);
set_intercept(svm, INTERCEPT_RDPRU);
set_intercept(svm, INTERCEPT_RSM);
svm_set_intercept(svm, INTERCEPT_INTR);
svm_set_intercept(svm, INTERCEPT_NMI);
svm_set_intercept(svm, INTERCEPT_SMI);
svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
svm_set_intercept(svm, INTERCEPT_RDPMC);
svm_set_intercept(svm, INTERCEPT_CPUID);
svm_set_intercept(svm, INTERCEPT_INVD);
svm_set_intercept(svm, INTERCEPT_INVLPG);
svm_set_intercept(svm, INTERCEPT_INVLPGA);
svm_set_intercept(svm, INTERCEPT_IOIO_PROT);
svm_set_intercept(svm, INTERCEPT_MSR_PROT);
svm_set_intercept(svm, INTERCEPT_TASK_SWITCH);
svm_set_intercept(svm, INTERCEPT_SHUTDOWN);
svm_set_intercept(svm, INTERCEPT_VMRUN);
svm_set_intercept(svm, INTERCEPT_VMMCALL);
svm_set_intercept(svm, INTERCEPT_VMLOAD);
svm_set_intercept(svm, INTERCEPT_VMSAVE);
svm_set_intercept(svm, INTERCEPT_STGI);
svm_set_intercept(svm, INTERCEPT_CLGI);
svm_set_intercept(svm, INTERCEPT_SKINIT);
svm_set_intercept(svm, INTERCEPT_WBINVD);
svm_set_intercept(svm, INTERCEPT_XSETBV);
svm_set_intercept(svm, INTERCEPT_RDPRU);
svm_set_intercept(svm, INTERCEPT_RSM);
if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
set_intercept(svm, INTERCEPT_MONITOR);
set_intercept(svm, INTERCEPT_MWAIT);
svm_set_intercept(svm, INTERCEPT_MONITOR);
svm_set_intercept(svm, INTERCEPT_MWAIT);
}
if (!kvm_hlt_in_guest(svm->vcpu.kvm))
set_intercept(svm, INTERCEPT_HLT);
svm_set_intercept(svm, INTERCEPT_HLT);
control->iopm_base_pa = __sme_set(iopm_base);
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
@ -1077,7 +1092,7 @@ static void init_vmcb(struct vcpu_svm *svm)
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
clr_intercept(svm, INTERCEPT_INVLPG);
svm_clr_intercept(svm, INTERCEPT_INVLPG);
clr_exception_intercept(svm, PF_VECTOR);
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
@ -1094,9 +1109,9 @@ static void init_vmcb(struct vcpu_svm *svm)
control->pause_filter_count = pause_filter_count;
if (pause_filter_thresh)
control->pause_filter_thresh = pause_filter_thresh;
set_intercept(svm, INTERCEPT_PAUSE);
svm_set_intercept(svm, INTERCEPT_PAUSE);
} else {
clr_intercept(svm, INTERCEPT_PAUSE);
svm_clr_intercept(svm, INTERCEPT_PAUSE);
}
if (kvm_vcpu_apicv_active(&svm->vcpu))
@ -1107,14 +1122,14 @@ static void init_vmcb(struct vcpu_svm *svm)
* in VMCB and clear intercepts to avoid #VMEXIT.
*/
if (vls) {
clr_intercept(svm, INTERCEPT_VMLOAD);
clr_intercept(svm, INTERCEPT_VMSAVE);
svm_clr_intercept(svm, INTERCEPT_VMLOAD);
svm_clr_intercept(svm, INTERCEPT_VMSAVE);
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
}
if (vgif) {
clr_intercept(svm, INTERCEPT_STGI);
clr_intercept(svm, INTERCEPT_CLGI);
svm_clr_intercept(svm, INTERCEPT_STGI);
svm_clr_intercept(svm, INTERCEPT_CLGI);
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
}
@ -1123,7 +1138,7 @@ static void init_vmcb(struct vcpu_svm *svm)
clr_exception_intercept(svm, UD_VECTOR);
}
mark_all_dirty(svm->vmcb);
vmcb_mark_all_dirty(svm->vmcb);
enable_gif(svm);
@ -1257,7 +1272,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (unlikely(cpu != vcpu->cpu)) {
svm->asid_generation = 0;
mark_all_dirty(svm->vmcb);
vmcb_mark_all_dirty(svm->vmcb);
}
#ifdef CONFIG_X86_64
@ -1356,7 +1371,7 @@ static void svm_set_vintr(struct vcpu_svm *svm)
/* The following fields are ignored when AVIC is enabled */
WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
set_intercept(svm, INTERCEPT_VINTR);
svm_set_intercept(svm, INTERCEPT_VINTR);
/*
* This is just a dummy VINTR to actually cause a vmexit to happen.
@ -1367,13 +1382,13 @@ static void svm_set_vintr(struct vcpu_svm *svm)
control->int_ctl &= ~V_INTR_PRIO_MASK;
control->int_ctl |= V_IRQ_MASK |
((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
mark_dirty(svm->vmcb, VMCB_INTR);
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
}
static void svm_clear_vintr(struct vcpu_svm *svm)
{
const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
clr_intercept(svm, INTERCEPT_VINTR);
svm_clr_intercept(svm, INTERCEPT_VINTR);
/* Drop int_ctl fields related to VINTR injection. */
svm->vmcb->control.int_ctl &= mask;
@ -1385,7 +1400,7 @@ static void svm_clear_vintr(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
}
mark_dirty(svm->vmcb, VMCB_INTR);
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
}
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
@ -1503,7 +1518,7 @@ static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
svm->vmcb->save.idtr.limit = dt->size;
svm->vmcb->save.idtr.base = dt->address ;
mark_dirty(svm->vmcb, VMCB_DT);
vmcb_mark_dirty(svm->vmcb, VMCB_DT);
}
static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
@ -1520,7 +1535,7 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
svm->vmcb->save.gdtr.limit = dt->size;
svm->vmcb->save.gdtr.base = dt->address ;
mark_dirty(svm->vmcb, VMCB_DT);
vmcb_mark_dirty(svm->vmcb, VMCB_DT);
}
static void update_cr0_intercept(struct vcpu_svm *svm)
@ -1531,7 +1546,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
mark_dirty(svm->vmcb, VMCB_CR);
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
if (gcr0 == *hcr0) {
clr_cr_intercept(svm, INTERCEPT_CR0_READ);
@ -1572,7 +1587,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
update_cr0_intercept(svm);
}
@ -1592,7 +1607,7 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
cr4 |= X86_CR4_PAE;
cr4 |= host_cr4_mce;
to_svm(vcpu)->vmcb->save.cr4 = cr4;
mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
return 0;
}
@ -1624,10 +1639,10 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
/* This is symmetric with svm_get_segment() */
svm->vmcb->save.cpl = (var->dpl & 3);
mark_dirty(svm->vmcb, VMCB_SEG);
vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
}
static void update_bp_intercept(struct kvm_vcpu *vcpu)
static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -1636,8 +1651,7 @@ static void update_bp_intercept(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
set_exception_intercept(svm, BP_VECTOR);
} else
vcpu->guest_debug = 0;
}
}
static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
@ -1651,7 +1665,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
svm->asid_generation = sd->asid_generation;
svm->vmcb->control.asid = sd->next_asid++;
mark_dirty(svm->vmcb, VMCB_ASID);
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}
static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
@ -1660,7 +1674,7 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
if (unlikely(value != vmcb->save.dr6)) {
vmcb->save.dr6 = value;
mark_dirty(vmcb, VMCB_DR);
vmcb_mark_dirty(vmcb, VMCB_DR);
}
}
@ -1687,7 +1701,7 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->save.dr7 = value;
mark_dirty(svm->vmcb, VMCB_DR);
vmcb_mark_dirty(svm->vmcb, VMCB_DR);
}
static int pf_interception(struct vcpu_svm *svm)
@ -2000,8 +2014,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value)
* again while processing KVM_REQ_EVENT if needed.
*/
if (vgif_enabled(svm))
clr_intercept(svm, INTERCEPT_STGI);
if (is_intercept(svm, INTERCEPT_VINTR))
svm_clr_intercept(svm, INTERCEPT_STGI);
if (svm_is_intercept(svm, INTERCEPT_VINTR))
svm_clear_vintr(svm);
enable_gif(svm);
@ -2162,7 +2176,7 @@ static int cpuid_interception(struct vcpu_svm *svm)
static int iret_interception(struct vcpu_svm *svm)
{
++svm->vcpu.stat.nmi_window_exits;
clr_intercept(svm, INTERCEPT_IRET);
svm_clr_intercept(svm, INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
@ -2358,8 +2372,10 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
break;
case MSR_IA32_PERF_CAPABILITIES:
return 0;
default:
return 1;
return KVM_MSR_RET_INVALID;
}
return 0;
@ -2512,7 +2528,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
return 1;
vcpu->arch.pat = data;
svm->vmcb->save.g_pat = data;
mark_dirty(svm->vmcb, VMCB_NPT);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
@ -2522,7 +2538,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;
if (data & ~kvm_spec_ctrl_valid_bits(vcpu))
if (kvm_spec_ctrl_test_value(data))
return 1;
svm->spec_ctrl = data;
@ -2617,7 +2633,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
return 1;
svm->vmcb->save.dbgctl = data;
mark_dirty(svm->vmcb, VMCB_LBR);
vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
if (data & (1ULL<<0))
svm_enable_lbrv(svm);
else
@ -2947,6 +2963,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
= svm->vmcb->control.exit_code;
kvm_run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
dump_vmcb(vcpu);
return 0;
}
@ -2970,8 +2987,9 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror =
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
vcpu->run->internal.ndata = 1;
vcpu->run->internal.ndata = 2;
vcpu->run->internal.data[0] = exit_code;
vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
return 0;
}
@ -2992,21 +3010,18 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
static void reload_tss(struct kvm_vcpu *vcpu)
{
int cpu = raw_smp_processor_id();
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
sd->tss_desc->type = 9; /* available 32/64-bit TSS */
load_TR_desc();
}
static void pre_svm_run(struct vcpu_svm *svm)
{
int cpu = raw_smp_processor_id();
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
struct svm_cpu_data *sd = per_cpu(svm_data, svm->vcpu.cpu);
if (sev_guest(svm->vcpu.kvm))
return pre_sev_run(svm, cpu);
return pre_sev_run(svm, svm->vcpu.cpu);
/* FIXME: handle wraparound of asid_generation */
if (svm->asid_generation != sd->asid_generation)
@ -3019,7 +3034,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
vcpu->arch.hflags |= HF_NMI_MASK;
set_intercept(svm, INTERCEPT_IRET);
svm_set_intercept(svm, INTERCEPT_IRET);
++vcpu->stat.nmi_injections;
}
@ -3040,7 +3055,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (svm_nested_virtualize_tpr(vcpu))
if (nested_svm_virtualize_tpr(vcpu))
return;
clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
@ -3096,10 +3111,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
if (masked) {
svm->vcpu.arch.hflags |= HF_NMI_MASK;
set_intercept(svm, INTERCEPT_IRET);
svm_set_intercept(svm, INTERCEPT_IRET);
} else {
svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
clr_intercept(svm, INTERCEPT_IRET);
svm_clr_intercept(svm, INTERCEPT_IRET);
}
}
@ -3179,7 +3194,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
if (!gif_set(svm)) {
if (vgif_enabled(svm))
set_intercept(svm, INTERCEPT_STGI);
svm_set_intercept(svm, INTERCEPT_STGI);
return; /* STGI will cause a vm exit */
}
@ -3234,7 +3249,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (svm_nested_virtualize_tpr(vcpu))
if (nested_svm_virtualize_tpr(vcpu))
return;
if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
@ -3248,7 +3263,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
u64 cr8;
if (svm_nested_virtualize_tpr(vcpu) ||
if (nested_svm_virtualize_tpr(vcpu) ||
kvm_vcpu_apicv_active(vcpu))
return;
@ -3344,6 +3359,60 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
/*
 * Enter the guest once through __svm_vcpu_run() and, as soon as it returns,
 * restore the host GS base (64-bit) or the host FS/GS segments (32-bit).
 *
 * The call is bracketed by the tracing, lockdep and guest enter/exit
 * transitions described in the comments below. The function is marked
 * noinstr; the trace_hardirqs_*() hooks are therefore wrapped in
 * instrumentation_begin()/instrumentation_end().
 *
 * @vcpu is not referenced in the body; all state is reached through @svm.
 */
static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm)
{
/*
* VMENTER enables interrupts (host state), but the kernel state is
* interrupts disabled when this is invoked. Also tell RCU about
* it. This is the same logic as for exit_to_user_mode().
*
* This ensures that e.g. latency analysis on the host observes
* guest mode as interrupt enabled.
*
* guest_enter_irqoff() informs context tracking about the
* transition to guest mode and if enabled adjusts RCU state
* accordingly.
*/
instrumentation_begin();
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
instrumentation_end();
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
/* Run the guest using this vCPU's VMCB and register array. */
__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
/*
 * Restore host segment state saved in svm->host.
 * NOTE(review): native_wrmsrl() is presumably used instead of wrmsrl()
 * to keep instrumentation out of this noinstr section -- confirm.
 */
#ifdef CONFIG_X86_64
native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
loadsegment(fs, svm->host.fs);
#ifndef CONFIG_X86_32_LAZY_GS
loadsegment(gs, svm->host.gs);
#endif
#endif
/*
* VMEXIT disables interrupts (host state), but tracing and lockdep
* have them in state 'on' as recorded before entering guest mode.
* Same as enter_from_user_mode().
*
* guest_exit_irqoff() restores host context and reinstates RCU if
* enabled and required.
*
* This needs to be done before the below as native_read_msr()
* contains a tracepoint and x86_spec_ctrl_restore_host() calls
* into world and some more.
*/
lockdep_hardirqs_off(CALLER_ADDR0);
guest_exit_irqoff();
instrumentation_begin();
trace_hardirqs_off_finish();
instrumentation_end();
}
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
fastpath_t exit_fastpath;
@ -3399,16 +3468,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
loadsegment(fs, svm->host.fs);
#ifndef CONFIG_X86_32_LAZY_GS
loadsegment(gs, svm->host.gs);
#endif
#endif
svm_vcpu_enter_exit(vcpu, svm);
/*
* We do not use IBRS in the kernel. If this vCPU has used the
@ -3477,11 +3537,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
SVM_EXIT_EXCP_BASE + MC_VECTOR))
svm_handle_mce(svm);
mark_all_clean(svm->vmcb);
vmcb_mark_all_clean(svm->vmcb);
return exit_fastpath;
}
static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,
int root_level)
{
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long cr3;
@ -3489,7 +3550,7 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
cr3 = __sme_set(root);
if (npt_enabled) {
svm->vmcb->control.nested_cr3 = cr3;
mark_dirty(svm->vmcb, VMCB_NPT);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
/* Loading L2's CR3 is handled by enter_svm_guest_mode. */
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
@ -3498,7 +3559,7 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
}
svm->vmcb->save.cr3 = cr3;
mark_dirty(svm->vmcb, VMCB_CR);
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
}
static int is_disabled(void)
@ -3551,7 +3612,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
return 0;
}
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -3843,6 +3904,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
struct kvm_host_map map;
u64 guest;
u64 vmcb;
int ret = 0;
guest = GET_SMSTATE(u64, smstate, 0x7ed8);
vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
@ -3851,10 +3913,11 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
return 1;
nested_vmcb = map.hva;
enter_svm_guest_mode(svm, vmcb, nested_vmcb);
ret = enter_svm_guest_mode(svm, vmcb, nested_vmcb);
kvm_vcpu_unmap(&svm->vcpu, &map, true);
}
return 0;
return ret;
}
static void enable_smi_window(struct kvm_vcpu *vcpu)
@ -3863,7 +3926,7 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
if (!gif_set(svm)) {
if (vgif_enabled(svm))
set_intercept(svm, INTERCEPT_STGI);
svm_set_intercept(svm, INTERCEPT_STGI);
/* STGI will cause a vm exit */
} else {
/* We must be in SMM; RSM will cause a vmexit anyway. */
@ -3992,7 +4055,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_blocking = svm_vcpu_blocking,
.vcpu_unblocking = svm_vcpu_unblocking,
.update_bp_intercept = update_bp_intercept,
.update_exception_bitmap = update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
@ -4049,12 +4112,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_tss_addr = svm_set_tss_addr,
.set_identity_map_addr = svm_set_identity_map_addr,
.get_tdp_level = get_npt_level,
.get_mt_mask = svm_get_mt_mask,
.get_exit_info = svm_get_exit_info,
.cpuid_update = svm_cpuid_update,
.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,
.has_wbinvd_exit = svm_has_wbinvd_exit,

View File

@ -81,7 +81,7 @@ struct kvm_svm {
struct kvm_vcpu;
struct nested_state {
struct svm_nested_state {
struct vmcb *hsave;
u64 hsave_msr;
u64 vm_cr_msr;
@ -133,7 +133,7 @@ struct vcpu_svm {
ulong nmi_iret_rip;
struct nested_state nested;
struct svm_nested_state nested;
bool nmi_singlestep;
u64 nmi_singlestep_guest_rflags;
@ -158,9 +158,6 @@ struct vcpu_svm {
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
/* which host CPU was used for running this vcpu */
unsigned int last_cpu;
};
struct svm_cpu_data {
@ -188,18 +185,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
return container_of(kvm, struct kvm_svm, kvm);
}
static inline void mark_all_dirty(struct vmcb *vmcb)
static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
{
vmcb->control.clean = 0;
}
static inline void mark_all_clean(struct vmcb *vmcb)
static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
{
vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
& ~VMCB_ALWAYS_DIRTY_MASK;
}
static inline void mark_dirty(struct vmcb *vmcb, int bit)
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
{
vmcb->control.clean &= ~(1 << bit);
}
@ -293,7 +290,7 @@ static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
static inline void set_intercept(struct vcpu_svm *svm, int bit)
static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
@ -302,7 +299,7 @@ static inline void set_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
static inline void clr_intercept(struct vcpu_svm *svm, int bit)
static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
@ -311,7 +308,7 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
static inline bool is_intercept(struct vcpu_svm *svm, int bit)
static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
{
return (svm->vmcb->control.intercept & (1ULL << bit)) != 0;
}
@ -346,7 +343,10 @@ static inline bool gif_set(struct vcpu_svm *svm)
}
/* svm.c */
#define MSR_INVALID 0xffffffffU
#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U
#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U
#define MSR_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U
#define MSR_INVALID 0xffffffffU
u32 svm_msrpm_offset(u32 msr);
void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
@ -365,7 +365,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value);
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@ -387,8 +387,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI));
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb);
int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb);
void svm_leave_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct vcpu_svm *svm);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
@ -420,7 +420,7 @@ extern int avic;
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
mark_dirty(svm->vmcb, VMCB_AVIC);
vmcb_mark_dirty(svm->vmcb, VMCB_AVIC);
}
static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)

View File

@ -27,7 +27,7 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
.text
.section .noinstr.text, "ax"
/**
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode

View File

@ -171,15 +171,6 @@ static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
u32 vm_instruction_error)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
* failValid writes the error number to the current VMCS, which
* can't be done if there isn't a current VMCS.
*/
if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
return nested_vmx_failInvalid(vcpu);
vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
X86_EFLAGS_SF | X86_EFLAGS_OF))
@ -192,6 +183,20 @@ static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
return kvm_skip_emulated_instruction(vcpu);
}
/*
 * nested_vmx_fail - report a failed VMX instruction to the nested guest.
 * @vcpu:                 vCPU that executed the instruction
 * @vm_instruction_error: error number to report when a current VMCS exists
 *
 * Dispatches to nested_vmx_failValid() when there is a current VMCS (a
 * current_vmptr other than -1ull, or an enlightened VMCS), and to
 * nested_vmx_failInvalid() otherwise.  Returns whatever the selected
 * helper returns.
 */
static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * failValid writes the error number to the current VMCS, so it is
	 * only usable when a current VMCS actually exists.
	 */
	if (vmx->nested.current_vmptr != -1ull || vmx->nested.hv_evmcs)
		return nested_vmx_failValid(vcpu, vm_instruction_error);

	return nested_vmx_failInvalid(vcpu);
}
static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
/* TODO: not to reset guest simply here. */
@ -2157,7 +2162,8 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
* consistency checks.
*/
if (enable_ept && nested_early_check)
vmcs_write64(EPT_POINTER, construct_eptp(&vmx->vcpu, 0));
vmcs_write64(EPT_POINTER,
construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
/* All VMFUNCs are currently emulated through L0 vmexits. */
if (cpu_has_vmx_vmfunc())
@ -2433,22 +2439,28 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
/*
* Whether page-faults are trapped is determined by a combination of
* 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
* If enable_ept, L0 doesn't care about page faults and we should
* set all of these to L1's desires. However, if !enable_ept, L0 does
* care about (at least some) page faults, and because it is not easy
* (if at all possible?) to merge L0 and L1's desires, we simply ask
* to exit on each and every L2 page fault. This is done by setting
* MASK=MATCH=0 and (see below) EB.PF=1.
* 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. If L0
* doesn't care about page faults then we should set all of these to
* L1's desires. However, if L0 does care about (some) page faults, it
* is not easy (if at all possible?) to merge L0 and L1's desires, so we
* simply ask to exit on each and every L2 page fault. This is done by
* setting MASK=MATCH=0 and (see below) EB.PF=1.
* Note that below we don't need special code to set EB.PF beyond the
* "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
* vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
* !enable_ept, EB.PF is 1, so the "or" will always be 1.
*/
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
enable_ept ? vmcs12->page_fault_error_code_mask : 0);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
enable_ept ? vmcs12->page_fault_error_code_match : 0);
if (vmx_need_pf_intercept(&vmx->vcpu)) {
/*
* TODO: if both L0 and L1 need the same MASK and MATCH,
* go ahead and use it?
*/
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
} else {
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
}
if (cpu_has_vmx_apicv()) {
vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
@ -3205,6 +3217,43 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
return true;
}
/*
 * nested_vmx_write_pml_buffer - log a dirtied GPA into the PML buffer
 * described by vmcs12.
 * @vcpu: vCPU, expected to be in guest mode
 * @gpa:  guest physical address to log; the low 12 bits are cleared
 *        before it is written
 *
 * Returns 1 when the buffer is full (nested.pml_full is already set, or
 * guest_pml_index is out of range, in which case pml_full gets set), and
 * 0 in every other case - including when the vCPU is not in guest mode,
 * when PML is not enabled in vmcs12, and when the guest write fails.
 */
static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12;
	gpa_t entry_gpa;

	if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
		return 0;

	if (WARN_ON_ONCE(vmx->nested.pml_full))
		return 1;

	/*
	 * Check if PML is enabled for the nested guest. Whether eptp bit 6 is
	 * set is already checked as part of A/D emulation.
	 */
	vmcs12 = get_vmcs12(vcpu);
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	/* An out-of-range index means there is no free slot left. */
	if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
		vmx->nested.pml_full = true;
		return 1;
	}

	/* Log the page-aligned address into the slot selected by the index. */
	gpa &= ~0xFFFull;
	entry_gpa = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;

	/* The index only advances (counts down) after a successful write. */
	if (!kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(entry_gpa), &gpa,
				  offset_in_page(entry_gpa), sizeof(gpa)))
		vmcs12->guest_pml_index--;

	return 0;
}
/*
* Intel's VMX Instruction Reference specifies a common set of prerequisites
* for running VMX instructions (except VMXON, whose prerequisites are
@ -3456,19 +3505,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* when using the merged vmcs02.
*/
if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)
return nested_vmx_failValid(vcpu,
VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
if (vmcs12->launch_state == launch)
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
if (nested_vmx_check_controls(vcpu, vmcs12))
return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
if (nested_vmx_check_host_state(vcpu, vmcs12))
return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
/*
* We're finally done with prerequisite checking, and can start with
@ -3517,7 +3565,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (status == NVMX_VMENTRY_VMEXIT)
return 1;
WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
}
/*
@ -4460,7 +4508,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
* flag and the VM-instruction error field of the VMCS
* accordingly, and skip the emulated instruction.
*/
(void)nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
(void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
/*
* Restore L1's host state to KVM's software model. We're here
@ -4760,8 +4808,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
}
if (vmx->nested.vmxon)
return nested_vmx_failValid(vcpu,
VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
!= VMXON_NEEDED_FEATURES) {
@ -4852,12 +4899,10 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return r;
if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_INVALID_ADDRESS);
return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
if (vmptr == vmx->nested.vmxon_ptr)
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
/*
* When Enlightened VMEntry is enabled on the calling CPU we treat
@ -4927,8 +4972,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
offset = vmcs_field_to_offset(field);
if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
@ -5031,8 +5075,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
offset = vmcs_field_to_offset(field);
if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/*
* If the vCPU supports "VMWRITE to any supported field in the
@ -5040,8 +5083,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
*/
if (vmcs_field_readonly(field) &&
!nested_cpu_has_vmwrite_any_field(vcpu))
return nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
/*
* Ensure vmcs12 is up-to-date before any VMWRITE that dirties
@ -5116,12 +5158,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return r;
if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INVALID_ADDRESS);
return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
if (vmptr == vmx->nested.vmxon_ptr)
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_VMXON_POINTER);
return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
/* Forbid normal VMPTRLD if Enlightened version was used */
if (vmx->nested.hv_evmcs)
@ -5138,7 +5178,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
* given physical address won't match the required
* VMCS12_REVISION identifier.
*/
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
@ -5148,7 +5188,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
(new_vmcs12->hdr.shadow_vmcs &&
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
kvm_vcpu_unmap(vcpu, &map, false);
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
@ -5233,8 +5273,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
if (type >= 32 || !(types & (1 << type)))
return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
/* According to the Intel VMX instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
@ -5255,7 +5294,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_CONTEXT:
if (!nested_vmx_check_eptp(vcpu, operand.eptp))
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
roots_to_free = 0;
@ -5315,7 +5354,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
if (type >= 32 || !(types & (1 << type)))
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
/* according to the intel vmx instruction reference, the memory
@ -5329,7 +5368,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return vmx_handle_memory_failure(vcpu, r, &e);
if (operand.vpid >> 16)
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
vpid02 = nested_get_vpid02(vcpu);
@ -5337,14 +5376,14 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
if (!operand.vpid ||
is_noncanonical_address(operand.gla, vcpu))
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
vpid_sync_vcpu_addr(vpid02, operand.gla);
break;
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
if (!operand.vpid)
return nested_vmx_failValid(vcpu,
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
vpid_sync_context(vpid02);
break;
@ -6333,7 +6372,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
/*
* secondary cpu-based controls. Do not include those that
* depend on CPUID bits, they are added later by vmx_cpuid_update.
* depend on CPUID bits, they are added later by
* vmx_vcpu_after_set_cpuid.
*/
if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
@ -6514,6 +6554,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
.get_state = vmx_get_nested_state,
.set_state = vmx_set_nested_state,
.get_vmcs12_pages = nested_get_vmcs12_pages,
.write_log_dirty = nested_vmx_write_pml_buffer,
.enable_evmcs = nested_enable_evmcs,
.get_evmcs_version = nested_get_evmcs_version,
};

View File

@ -146,7 +146,9 @@ do { \
: : op1 : "cc" : error, fault); \
return; \
error: \
instrumentation_begin(); \
insn##_error(error_args); \
instrumentation_end(); \
return; \
fault: \
kvm_spurious_fault(); \
@ -161,7 +163,9 @@ do { \
: : op1, op2 : "cc" : error, fault); \
return; \
error: \
instrumentation_begin(); \
insn##_error(error_args); \
instrumentation_end(); \
return; \
fault: \
kvm_spurious_fault(); \

View File

@ -180,9 +180,6 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
ret = pmu->version > 1;
break;
case MSR_IA32_PERF_CAPABILITIES:
ret = 1;
break;
default:
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@ -224,12 +221,6 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = pmu->global_ovf_ctrl;
return 0;
case MSR_IA32_PERF_CAPABILITIES:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
return 1;
msr_info->data = vcpu->arch.perf_capabilities;
return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@ -289,14 +280,6 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
break;
case MSR_IA32_PERF_CAPABILITIES:
if (!msr_info->host_initiated)
return 1;
if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
(data & ~vmx_get_perf_capabilities()) : data)
return 1;
vcpu->arch.perf_capabilities = data;
return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {

View File

@ -27,7 +27,7 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
.text
.section .noinstr.text, "ax"
/**
* vmx_vmenter - VM-Enter the current loaded VMCS
@ -234,6 +234,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
jmp 1b
SYM_FUNC_END(__vmx_vcpu_run)
.section .text, "ax"
/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed

View File

@ -781,7 +781,7 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb |= 1u << BP_VECTOR;
if (to_vmx(vcpu)->rmode.vm86_active)
eb = ~0;
if (enable_ept)
if (!vmx_need_pf_intercept(vcpu))
eb &= ~(1u << PF_VECTOR);
/* When we are running a nested L2 guest and L1 specified for it a
@ -1816,7 +1816,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
msr->data = vmx_get_perf_capabilities();
return 0;
default:
return 1;
return KVM_MSR_RET_INVALID;
}
}
@ -2063,7 +2063,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
if (data & ~kvm_spec_ctrl_valid_bits(vcpu))
if (kvm_spec_ctrl_test_value(data))
return 1;
vmx->spec_ctrl = data;
@ -2934,14 +2934,16 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{
u64 root_hpa = vcpu->arch.mmu->root_hpa;
struct kvm_mmu *mmu = vcpu->arch.mmu;
u64 root_hpa = mmu->root_hpa;
/* No flush required if the current context is invalid. */
if (!VALID_PAGE(root_hpa))
return;
if (enable_ept)
ept_sync_context(construct_eptp(vcpu, root_hpa));
ept_sync_context(construct_eptp(vcpu, root_hpa,
mmu->shadow_root_level));
else if (!is_guest_mode(vcpu))
vpid_sync_context(to_vmx(vcpu)->vpid);
else
@ -3064,26 +3066,19 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmx->emulation_required = emulation_required(vcpu);
}
static int vmx_get_tdp_level(struct kvm_vcpu *vcpu)
static int vmx_get_max_tdp_level(void)
{
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
if (cpu_has_vmx_ept_5levels())
return 5;
return 4;
}
static int get_ept_level(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu));
return vmx_get_tdp_level(vcpu);
}
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa,
int root_level)
{
u64 eptp = VMX_EPTP_MT_WB;
eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
eptp |= (root_level == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
if (enable_ept_ad_bits &&
(!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
@ -3093,7 +3088,8 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
return eptp;
}
void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd)
static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
int pgd_level)
{
struct kvm *kvm = vcpu->kvm;
bool update_guest_cr3 = true;
@ -3101,7 +3097,7 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd)
u64 eptp;
if (enable_ept) {
eptp = construct_eptp(vcpu, pgd);
eptp = construct_eptp(vcpu, pgd, pgd_level);
vmcs_write64(EPT_POINTER, eptp);
if (kvm_x86_ops.tlb_remote_flush) {
@ -4356,6 +4352,16 @@ static void init_vmcs(struct vcpu_vmx *vmx)
vmx->pt_desc.guest.output_mask = 0x7F;
vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
}
/*
* If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
* between guest and host. In that case we only care about present
* faults.
*/
if (enable_ept) {
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, PFERR_PRESENT_MASK);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, PFERR_PRESENT_MASK);
}
}
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@ -4782,18 +4788,25 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
!(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
vcpu->run->internal.ndata = 3;
vcpu->run->internal.ndata = 4;
vcpu->run->internal.data[0] = vect_info;
vcpu->run->internal.data[1] = intr_info;
vcpu->run->internal.data[2] = error_code;
vcpu->run->internal.data[3] = vcpu->arch.last_vmentry_cpu;
return 0;
}
if (is_page_fault(intr_info)) {
cr2 = vmx_get_exit_qual(vcpu);
/* EPT won't cause page fault directly */
WARN_ON_ONCE(!vcpu->arch.apf.host_apf_flags && enable_ept);
return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
if (enable_ept && !vcpu->arch.apf.host_apf_flags) {
/*
* EPT will cause page fault only if we need to
* detect illegal GPAs.
*/
kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
return 1;
} else
return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
}
ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@ -5309,6 +5322,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
vcpu->arch.exit_qualification = exit_qualification;
/*
* Check that the GPA doesn't exceed physical memory limits, as that is
* a guest page fault. We have to emulate the instruction here, because
* if the illegal address is that of a paging structure, then
* EPT_VIOLATION_ACC_WRITE bit is set. Alternatively, if supported we
* would also use advanced VM-exit information for EPT violations to
* reconstruct the page fault error code.
*/
if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa)))
return kvm_emulate_instruction(vcpu, 0);
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}
@ -6005,6 +6030,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= exit_reason;
vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
return 0;
}
@ -6013,6 +6039,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= vmcs_read32(VM_INSTRUCTION_ERROR);
vcpu->run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu;
return 0;
}
@ -6039,6 +6066,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
vcpu->run->internal.data[3] =
vmcs_read64(GUEST_PHYSICAL_ADDRESS);
}
vcpu->run->internal.data[vcpu->run->internal.ndata++] =
vcpu->arch.last_vmentry_cpu;
return 0;
}
@ -6094,8 +6123,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror =
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
vcpu->run->internal.ndata = 1;
vcpu->run->internal.ndata = 2;
vcpu->run->internal.data[0] = exit_reason;
vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
return 0;
}
@ -6109,7 +6139,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
* information but as all relevant affected CPUs have 32KiB L1D cache size
* there is no point in doing so.
*/
static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{
int size = PAGE_SIZE << L1D_CACHE_ORDER;
@ -6142,7 +6172,7 @@ static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
vcpu->stat.l1d_flush++;
if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
return;
}
@ -6628,7 +6658,7 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
}
}
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
{
if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
vmx->loaded_vmcs->host_state.rsp = host_rsp;
@ -6650,6 +6680,63 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
/*
 * vmx_vcpu_enter_exit - perform one guest entry/exit, bracketed by the
 * irq-tracing, lockdep and context-tracking bookkeeping for the transition.
 * @vcpu: vCPU being run; supplies the guest CR2 value and register array
 * @vmx:  VMX state; receives the result of __vmx_vcpu_run() in vmx->fail
 *
 * The function is marked noinstr; the only tracing calls it makes are
 * wrapped in instrumentation_begin()/instrumentation_end() pairs.
 */
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx)
{
/*
* VMENTER enables interrupts (host state), but the kernel state is
* interrupts disabled when this is invoked. Also tell RCU about
* it. This is the same logic as for exit_to_user_mode().
*
* This ensures that e.g. latency analysis on the host observes
* guest mode as interrupt enabled.
*
* guest_enter_irqoff() informs context tracking about the
* transition to guest mode and if enabled adjusts RCU state
* accordingly.
*/
instrumentation_begin();
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
instrumentation_end();
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
/* L1D Flush includes CPU buffer clear to mitigate MDS */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mds_user_clear))
mds_clear_cpu_buffers();
/* Load the guest's CR2 into hardware, skipping the write if it already matches. */
if (vcpu->arch.cr2 != native_read_cr2())
native_write_cr2(vcpu->arch.cr2);
/* Run the guest; the boolean result is recorded in vmx->fail. */
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
vmx->loaded_vmcs->launched);
/* Capture the CR2 value left in hardware after the run. */
vcpu->arch.cr2 = native_read_cr2();
/*
* VMEXIT disables interrupts (host state), but tracing and lockdep
* have them in state 'on' as recorded before entering guest mode.
* Same as enter_from_user_mode().
*
* guest_exit_irqoff() restores host context and reinstates RCU if
* enabled and required.
*
* This needs to be done before the below as native_read_msr()
* contains a tracepoint and x86_spec_ctrl_restore_host() calls
* into world and some more.
*/
lockdep_hardirqs_off(CALLER_ADDR0);
guest_exit_irqoff();
instrumentation_begin();
trace_hardirqs_off_finish();
instrumentation_end();
}
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
fastpath_t exit_fastpath;
@ -6724,19 +6811,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
/* L1D Flush includes CPU buffer clear to mitigate MDS */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mds_user_clear))
mds_clear_cpu_buffers();
if (vcpu->arch.cr2 != read_cr2())
write_cr2(vcpu->arch.cr2);
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
vmx->loaded_vmcs->launched);
vcpu->arch.cr2 = read_cr2();
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
vmx_vcpu_enter_exit(vcpu, vmx);
/*
* We do not use IBRS in the kernel. If this vCPU has used the
@ -7229,7 +7305,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
}
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -7478,42 +7554,6 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
kvm_flush_pml_buffers(kvm);
}
/*
 * vmx_write_pml_buffer - log a dirtied GPA into the PML buffer described
 * by vmcs12 when the vCPU is in guest mode.
 * @vcpu: vCPU on whose behalf the address is logged
 * @gpa:  guest physical address to log; the low 12 bits are cleared
 *        before it is written
 *
 * Returns 1 only when guest_pml_index is out of range (nested.pml_full is
 * then set); returns 0 otherwise, including when the vCPU is not in guest
 * mode, when PML is not enabled in vmcs12, and when the guest write fails.
 */
static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
{
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
gpa_t dst;
if (is_guest_mode(vcpu)) {
/* Only warns; execution continues even if pml_full is already set. */
WARN_ON_ONCE(vmx->nested.pml_full);
/*
* Check if PML is enabled for the nested guest.
* Whether eptp bit 6 is set is already checked
* as part of A/D emulation.
*/
vmcs12 = get_vmcs12(vcpu);
if (!nested_cpu_has_pml(vmcs12))
return 0;
if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
vmx->nested.pml_full = true;
return 1;
}
/* Log the page-aligned address into the slot selected by the index. */
gpa &= ~0xFFFull;
dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
offset_in_page(dst), sizeof(gpa)))
return 0;
/* The index is decremented only after a successful write. */
vmcs12->guest_pml_index--;
}
return 0;
}
static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *memslot,
gfn_t offset, unsigned long mask)
@ -7858,7 +7898,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
.update_bp_intercept = update_exception_bitmap,
.update_exception_bitmap = update_exception_bitmap,
.get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
@ -7918,12 +7958,11 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_tss_addr = vmx_set_tss_addr,
.set_identity_map_addr = vmx_set_identity_map_addr,
.get_tdp_level = vmx_get_tdp_level,
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
.cpuid_update = vmx_cpuid_update,
.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
@ -7942,7 +7981,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
.write_log_dirty = vmx_write_pml_buffer,
.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
@ -8070,7 +8108,7 @@ static __init int hardware_setup(void)
ept_lpage_level = PG_LEVEL_2M;
else
ept_lpage_level = PG_LEVEL_4K;
kvm_configure_mmu(enable_ept, ept_lpage_level);
kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level);
/*
* Only enable PML when hardware supports PML feature, and both EPT
@ -8265,6 +8303,13 @@ static int __init vmx_init(void)
#endif
vmx_check_vmcs12_offsets();
/*
* Intel processors don't have problems with
* GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable
* it for VMX by default
*/
allow_smaller_maxphyaddr = true;
return 0;
}
module_init(vmx_init);

View File

@ -11,6 +11,7 @@
#include "kvm_cache_regs.h"
#include "ops.h"
#include "vmcs.h"
#include "cpuid.h"
extern const u32 vmx_msr_index[];
@ -337,11 +338,11 @@ void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3);
void ept_save_pdptrs(struct kvm_vcpu *vcpu);
void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa,
int root_level);
void update_exception_bitmap(struct kvm_vcpu *vcpu);
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
@ -536,8 +537,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
GFP_KERNEL_ACCOUNT);
}
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
{
vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
@ -550,6 +549,11 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
}
static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
{
return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
}
void dump_vmcs(void);
#endif /* __KVM_X86_VMX_H */

View File

@ -188,6 +188,9 @@ static struct kvm_shared_msrs __percpu *shared_msrs;
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
bool __read_mostly allow_smaller_maxphyaddr;
EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
static u64 __read_mostly host_xss;
u64 __read_mostly supported_xss;
EXPORT_SYMBOL_GPL(supported_xss);
@ -244,6 +247,29 @@ static struct kmem_cache *x86_fpu_cache;
static struct kmem_cache *x86_emulator_cache;
/*
 * Called after a get/set MSR attempt reached an MSR that is not handled.
 *
 * Returns 0 when the failed access should be silently ignored (ignore_msrs
 * is set; the access is additionally logged if report_ignored_msrs is set),
 * or 1 when the failure should be reported to the caller.
 */
static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
				 u64 data, bool write)
{
	const char *op = "rdmsr";

	if (write)
		op = "wrmsr";

	if (!ignore_msrs) {
		vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n",
				       op, msr, data);
		return 1;
	}

	if (report_ignored_msrs)
		vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n",
			    op, msr, data);

	/* Mask the error */
	return 0;
}
static struct kmem_cache *kvm_alloc_emulator_cache(void)
{
unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
@ -380,7 +406,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
asmlinkage __visible void kvm_spurious_fault(void)
asmlinkage __visible noinstr void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
BUG_ON(!kvm_rebooting);
@ -776,6 +802,7 @@ EXPORT_SYMBOL_GPL(pdptrs_changed);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long old_cr0 = kvm_read_cr0(vcpu);
unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
cr0 |= X86_CR0_ET;
@ -793,9 +820,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
return 1;
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
if (cr0 & X86_CR0_PG) {
#ifdef CONFIG_X86_64
if ((vcpu->arch.efer & EFER_LME)) {
if (!is_paging(vcpu) && (vcpu->arch.efer & EFER_LME)) {
int cs_db, cs_l;
if (!is_pae(vcpu))
@ -805,8 +832,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
return 1;
} else
#endif
if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
kvm_read_cr3(vcpu)))
if (is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
return 1;
}
@ -917,7 +944,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
return 0;
}
@ -932,37 +959,17 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);
#define __cr4_reserved_bits(__cpu_has, __c) \
({ \
u64 __reserved_bits = CR4_RESERVED_BITS; \
\
if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \
__reserved_bits |= X86_CR4_OSXSAVE; \
if (!__cpu_has(__c, X86_FEATURE_SMEP)) \
__reserved_bits |= X86_CR4_SMEP; \
if (!__cpu_has(__c, X86_FEATURE_SMAP)) \
__reserved_bits |= X86_CR4_SMAP; \
if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \
__reserved_bits |= X86_CR4_FSGSBASE; \
if (!__cpu_has(__c, X86_FEATURE_PKU)) \
__reserved_bits |= X86_CR4_PKE; \
if (!__cpu_has(__c, X86_FEATURE_LA57)) \
__reserved_bits |= X86_CR4_LA57; \
if (!__cpu_has(__c, X86_FEATURE_UMIP)) \
__reserved_bits |= X86_CR4_UMIP; \
__reserved_bits; \
})
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
return -EINVAL;
if (cr4 & __cr4_reserved_bits(guest_cpuid_has, vcpu))
if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(kvm_valid_cr4);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
@ -1001,7 +1008,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
kvm_mmu_reset_context(vcpu);
if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
return 0;
}
@ -1111,7 +1118,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
case 4:
/* fall through */
case 6:
if (val & 0xffffffff00000000ULL)
if (!kvm_dr6_valid(val))
return -1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
break;
@ -1390,8 +1397,7 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
rdmsrl_safe(msr->index, &msr->data);
break;
default:
if (kvm_x86_ops.get_msr_feature(msr))
return 1;
return kvm_x86_ops.get_msr_feature(msr);
}
return 0;
}
@ -1403,6 +1409,13 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
msr.index = index;
r = kvm_get_msr_feature(&msr);
if (r == KVM_MSR_RET_INVALID) {
/* Unconditionally clear the output for simplicity */
*data = 0;
r = kvm_msr_ignored_check(vcpu, index, 0, false);
}
if (r)
return r;
@ -1517,6 +1530,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
return kvm_x86_ops.set_msr(vcpu, &msr);
}
/*
 * Write @data to MSR @index via __kvm_set_msr().  A KVM_MSR_RET_INVALID
 * result is handed to kvm_msr_ignored_check(), which decides whether the
 * write is ignored (0) or fails (1); any other result is returned as is.
 */
static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
				     u32 index, u64 data, bool host_initiated)
{
	int rc = __kvm_set_msr(vcpu, index, data, host_initiated);

	if (rc != KVM_MSR_RET_INVALID)
		return rc;

	return kvm_msr_ignored_check(vcpu, index, data, true);
}
/*
* Read the MSR specified by @index into @data. Select MSR specific fault
* checks are bypassed if @host_initiated is %true.
@ -1538,15 +1562,29 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
return ret;
}
/*
 * Read MSR @index into @data via __kvm_get_msr().  On KVM_MSR_RET_INVALID
 * the output is zeroed and kvm_msr_ignored_check() decides whether the read
 * is ignored (0) or fails (1); any other result is returned unchanged.
 */
static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
				     u32 index, u64 *data, bool host_initiated)
{
	int rc = __kvm_get_msr(vcpu, index, data, host_initiated);

	if (rc != KVM_MSR_RET_INVALID)
		return rc;

	/* Unconditionally clear *data for simplicity */
	*data = 0;
	return kvm_msr_ignored_check(vcpu, index, 0, false);
}
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
return __kvm_get_msr(vcpu, index, data, false);
return kvm_get_msr_ignored_check(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_get_msr);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
return __kvm_set_msr(vcpu, index, data, false);
return kvm_set_msr_ignored_check(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_set_msr);
@ -1666,12 +1704,12 @@ EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
*/
static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
return __kvm_get_msr(vcpu, index, data, true);
return kvm_get_msr_ignored_check(vcpu, index, data, true);
}
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
return __kvm_set_msr(vcpu, index, *data, true);
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
#ifdef CONFIG_X86_64
@ -2823,6 +2861,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.arch_capabilities = data;
break;
case MSR_IA32_PERF_CAPABILITIES: {
struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
if (!msr_info->host_initiated)
return 1;
if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
return 1;
if (data & ~msr_ent.data)
return 1;
vcpu->arch.perf_capabilities = data;
return 0;
}
case MSR_EFER:
return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
@ -2882,7 +2934,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
} else {
vcpu->arch.ia32_misc_enable_msr = data;
}
@ -3067,17 +3119,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return xen_hvm_config(vcpu, data);
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
return 1;
} else {
if (report_ignored_msrs)
vcpu_unimpl(vcpu,
"ignored wrmsr: 0x%x data 0x%llx\n",
msr, data);
break;
}
return KVM_MSR_RET_INVALID;
}
return 0;
}
@ -3173,6 +3215,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.arch_capabilities;
break;
case MSR_IA32_PERF_CAPABILITIES:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
return 1;
msr_info->data = vcpu->arch.perf_capabilities;
break;
case MSR_IA32_POWER_CTL:
msr_info->data = vcpu->arch.msr_ia32_power_ctl;
break;
@ -3332,17 +3380,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
msr_info->index);
return 1;
} else {
if (report_ignored_msrs)
vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
msr_info->index);
msr_info->data = 0;
}
break;
return KVM_MSR_RET_INVALID;
}
return 0;
}
@ -3477,6 +3515,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_LAST_CPU:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@ -3539,6 +3578,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
break;
case KVM_CAP_SMALLER_MAXPHYADDR:
r = (int) allow_smaller_maxphyaddr;
break;
default:
break;
}
@ -8155,7 +8197,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_x86_ops.set_efer(vcpu, 0);
#endif
kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_mmu_reset_context(vcpu);
}
@ -8507,7 +8549,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
guest_enter_irqoff();
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
@ -8549,6 +8590,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (hw_breakpoint_active())
hw_breakpoint_restore();
vcpu->arch.last_vmentry_cpu = vcpu->cpu;
vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
vcpu->mode = OUTSIDE_GUEST_MODE;
@ -8569,7 +8611,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
local_irq_disable();
kvm_after_interrupt(vcpu);
guest_exit_irqoff();
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
if (delta != S64_MIN) {
@ -9174,7 +9215,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
(X86_CR4_OSXSAVE | X86_CR4_PKE));
kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
if (cpuid_update_needed)
kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (is_pae_paging(vcpu)) {
@ -9278,7 +9319,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
*/
kvm_set_rflags(vcpu, rflags);
kvm_x86_ops.update_bp_intercept(vcpu);
kvm_x86_ops.update_exception_bitmap(vcpu);
r = 0;
@ -9476,7 +9517,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
@ -10673,28 +10713,53 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu)
int kvm_spec_ctrl_test_value(u64 value)
{
uint64_t bits = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD;
/*
* test that setting IA32_SPEC_CTRL to given value
* is allowed by the host processor
*/
/* The STIBP bit doesn't fault even if it's not advertised */
if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP);
if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
!boot_cpu_has(X86_FEATURE_AMD_IBRS))
bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP);
u64 saved_value;
unsigned long flags;
int ret = 0;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL_SSBD) &&
!guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
bits &= ~SPEC_CTRL_SSBD;
if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
!boot_cpu_has(X86_FEATURE_AMD_SSBD))
bits &= ~SPEC_CTRL_SSBD;
local_irq_save(flags);
return bits;
if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
ret = 1;
else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
ret = 1;
else
wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
local_irq_restore(flags);
return ret;
}
EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits);
EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
/*
 * Inject a #PF for @gva into the guest, re-deriving the fault from the
 * guest's own page tables when possible.
 *
 * @vcpu:       vCPU that receives the fault
 * @gva:        guest virtual address that faulted
 * @error_code: #PF error code to start from
 *
 * When @error_code has PFERR_PRESENT_MASK set, @gva is re-walked through
 * walk_mmu->gva_to_gpa().  If that walk returns UNMAPPED_GVA, the fault
 * description it stored in the local x86_exception is injected
 * (NOTE(review): this relies on gva_to_gpa() fully populating the exception
 * on failure - confirm against the walker).  In every other case a plain
 * #PF built from @gva and @error_code is injected.
 */
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
{
	struct x86_exception fault;
	/*
	 * gva_to_gpa() takes an access mask, not a raw #PF error code, so
	 * hand it only the write/fetch/user bits.  NOTE(review): the walker
	 * is expected to compute the present/reserved/protection-key bits
	 * itself; passing them in was fixed upstream for the same reason.
	 */
	u32 access = error_code &
		(PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);

	if (!(error_code & PFERR_PRESENT_MASK) ||
	    vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
		/*
		 * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
		 * tables probably do not match the TLB.  Just proceed
		 * with the error code that the processor gave.
		 */
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = error_code;
		fault.nested_page_fault = false;
		fault.address = gva;
	}
	vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
}
EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);

View File

@ -272,6 +272,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
@ -360,10 +361,41 @@ static inline bool kvm_dr7_valid(u64 data)
/* Bits [63:32] are reserved */
return !(data >> 32);
}
/* A DR6 value is valid only if the reserved bits [63:32] are all clear. */
static inline bool kvm_dr6_valid(u64 data)
{
	return (data & ~0xffffffffULL) == 0;
}
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu);
int kvm_spec_ctrl_test_value(u64 value);
int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu);
#define KVM_MSR_RET_INVALID 2
/*
 * Build the mask of CR4 bits that must be treated as reserved.
 *
 * Starts from CR4_RESERVED_BITS and additionally reserves the CR4 control
 * bit of every feature below for which __cpu_has(__c, X86_FEATURE_*) is
 * false.  @__cpu_has is any function or macro callable as
 * __cpu_has(__c, feature); @__c is passed through to it unchanged and is
 * evaluated once per feature check.  The macro is a statement expression
 * that evaluates to the resulting u64 mask.
 */
#define __cr4_reserved_bits(__cpu_has, __c) \
({ \
u64 __reserved_bits = CR4_RESERVED_BITS; \
\
if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \
__reserved_bits |= X86_CR4_OSXSAVE; \
if (!__cpu_has(__c, X86_FEATURE_SMEP)) \
__reserved_bits |= X86_CR4_SMEP; \
if (!__cpu_has(__c, X86_FEATURE_SMAP)) \
__reserved_bits |= X86_CR4_SMAP; \
if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \
__reserved_bits |= X86_CR4_FSGSBASE; \
if (!__cpu_has(__c, X86_FEATURE_PKU)) \
__reserved_bits |= X86_CR4_PKE; \
if (!__cpu_has(__c, X86_FEATURE_LA57)) \
__reserved_bits |= X86_CR4_LA57; \
if (!__cpu_has(__c, X86_FEATURE_UMIP)) \
__reserved_bits |= X86_CR4_UMIP; \
if (!__cpu_has(__c, X86_FEATURE_VMX)) \
__reserved_bits |= X86_CR4_VMXE; \
__reserved_bits; \
})
#endif

View File

@ -114,9 +114,8 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen);
*/
void __init xen_init_spinlocks(void)
{
/* Don't need to use pvqspinlock code if there is only 1 vCPU. */
if (num_possible_cpus() == 1)
if (num_possible_cpus() == 1 || nopvspin)
xen_pvspin = false;
if (!xen_pvspin) {
@ -137,6 +136,7 @@ void __init xen_init_spinlocks(void)
/*
 * Handle the deprecated "xen_nopvspin" option: log a notice pointing users
 * at "nopvspin" and disable xen_pvspin.  @arg is not used.  Always returns 0.
 * NOTE(review): the registration of this handler is not visible here.
 */
static __init int xen_parse_nopvspin(char *arg)
{
pr_notice("\"xen_nopvspin\" is deprecated, please use \"nopvspin\" instead\n");
xen_pvspin = false;
return 0;
}

View File

@ -0,0 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_KVM_TYPES_H
#define _ASM_GENERIC_KVM_TYPES_H
#endif

View File

@ -211,8 +211,8 @@ struct kvm_async_pf {
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
unsigned long hva, struct kvm_arch_async_pf *arch);
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
unsigned long hva, struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
@ -774,6 +774,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
@ -816,6 +817,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except,
unsigned long *vcpu_bitmap, cpumask_var_t tmp);

View File

@ -20,6 +20,8 @@ enum kvm_mr_change;
#include <linux/types.h>
#include <asm/kvm_types.h>
/*
* Address types:
*
@ -58,4 +60,21 @@ struct gfn_to_pfn_cache {
bool dirty;
};
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
/*
* Memory caches are used to preallocate memory ahead of various MMU flows,
* e.g. page fault handlers. Gracefully handling allocation failures deep in
* MMU flows is problematic, as is triggering reclaim, I/O, etc... while
* holding MMU locks. Note, these caches act more like prefetch buffers than
* classical caches, i.e. objects are not returned to the cache on being freed.
*/
struct kvm_mmu_memory_cache {
/* Number of preallocated objects currently stored in objects[]. */
int nobjs;
/* Extra GFP flags OR'd into every allocation made for this cache. */
gfp_t gfp_zero;
/*
 * Slab cache the objects are allocated from and freed to; when NULL,
 * objects are whole pages from __get_free_page()/free_page().
 */
struct kmem_cache *kmem_cache;
/* Stack of preallocated objects; entries [0, nobjs) are valid. */
void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
};
#endif
#endif /* __KVM_TYPES_H__ */

View File

@ -289,6 +289,7 @@ struct kvm_run {
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
__u32 cpu;
} fail_entry;
/* KVM_EXIT_EXCEPTION */
struct {
@ -1031,6 +1032,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_SECURE_GUEST 181
#define KVM_CAP_HALT_POLL 182
#define KVM_CAP_ASYNC_PF_INT 183
#define KVM_CAP_LAST_CPU 184
#define KVM_CAP_SMALLER_MAXPHYADDR 185
#define KVM_CAP_S390_DIAG318 186
#ifdef KVM_CAP_IRQ_ROUTING

View File

@ -581,4 +581,11 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
#include "qspinlock_paravirt.h"
#include "qspinlock.c"
/* Set when "nopvspin" is given on the kernel command line. */
bool nopvspin __initdata;
/*
 * early_param handler for "nopvspin": records the request in the nopvspin
 * flag.  @arg is not used.  Always returns 0.
 */
static __init int parse_nopvspin(char *arg)
{
nopvspin = true;
return 0;
}
early_param("nopvspin", parse_nopvspin);
#endif

View File

@ -156,17 +156,21 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
}
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
unsigned long hva, struct kvm_arch_async_pf *arch)
/*
* Try to schedule a job to handle page fault asynchronously. Returns 'true' on
* success, 'false' on failure (page fault has to be handled synchronously).
*/
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
unsigned long hva, struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
return 0;
return false;
/* Arch specific code should not do async PF in this case */
if (unlikely(kvm_is_error_hva(hva)))
return 0;
return false;
/*
* do alloc nowait since if we are going to sleep anyway we
@ -174,7 +178,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
*/
work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
if (!work)
return 0;
return false;
work->wakeup_all = false;
work->vcpu = vcpu;
@ -193,7 +197,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
schedule_work(&work->work);
return 1;
return true;
}
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)

View File

@ -340,6 +340,61 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
/*
 * Allocate one object for @mc using @gfp_flags combined with mc->gfp_zero.
 * The object comes from mc->kmem_cache when one is set, otherwise it is a
 * free page.  Returns the allocator's result, i.e. NULL on failure.
 */
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
					       gfp_t gfp_flags)
{
	gfp_t flags = gfp_flags | mc->gfp_zero;

	if (!mc->kmem_cache)
		return (void *)__get_free_page(flags);

	return kmem_cache_alloc(mc->kmem_cache, flags);
}
/*
 * Make sure @mc holds at least @min objects.
 *
 * If it already does, nothing is allocated.  Otherwise the cache is filled
 * towards its full capacity, not just up to @min.  An allocation failure is
 * only reported (-ENOMEM) when fewer than @min objects are available at
 * that point; otherwise 0 is returned.
 */
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
{
	if (mc->nobjs >= min)
		return 0;

	for (; mc->nobjs < ARRAY_SIZE(mc->objects); mc->nobjs++) {
		void *obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);

		if (!obj)
			return mc->nobjs >= min ? 0 : -ENOMEM;

		mc->objects[mc->nobjs] = obj;
	}

	return 0;
}
/* Return the number of preallocated objects currently held in @mc. */
int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc)
{
return mc->nobjs;
}
/*
 * Release every object still held in @mc, last stored first, back to
 * mc->kmem_cache when one is set or to the page allocator otherwise.
 * Leaves mc->nobjs at 0.
 */
void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs) {
		void *obj = mc->objects[--mc->nobjs];

		if (mc->kmem_cache)
			kmem_cache_free(mc->kmem_cache, obj);
		else
			free_page((unsigned long)obj);
	}
}
/*
 * Pop one preallocated object from @mc.
 *
 * An empty cache triggers a warning and falls back to a direct
 * GFP_ATOMIC | __GFP_ACCOUNT allocation.  The function BUG()s rather than
 * return NULL, so callers always get a valid pointer.
 */
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *obj;

	if (!WARN_ON(!mc->nobjs))
		obj = mc->objects[--mc->nobjs];
	else
		obj = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);

	BUG_ON(!obj);
	return obj;
}
#endif
static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
mutex_init(&vcpu->mutex);
@ -1626,6 +1681,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
/*
 * Look up the memslot for @gfn through kvm_vcpu_gfn_to_memslot() and report
 * whether kvm_is_visible_memslot() considers it visible.
 */
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	return kvm_is_visible_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn));
}
EXPORT_SYMBOL_GPL(kvm_vcpu_is_visible_gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
{
struct vm_area_struct *vma;