Merge branch 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (140 commits)
  KVM: MMU: handle large host sptes on invlpg/resync
  KVM: Add locking to virtual i8259 interrupt controller
  KVM: MMU: Don't treat a global pte as such if cr4.pge is cleared
  MAINTAINERS: Maintainership changes for kvm/ia64
  KVM: ia64: Fix kvm_arch_vcpu_ioctl_[gs]et_regs()
  KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI
  KVM: VMX: Fix pending NMI-vs.-IRQ race for user space irqchip
  KVM: fix handling of ACK from shared guest IRQ
  KVM: MMU: check for present pdptr shadow page in walk_shadow
  KVM: Consolidate userspace memory capability reporting into common code
  KVM: Advertise the bug in memory region destruction as fixed
  KVM: use cpumask_var_t for cpus_hardware_enabled
  KVM: use modern cpumask primitives, no cpumask_t on stack
  KVM: Extract core of kvm_flush_remote_tlbs/kvm_reload_remote_mmus
  KVM: set owner of cpu and vm file operations
  anon_inodes: use fops->owner for module refcount
  x86: KVM guest: kvm_get_tsc_khz: return khz, not lpj
  KVM: MMU: prepopulate the shadow on invlpg
  KVM: MMU: skip global pgtables on sync due to cr3 switch
  KVM: MMU: collapse remote TLB flushes on root sync
  ...
This commit is contained in:
Linus Torvalds 2009-01-02 11:41:11 -08:00
commit 597b0d2162
68 changed files with 4752 additions and 2598 deletions

View File

@ -2542,8 +2542,6 @@ W: http://kvm.qumranet.com
S: Supported
KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
P: Anthony Xu
M: anthony.xu@intel.com
P: Xiantao Zhang
M: xiantao.zhang@intel.com
L: kvm-ia64@vger.kernel.org

View File

@ -166,8 +166,6 @@ struct saved_vpd {
};
struct kvm_regs {
char *saved_guest;
char *saved_stack;
struct saved_vpd vpd;
/*Arch-regs*/
int mp_state;
@ -200,6 +198,10 @@ struct kvm_regs {
unsigned long fp_psr; /*used for lazy float register */
unsigned long saved_gp;
/*for phycial emulation */
union context saved_guest;
unsigned long reserved[64]; /* for future use */
};
struct kvm_sregs {

View File

@ -23,17 +23,6 @@
#ifndef __ASM_KVM_HOST_H
#define __ASM_KVM_HOST_H
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <asm/pal.h>
#include <asm/sal.h>
#define KVM_MAX_VCPUS 4
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
@ -50,70 +39,132 @@
#define EXIT_REASON_EXTERNAL_INTERRUPT 6
#define EXIT_REASON_IPI 7
#define EXIT_REASON_PTC_G 8
#define EXIT_REASON_DEBUG 20
/*Define vmm address space and vm data space.*/
#define KVM_VMM_SIZE (16UL<<20)
#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
#define KVM_VMM_SHIFT 24
#define KVM_VMM_BASE 0xD000000000000000UL
#define VMM_SIZE (8UL<<20)
#define KVM_VMM_BASE 0xD000000000000000
#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
/*
* Define vm_buffer, used by PAL Services, base address.
* Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
* Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
*/
#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
#define KVM_VM_BUFFER_SIZE (8UL<<20)
#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
/*Define Virtual machine data layout.*/
#define KVM_VM_DATA_SHIFT 24
#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT)
#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE)
/*
* kvm guest's data area looks as follow:
*
* +----------------------+ ------- KVM_VM_DATA_SIZE
* | vcpu[n]'s data | | ___________________KVM_STK_OFFSET
* | | | / |
* | .......... | | /vcpu's struct&stack |
* | .......... | | /---------------------|---- 0
* | vcpu[5]'s data | | / vpd |
* | vcpu[4]'s data | |/-----------------------|
* | vcpu[3]'s data | / vtlb |
* | vcpu[2]'s data | /|------------------------|
* | vcpu[1]'s data |/ | vhpt |
* | vcpu[0]'s data |____________________________|
* +----------------------+ |
* | memory dirty log | |
* +----------------------+ |
* | vm's data struct | |
* +----------------------+ |
* | | |
* | | |
* | | |
* | | |
* | | |
* | | |
* | | |
* | vm's p2m table | |
* | | |
* | | |
* | | | |
* vm's data->| | | |
* +----------------------+ ------- 0
* To support large memory, needs to increase the size of p2m.
* To support more vcpus, needs to ensure it has enough space to
* hold vcpus' data.
*/
#define KVM_VM_DATA_SHIFT 26
#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
#define KVM_P2M_BASE KVM_VM_DATA_BASE
#define KVM_P2M_OFS 0
#define KVM_P2M_SIZE (8UL << 20)
#define KVM_P2M_BASE KVM_VM_DATA_BASE
#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20)
#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE)
#define KVM_VHPT_OFS KVM_P2M_SIZE
#define KVM_VHPT_BLOCK_SIZE (2UL << 20)
#define VHPT_SHIFT 18
#define VHPT_SIZE (1UL << VHPT_SHIFT)
#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
#define VHPT_SHIFT 16
#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT)
#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
#define KVM_VTLB_BLOCK_SIZE (1UL<<20)
#define VTLB_SHIFT 17
#define VTLB_SIZE (1UL<<VTLB_SHIFT)
#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5))
#define VTLB_SHIFT 16
#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT)
#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5))
#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE)
#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE)
#define KVM_VPD_BLOCK_SIZE (2UL<<20)
#define VPD_SHIFT 16
#define VPD_SIZE (1UL<<VPD_SHIFT)
#define VPD_SHIFT 16
#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT)
#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE)
#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE)
#define KVM_VCPU_BLOCK_SIZE (2UL<<20)
#define VCPU_SHIFT 18
#define VCPU_SIZE (1UL<<VCPU_SHIFT)
#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE
#define VCPU_STRUCT_SHIFT 16
#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE)
#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE)
#define KVM_VM_BLOCK_SIZE (1UL<<19)
#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE)
#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE)
#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19)
#define KVM_VM_STRUCT_SHIFT 19
#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
/* Get vpd, vhpt, tlb, vcpu, base*/
#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE)
#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE)
#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE)
#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE)
#define KVM_MEM_DIRY_LOG_SHIFT 19
#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
#ifndef __ASSEMBLY__
/*Define the max vcpus and memory for Guests.*/
#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
#define VMM_LOG_LEN 256
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <asm/pal.h>
#include <asm/sal.h>
#include <asm/page.h>
struct kvm_vcpu_data {
char vcpu_vhpt[VHPT_SIZE];
char vcpu_vtlb[VTLB_SIZE];
char vcpu_vpd[VPD_SIZE];
char vcpu_struct[VCPU_STRUCT_SIZE];
};
struct kvm_vm_data {
char kvm_p2m[KVM_P2M_SIZE];
char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
};
#define VCPU_BASE(n) KVM_VM_DATA_BASE + \
offsetof(struct kvm_vm_data, vcpu_data[n])
#define VM_BASE KVM_VM_DATA_BASE + \
offsetof(struct kvm_vm_data, kvm_vm_struct)
#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
offsetof(struct kvm_vcpu_data, vcpu_struct))
/*IO section definitions*/
#define IOREQ_READ 1
@ -389,6 +440,7 @@ struct kvm_vcpu_arch {
unsigned long opcode;
unsigned long cause;
char log_buf[VMM_LOG_LEN];
union context host;
union context guest;
};
@ -403,14 +455,13 @@ struct kvm_sal_data {
};
struct kvm_arch {
spinlock_t dirty_log_lock;
unsigned long vm_base;
unsigned long metaphysical_rr0;
unsigned long metaphysical_rr4;
unsigned long vmm_init_rr;
unsigned long vhpt_base;
unsigned long vtlb_base;
unsigned long vpd_base;
spinlock_t dirty_log_lock;
struct kvm_ioapic *vioapic;
struct kvm_vm_stat stat;
struct kvm_sal_data rdv_sal_data;
@ -512,7 +563,7 @@ struct kvm_pt_regs {
static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
{
return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
}
typedef int kvm_vmm_entry(void);
@ -531,5 +582,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
void kvm_sal_emul(struct kvm_vcpu *vcpu);
static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
#endif /* __ASSEMBLY__*/
#endif

View File

@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
vtlb.o process.o
vtlb.o process.o kvm_lib.o
#Add link memcpy and memset to avoid possible structure assignment error
kvm-intel-objs += memcpy.o memset.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o

View File

@ -24,19 +24,10 @@
#include <linux/autoconf.h>
#include <linux/kvm_host.h>
#include <linux/kbuild.h>
#include "vcpu.h"
#define task_struct kvm_vcpu
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
#define BLANK() asm volatile("\n->" : :)
#define OFFSET(_sym, _str, _mem) \
DEFINE(_sym, offsetof(_str, _mem));
void foo(void)
{
DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));

View File

@ -180,7 +180,6 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_MP_STATE:
r = 1;
@ -439,7 +438,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
expires = div64_u64(itc_diff, cyc_per_usec);
kt = ktime_set(0, 1000 * expires);
down_read(&vcpu->kvm->slots_lock);
vcpu->arch.ht_active = 1;
hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
@ -452,7 +450,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
up_read(&vcpu->kvm->slots_lock);
if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
return -EINTR;
@ -476,6 +473,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
return 1;
}
static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
printk("VMM: %s", vcpu->arch.log_buf);
return 1;
}
static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) = {
[EXIT_REASON_VM_PANIC] = handle_vm_error,
@ -487,6 +491,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_IPI] = handle_ipi,
[EXIT_REASON_PTC_G] = handle_global_purge,
[EXIT_REASON_DEBUG] = handle_vcpu_debug,
};
@ -698,27 +703,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return r;
}
/*
* Allocate 16M memory for every vm to hold its specific data.
* Its memory map is defined in kvm_host.h.
*/
static struct kvm *kvm_alloc_kvm(void)
{
struct kvm *kvm;
uint64_t vm_base;
BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
if (!vm_base)
return ERR_PTR(-ENOMEM);
printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
/* Zero all pages before use! */
memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
kvm = (struct kvm *)(vm_base +
offsetof(struct kvm_vm_data, kvm_vm_struct));
kvm->arch.vm_base = vm_base;
printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
return kvm;
}
@ -760,21 +762,12 @@ static void kvm_build_io_pmt(struct kvm *kvm)
static void kvm_init_vm(struct kvm *kvm)
{
long vm_base;
BUG_ON(!kvm);
kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
kvm->arch.vmm_init_rr = VMM_INIT_RR;
vm_base = kvm->arch.vm_base;
if (vm_base) {
kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
}
/*
*Fill P2M entries for MMIO/IO ranges
*/
@ -838,9 +831,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
int r;
int i;
vcpu_load(vcpu);
@ -857,18 +849,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vpd->vpr = regs->vpd.vpr;
r = -EFAULT;
r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
sizeof(union context));
if (r)
goto out;
r = copy_from_user(vcpu + 1, regs->saved_stack +
sizeof(struct kvm_vcpu),
IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
if (r)
goto out;
vcpu->arch.exit_data =
((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
RESTORE_REGS(mp_state);
RESTORE_REGS(vmm_rr);
@ -902,9 +883,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
set_bit(KVM_REQ_RESUME, &vcpu->requests);
vcpu_put(vcpu);
r = 0;
out:
return r;
return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
@ -1166,10 +1146,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/*Set entry address for first run.*/
regs->cr_iip = PALE_RESET_ENTRY;
/*Initilize itc offset for vcpus*/
/*Initialize itc offset for vcpus*/
itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
for (i = 0; i < MAX_VCPU_NUM; i++) {
v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
for (i = 0; i < KVM_MAX_VCPUS; i++) {
v = (struct kvm_vcpu *)((char *)vcpu +
sizeof(struct kvm_vcpu_data) * i);
v->arch.itc_offset = itc_offset;
v->arch.last_itc = 0;
}
@ -1183,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.apic->vcpu = vcpu;
p_ctx->gr[1] = 0;
p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
p_ctx->gr[13] = (unsigned long)vmm_vcpu;
p_ctx->psr = 0x1008522000UL;
p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@ -1218,12 +1199,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.hlt_timer.function = hlt_timer_fn;
vcpu->arch.last_run_cpu = -1;
vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
vcpu->arch.vsa_base = kvm_vsa_base;
vcpu->arch.__gp = kvm_vmm_gp;
vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
init_ptce_info(vcpu);
r = 0;
@ -1273,12 +1254,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int r;
int cpu;
BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
r = -EINVAL;
if (id >= KVM_MAX_VCPUS) {
printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
KVM_MAX_VCPUS);
goto fail;
}
r = -ENOMEM;
if (!vm_base) {
printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
goto fail;
}
vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
vcpu_data[id].vcpu_struct));
vcpu->kvm = kvm;
cpu = get_cpu();
@ -1374,9 +1365,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
int r;
struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
int i;
vcpu_load(vcpu);
for (i = 0; i < 16; i++) {
@ -1391,14 +1382,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->vpd.vpsr = vpd->vpsr;
regs->vpd.vpr = vpd->vpr;
r = -EFAULT;
r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
sizeof(union context));
if (r)
goto out;
r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
if (r)
goto out;
memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
SAVE_REGS(mp_state);
SAVE_REGS(vmm_rr);
memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@ -1426,10 +1411,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
SAVE_REGS(metaphysical_saved_rr4);
SAVE_REGS(fp_psr);
SAVE_REGS(saved_gp);
vcpu_put(vcpu);
r = 0;
out:
return r;
return 0;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@ -1457,6 +1441,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
unsigned long base_gfn = memslot->base_gfn;
if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
return -ENOMEM;
for (i = 0; i < npages; i++) {
pfn = gfn_to_pfn(kvm, base_gfn + i);
if (!kvm_is_mmio_pfn(pfn)) {
@ -1631,8 +1618,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot;
int r, i;
long n, base;
unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
+ KVM_MEM_DIRTY_LOG_OFS);
unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
r = -EINVAL;
if (log->slot >= KVM_MEMORY_SLOTS)

15
arch/ia64/kvm/kvm_lib.c Normal file
View File

@ -0,0 +1,15 @@
/*
* kvm_lib.c: Compile some libraries for kvm-intel module.
*
* Just include kernel's library, and disable symbols export.
* Copyright (C) 2008, Intel Corporation.
* Xiantao Zhang (xiantao.zhang@intel.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#undef CONFIG_MODULES
#include "../../../lib/vsprintf.c"
#include "../../../lib/ctype.c"

View File

@ -24,6 +24,8 @@
#include <asm/asmmacro.h>
#include <asm/types.h>
#include <asm/kregs.h>
#include <asm/kvm_host.h>
#include "asm-offsets.h"
#define KVM_MINSTATE_START_SAVE_MIN \
@ -33,7 +35,7 @@
addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
;; \
lfetch.fault.excl.nt1 [r22]; \
addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \
mov r23 = ar.bspstore; /* save ar.bspstore */ \
;; \
mov ar.bspstore = r22; /* switch to kernel RBS */\

View File

@ -27,7 +27,8 @@
*/
static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
{
return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
return (uint64_t *)(kvm->arch.vm_base +
offsetof(struct kvm_vm_data, kvm_p2m));
}
static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,

View File

@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
switch (addr) {
case PIB_OFST_INTA:
/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
panic_vm(v);
panic_vm(v, "Undefined write on PIB INTA\n");
break;
case PIB_OFST_XTP:
if (length == 1) {
vlsapic_write_xtp(v, val);
} else {
/*panic_domain(NULL,
"Undefined write on PIB XTP\n");*/
panic_vm(v);
panic_vm(v, "Undefined write on PIB XTP\n");
}
break;
default:
if (PIB_LOW_HALF(addr)) {
/*lower half */
/*Lower half */
if (length != 8)
/*panic_domain(NULL,
"Can't LHF write with size %ld!\n",
length);*/
panic_vm(v);
panic_vm(v, "Can't LHF write with size %ld!\n",
length);
else
vlsapic_write_ipi(v, addr, val);
} else { /* upper half
printk("IPI-UHF write %lx\n",addr);*/
panic_vm(v);
} else { /*Upper half */
panic_vm(v, "IPI-UHF write %lx\n", addr);
}
break;
}
@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
if (length == 1) /* 1 byte load */
; /* There is no i8259, there is no INTA access*/
else
/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
panic_vm(v);
panic_vm(v, "Undefined read on PIB INTA\n");
break;
case PIB_OFST_XTP:
if (length == 1) {
result = VLSAPIC_XTP(v);
/* printk("read xtp %lx\n", result); */
} else {
/*panic_domain(NULL,
"Undefined read on PIB XTP\n");*/
panic_vm(v);
panic_vm(v, "Undefined read on PIB XTP\n");
}
break;
default:
panic_vm(v);
panic_vm(v, "Undefined addr access for lsapic!\n");
break;
}
return result;
@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
/* it's necessary to ensure zero extending */
*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
} else
panic_vm(vcpu);
panic_vm(vcpu, "Unhandled mmio access returned!\n");
out:
local_irq_restore(psr);
return ;
@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
return;
} else {
inst_type = -1;
panic_vm(vcpu);
panic_vm(vcpu, "Unsupported MMIO access instruction! \
Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
bundle.i64[0], bundle.i64[1]);
}
size = 1 << size;
@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
if (inst_type == SL_INTEGER)
vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
else
panic_vm(vcpu);
panic_vm(vcpu, "Unsupported instruction type!\n");
}
vcpu_increment_iip(vcpu);

View File

@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
vector = vec2off[vec];
if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
panic_vm(vcpu);
panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
"with psr.ic = 0\n", vector);
return;
}
@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
} else
panic_vm(vcpu);
panic_vm(vcpu, "Mis-set for exit reason!\n");
}
static void set_sal_call_data(struct kvm_vcpu *vcpu)
@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
} else
panic_vm(vcpu);
panic_vm(vcpu, "Mis-set for exit reason!\n");
}
void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
vpsr = VCPU(vcpu, vpsr);
isr = vpsr & IA64_PSR_RI;
if (!(vpsr & IA64_PSR_IC))
panic_vm(vcpu);
panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
}
@ -941,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
ia64_set_pta(vcpu->arch.vhpt.pta.val);
}
static void vmm_sanity_check(struct kvm_vcpu *vcpu)
{
struct exit_ctl_data *p = &vcpu->arch.exit_data;
if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
panic_vm(vcpu, "Failed to do vmm sanity check,"
"it maybe caused by crashed vmm!!\n\n");
}
}
static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
{
vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
vcpu_do_resume(vcpu);
return;
@ -968,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu)
1, 0, 0, 0, 0, 0);
kvm_do_resume_op(vcpu);
}
void vmm_panic_handler(u64 vec)
{
struct kvm_vcpu *vcpu = current_vcpu;
vmm_sanity = 0;
panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
vec2off[vec]);
}

View File

@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
unsigned long vitv = VCPU(vcpu, itv);
if (vcpu->vcpu_id == 0) {
for (i = 0; i < MAX_VCPU_NUM; i++) {
v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
for (i = 0; i < KVM_MAX_VCPUS; i++) {
v = (struct kvm_vcpu *)((char *)vcpu +
sizeof(struct kvm_vcpu_data) * i);
VMX(v, itc_offset) = itc_offset;
VMX(v, last_itc) = 0;
}
@ -1650,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
* Otherwise panic
*/
if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
panic_vm(vcpu);
panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
& vpsr.is=0\n");
/*
* For those IA64_PSR bits: id/da/dd/ss/ed/ia
@ -2103,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu)
if (is_physical_mode(vcpu)) {
if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
panic_vm(vcpu);
panic_vm(vcpu, "Machine Status conflicts!\n");
ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
ia64_dv_serialize_data();
@ -2152,10 +2154,70 @@ int vmm_entry(void)
return 0;
}
void panic_vm(struct kvm_vcpu *v)
static void kvm_show_registers(struct kvm_pt_regs *regs)
{
struct exit_ctl_data *p = &v->arch.exit_data;
unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
struct kvm_vcpu *vcpu = current_vcpu;
if (vcpu != NULL)
printk("vcpu 0x%p vcpu %d\n",
vcpu, vcpu->vcpu_id);
printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
regs->cr_ipsr, regs->cr_ifs, ip);
printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
regs->ar_rnat, regs->ar_bspstore, regs->pr);
printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
regs->b6, regs->b7);
printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
regs->f6.u.bits[1], regs->f6.u.bits[0],
regs->f7.u.bits[1], regs->f7.u.bits[0]);
printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
regs->f8.u.bits[1], regs->f8.u.bits[0],
regs->f9.u.bits[1], regs->f9.u.bits[0]);
printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
regs->f10.u.bits[1], regs->f10.u.bits[0],
regs->f11.u.bits[1], regs->f11.u.bits[0]);
printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
regs->r2, regs->r3);
printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
regs->r9, regs->r10);
printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
regs->r12, regs->r13);
printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
regs->r15, regs->r16);
printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
regs->r18, regs->r19);
printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
regs->r21, regs->r22);
printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
regs->r24, regs->r25);
printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
regs->r27, regs->r28);
printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
regs->r30, regs->r31);
}
void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
{
va_list args;
char buf[256];
struct kvm_pt_regs *regs = vcpu_regs(v);
struct exit_ctl_data *p = &v->arch.exit_data;
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
printk(buf);
kvm_show_registers(regs);
p->exit_reason = EXIT_REASON_VM_PANIC;
vmm_transition(v);
/*Never to return*/

View File

@ -737,9 +737,12 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
void kvm_init_vhpt(struct kvm_vcpu *v);
void thash_init(struct thash_cb *hcb, u64 sz);
void panic_vm(struct kvm_vcpu *v);
void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
u64 arg4, u64 arg5, u64 arg6, u64 arg7);
extern long vmm_sanity;
#endif
#endif /* __VCPU_H__ */

View File

@ -20,6 +20,7 @@
*/
#include<linux/kernel.h>
#include<linux/module.h>
#include<asm/fpswa.h>
@ -31,6 +32,8 @@ MODULE_LICENSE("GPL");
extern char kvm_ia64_ivt;
extern fpswa_interface_t *vmm_fpswa_interface;
long vmm_sanity = 1;
struct kvm_vmm_info vmm_info = {
.module = THIS_MODULE,
.vmm_entry = vmm_entry,
@ -62,5 +65,31 @@ void vmm_spin_unlock(spinlock_t *lock)
{
_vmm_raw_spin_unlock(lock);
}
static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
{
struct exit_ctl_data *p = &vcpu->arch.exit_data;
long psr;
local_irq_save(psr);
p->exit_reason = EXIT_REASON_DEBUG;
vmm_transition(vcpu);
local_irq_restore(psr);
}
asmlinkage int printk(const char *fmt, ...)
{
struct kvm_vcpu *vcpu = current_vcpu;
va_list args;
int r;
memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
va_start(args, fmt);
r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
va_end(args);
vcpu_debug_exit(vcpu);
return r;
}
module_init(kvm_vmm_init)
module_exit(kvm_vmm_exit)

File diff suppressed because it is too large Load Diff

View File

@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
u64 i, dirty_pages = 1;
u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
+ KVM_MEM_DIRTY_LOG_OFS;
void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
vmm_spin_lock(lock);

View File

@ -0,0 +1,80 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#ifndef __ASM_PPC_DISASSEMBLE_H__
#define __ASM_PPC_DISASSEMBLE_H__
#include <linux/types.h>
static inline unsigned int get_op(u32 inst)
{
return inst >> 26;
}
static inline unsigned int get_xop(u32 inst)
{
return (inst >> 1) & 0x3ff;
}
static inline unsigned int get_sprn(u32 inst)
{
return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}
static inline unsigned int get_dcrn(u32 inst)
{
return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}
static inline unsigned int get_rt(u32 inst)
{
return (inst >> 21) & 0x1f;
}
static inline unsigned int get_rs(u32 inst)
{
return (inst >> 21) & 0x1f;
}
static inline unsigned int get_ra(u32 inst)
{
return (inst >> 16) & 0x1f;
}
static inline unsigned int get_rb(u32 inst)
{
return (inst >> 11) & 0x1f;
}
static inline unsigned int get_rc(u32 inst)
{
return inst & 0x1;
}
static inline unsigned int get_ws(u32 inst)
{
return (inst >> 11) & 0x1f;
}
static inline unsigned int get_d(u32 inst)
{
return inst & 0xffff;
}
#endif /* __ASM_PPC_DISASSEMBLE_H__ */

View File

@ -0,0 +1,61 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#ifndef __ASM_44X_H__
#define __ASM_44X_H__
#include <linux/kvm_host.h>
#define PPC44x_TLB_SIZE 64
/* If the guest is expecting it, this can be as large as we like; we'd just
* need to find some way of advertising it. */
#define KVM44x_GUEST_TLB_SIZE 64
struct kvmppc_44x_shadow_ref {
struct page *page;
u16 gtlb_index;
u8 writeable;
u8 tid;
};
struct kvmppc_vcpu_44x {
/* Unmodified copy of the guest's TLB. */
struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
/* References to guest pages in the hardware TLB. */
struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
/* State of the shadow TLB at guest context switch time. */
struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
struct kvm_vcpu vcpu;
};
static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
}
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
#endif /* __ASM_44X_H__ */

View File

@ -64,27 +64,58 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
struct tlbe {
struct kvmppc_44x_tlbe {
u32 tid; /* Only the low 8 bits are used. */
u32 word0;
u32 word1;
u32 word2;
};
enum kvm_exit_types {
MMIO_EXITS,
DCR_EXITS,
SIGNAL_EXITS,
ITLB_REAL_MISS_EXITS,
ITLB_VIRT_MISS_EXITS,
DTLB_REAL_MISS_EXITS,
DTLB_VIRT_MISS_EXITS,
SYSCALL_EXITS,
ISI_EXITS,
DSI_EXITS,
EMULATED_INST_EXITS,
EMULATED_MTMSRWE_EXITS,
EMULATED_WRTEE_EXITS,
EMULATED_MTSPR_EXITS,
EMULATED_MFSPR_EXITS,
EMULATED_MTMSR_EXITS,
EMULATED_MFMSR_EXITS,
EMULATED_TLBSX_EXITS,
EMULATED_TLBWE_EXITS,
EMULATED_RFI_EXITS,
DEC_EXITS,
EXT_INTR_EXITS,
HALT_WAKEUP,
USR_PR_INST,
FP_UNAVAIL,
DEBUG_EXITS,
TIMEINGUEST,
__NUMBER_OF_KVM_EXIT_TYPES
};
/* allow access to big endian 32bit upper/lower parts and 64bit var */
struct kvmppc_exit_timing {
union {
u64 tv64;
struct {
u32 tbu, tbl;
} tv32;
};
};
struct kvm_arch {
};
struct kvm_vcpu_arch {
/* Unmodified copy of the guest's TLB. */
struct tlbe guest_tlb[PPC44x_TLB_SIZE];
/* TLB that's actually used when the guest is running. */
struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
/* Pages which are referenced in the shadow TLB. */
struct page *shadow_pages[PPC44x_TLB_SIZE];
/* Track which TLB entries we've modified in the current exit. */
u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
u32 host_stack;
u32 host_pid;
u32 host_dbcr0;
@ -94,32 +125,32 @@ struct kvm_vcpu_arch {
u32 host_msr;
u64 fpr[32];
u32 gpr[32];
ulong gpr[32];
u32 pc;
ulong pc;
u32 cr;
u32 ctr;
u32 lr;
u32 xer;
ulong ctr;
ulong lr;
ulong xer;
u32 msr;
ulong msr;
u32 mmucr;
u32 sprg0;
u32 sprg1;
u32 sprg2;
u32 sprg3;
u32 sprg4;
u32 sprg5;
u32 sprg6;
u32 sprg7;
u32 srr0;
u32 srr1;
u32 csrr0;
u32 csrr1;
u32 dsrr0;
u32 dsrr1;
u32 dear;
u32 esr;
ulong sprg0;
ulong sprg1;
ulong sprg2;
ulong sprg3;
ulong sprg4;
ulong sprg5;
ulong sprg6;
ulong sprg7;
ulong srr0;
ulong srr1;
ulong csrr0;
ulong csrr1;
ulong dsrr0;
ulong dsrr1;
ulong dear;
ulong esr;
u32 dec;
u32 decar;
u32 tbl;
@ -127,7 +158,7 @@ struct kvm_vcpu_arch {
u32 tcr;
u32 tsr;
u32 ivor[16];
u32 ivpr;
ulong ivpr;
u32 pir;
u32 shadow_pid;
@ -140,9 +171,22 @@ struct kvm_vcpu_arch {
u32 dbcr0;
u32 dbcr1;
#ifdef CONFIG_KVM_EXIT_TIMING
struct kvmppc_exit_timing timing_exit;
struct kvmppc_exit_timing timing_last_enter;
u32 last_exit_type;
u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_last_exit;
struct dentry *debugfs_exit_timing;
#endif
u32 last_inst;
u32 fault_dear;
u32 fault_esr;
ulong fault_dear;
ulong fault_esr;
gpa_t paddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */

View File

@ -29,11 +29,6 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
struct kvm_tlb {
struct tlbe guest_tlb[PPC44x_TLB_SIZE];
struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
};
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@ -41,9 +36,6 @@ enum emulation_result {
EMULATE_FAIL, /* can't emulate this instruction */
};
extern const unsigned char exception_priority[];
extern const unsigned char priority_exception[];
extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern char kvmppc_handlers_start[];
extern unsigned long kvmppc_handler_len;
@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_emulate_instruction(struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
u64 asid, u32 flags);
extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
gva_t eend, u32 asid);
extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
u64 asid, u32 flags, u32 max_bytes,
unsigned int gtlb_idx);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
/* XXX Book E specific */
extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
/* Core-specific hooks */
extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
unsigned int id);
extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
extern int kvmppc_core_check_processor_compat(void);
extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr);
static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
{
unsigned int priority = exception_priority[exception];
set_bit(priority, &vcpu->arch.pending_exceptions);
}
extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
{
unsigned int priority = exception_priority[exception];
clear_bit(priority, &vcpu->arch.pending_exceptions);
}
extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
/* Helper function for "full" MSR writes. No need to call this if only EE is
* changing. */
static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
{
if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
vcpu->arch.msr = new_msr;
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
if (vcpu->arch.msr & MSR_WE)
kvm_vcpu_block(vcpu);
}
static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
{
if (vcpu->arch.pid != new_pid) {
vcpu->arch.pid = new_pid;
vcpu->arch.swap_pid = 1;
}
}
extern int kvmppc_booke_init(void);
extern void kvmppc_booke_exit(void);
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);

View File

@ -56,6 +56,7 @@
#ifndef __ASSEMBLY__
extern unsigned int tlb_44x_hwater;
extern unsigned int tlb_44x_index;
typedef struct {
unsigned int id;

View File

@ -23,9 +23,6 @@
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/hrtimer.h>
#ifdef CONFIG_KVM
#include <linux/kvm_host.h>
#endif
#ifdef CONFIG_PPC64
#include <linux/time.h>
#include <linux/hardirq.h>
@ -51,6 +48,9 @@
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/alpaca.h>
#endif
#ifdef CONFIG_KVM
#include <asm/kvm_44x.h>
#endif
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
#include "head_booke.h"
@ -357,12 +357,10 @@ int main(void)
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
DEFINE(TLBE_BYTES, sizeof(struct tlbe));
DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@ -385,5 +383,16 @@ int main(void)
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
arch.timing_exit.tv32.tbu));
DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
arch.timing_exit.tv32.tbl));
DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
arch.timing_last_enter.tv32.tbu));
DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
arch.timing_last_enter.tv32.tbl));
#endif
return 0;
}

228
arch/powerpc/kvm/44x.c Normal file
View File

@ -0,0 +1,228 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/tlbflush.h>
#include <asm/kvm_44x.h>
#include <asm/kvm_ppc.h>
#include "44x_tlb.h"
/* Note: clearing MSR[DE] just means that the debug interrupt will not be
* delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
* If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
* will be delivered as an "imprecise debug event" (which is indicated by
* DBSR[IDE].
*/
static void kvm44x_disable_debug_interrupts(void)
{
mtmsr(mfmsr() & ~MSR_DE);
}
void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
kvm44x_disable_debug_interrupts();
mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
mtmsr(vcpu->arch.host_msr);
}
void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
struct kvm_guest_debug *dbg = &vcpu->guest_debug;
u32 dbcr0 = 0;
vcpu->arch.host_msr = mfmsr();
kvm44x_disable_debug_interrupts();
/* Save host debug register state. */
vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
/* set registers up for guest */
if (dbg->bp[0]) {
mtspr(SPRN_IAC1, dbg->bp[0]);
dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
}
if (dbg->bp[1]) {
mtspr(SPRN_IAC2, dbg->bp[1]);
dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
}
if (dbg->bp[2]) {
mtspr(SPRN_IAC3, dbg->bp[2]);
dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
}
if (dbg->bp[3]) {
mtspr(SPRN_IAC4, dbg->bp[3]);
dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
}
mtspr(SPRN_DBCR0, dbcr0);
mtspr(SPRN_DBCR1, 0);
mtspr(SPRN_DBCR2, 0);
}
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_44x_tlb_load(vcpu);
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_44x_tlb_put(vcpu);
}
int kvmppc_core_check_processor_compat(void)
{
int r;
if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
r = 0;
else
r = -ENOTSUPP;
return r;
}
int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
int i;
tlbe->tid = 0;
tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
tlbe->word1 = 0;
tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
tlbe++;
tlbe->tid = 0;
tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
tlbe->word1 = 0xef600000;
tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
| PPC44x_TLB_I | PPC44x_TLB_G;
/* Since the guest can directly access the timebase, it must know the
* real timebase frequency. Accordingly, it must see the state of
* CCR1[TCS]. */
vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
vcpu_44x->shadow_refs[i].gtlb_index = -1;
return 0;
}
/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *gtlbe;
int index;
gva_t eaddr;
u8 pid;
u8 as;
eaddr = tr->linear_address;
pid = (tr->linear_address >> 32) & 0xff;
as = (tr->linear_address >> 40) & 0x1;
index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
if (index == -1) {
tr->valid = 0;
return 0;
}
gtlbe = &vcpu_44x->guest_tlb[index];
tr->physical_address = tlb_xlate(gtlbe, eaddr);
/* XXX what does "writeable" and "usermode" even mean? */
tr->valid = 1;
return 0;
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;
struct kvm_vcpu *vcpu;
int err;
vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu_44x) {
err = -ENOMEM;
goto out;
}
vcpu = &vcpu_44x->vcpu;
err = kvm_vcpu_init(vcpu, kvm, id);
if (err)
goto free_vcpu;
return vcpu;
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
out:
return ERR_PTR(err);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
}
static int kvmppc_44x_init(void)
{
int r;
r = kvmppc_booke_init();
if (r)
return r;
return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
}
static void kvmppc_44x_exit(void)
{
kvmppc_booke_exit();
}
module_init(kvmppc_44x_init);
module_exit(kvmppc_44x_exit);

View File

@ -0,0 +1,371 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#include <asm/kvm_ppc.h>
#include <asm/dcr.h>
#include <asm/dcr-regs.h>
#include <asm/disassemble.h>
#include <asm/kvm_44x.h>
#include "timing.h"
#include "booke.h"
#include "44x_tlb.h"
#define OP_RFI 19
#define XOP_RFI 50
#define XOP_MFMSR 83
#define XOP_WRTEE 131
#define XOP_MTMSR 146
#define XOP_WRTEEI 163
#define XOP_MFDCR 323
#define XOP_MTDCR 451
#define XOP_TLBSX 914
#define XOP_ICCCI 966
#define XOP_TLBWE 978
static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
{
vcpu->arch.pc = vcpu->arch.srr0;
kvmppc_set_msr(vcpu, vcpu->arch.srr1);
}
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
int dcrn;
int ra;
int rb;
int rc;
int rs;
int rt;
int ws;
switch (get_op(inst)) {
case OP_RFI:
switch (get_xop(inst)) {
case XOP_RFI:
kvmppc_emul_rfi(vcpu);
kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
*advance = 0;
break;
default:
emulated = EMULATE_FAIL;
break;
}
break;
case 31:
switch (get_xop(inst)) {
case XOP_MFMSR:
rt = get_rt(inst);
vcpu->arch.gpr[rt] = vcpu->arch.msr;
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
case XOP_MTMSR:
rs = get_rs(inst);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
break;
case XOP_WRTEE:
rs = get_rs(inst);
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
| (vcpu->arch.gpr[rs] & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
break;
case XOP_WRTEEI:
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
| (inst & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
break;
case XOP_MFDCR:
dcrn = get_dcrn(inst);
rt = get_rt(inst);
/* The guest may access CPR0 registers to determine the timebase
* frequency, and it must know the real host frequency because it
* can directly access the timebase registers.
*
* It would be possible to emulate those accesses in userspace,
* but userspace can really only figure out the end frequency.
* We could decompose that into the factors that compute it, but
* that's tricky math, and it's easier to just report the real
* CPR0 values.
*/
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
break;
case DCRN_CPR0_CONFIG_DATA:
local_irq_disable();
mtdcr(DCRN_CPR0_CONFIG_ADDR,
vcpu->arch.cpr0_cfgaddr);
vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
local_irq_enable();
break;
default:
run->dcr.dcrn = dcrn;
run->dcr.data = 0;
run->dcr.is_write = 0;
vcpu->arch.io_gpr = rt;
vcpu->arch.dcr_needed = 1;
kvmppc_account_exit(vcpu, DCR_EXITS);
emulated = EMULATE_DO_DCR;
}
break;
case XOP_MTDCR:
dcrn = get_dcrn(inst);
rs = get_rs(inst);
/* emulate some access in kernel */
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
break;
default:
run->dcr.dcrn = dcrn;
run->dcr.data = vcpu->arch.gpr[rs];
run->dcr.is_write = 1;
vcpu->arch.dcr_needed = 1;
kvmppc_account_exit(vcpu, DCR_EXITS);
emulated = EMULATE_DO_DCR;
}
break;
case XOP_TLBWE:
ra = get_ra(inst);
rs = get_rs(inst);
ws = get_ws(inst);
emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
break;
case XOP_TLBSX:
rt = get_rt(inst);
ra = get_ra(inst);
rb = get_rb(inst);
rc = get_rc(inst);
emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
break;
case XOP_ICCCI:
break;
default:
emulated = EMULATE_FAIL;
}
break;
default:
emulated = EMULATE_FAIL;
}
return emulated;
}
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
switch (sprn) {
case SPRN_MMUCR:
vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
case SPRN_PID:
kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
case SPRN_CCR0:
vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
case SPRN_CCR1:
vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
case SPRN_DEAR:
vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
case SPRN_ESR:
vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
case SPRN_DBCR0:
vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
case SPRN_DBCR1:
vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
case SPRN_TSR:
vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
case SPRN_TCR:
vcpu->arch.tcr = vcpu->arch.gpr[rs];
kvmppc_emulate_dec(vcpu);
break;
/* Note: SPRG4-7 are user-readable. These values are
* loaded into the real SPRGs when resuming the
* guest. */
case SPRN_SPRG4:
vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG5:
vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG6:
vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG7:
vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
case SPRN_IVPR:
vcpu->arch.ivpr = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR0:
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR1:
vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR2:
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR3:
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR4:
vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR5:
vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR6:
vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR7:
vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR8:
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR9:
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR10:
vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR11:
vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR12:
vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR13:
vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR14:
vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
break;
case SPRN_IVOR15:
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
break;
default:
return EMULATE_FAIL;
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
return EMULATE_DONE;
}
int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
switch (sprn) {
/* 440 */
case SPRN_MMUCR:
vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
case SPRN_CCR0:
vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
case SPRN_CCR1:
vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
/* Book E */
case SPRN_PID:
vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
case SPRN_IVPR:
vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
case SPRN_DEAR:
vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
case SPRN_ESR:
vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
case SPRN_DBCR0:
vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
case SPRN_DBCR1:
vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
case SPRN_IVOR0:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
break;
case SPRN_IVOR1:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
break;
case SPRN_IVOR2:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
break;
case SPRN_IVOR3:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
break;
case SPRN_IVOR4:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
break;
case SPRN_IVOR5:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
break;
case SPRN_IVOR6:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
break;
case SPRN_IVOR7:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
break;
case SPRN_IVOR8:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
break;
case SPRN_IVOR9:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
break;
case SPRN_IVOR10:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
break;
case SPRN_IVOR11:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
break;
case SPRN_IVOR12:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
break;
case SPRN_IVOR13:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
break;
case SPRN_IVOR14:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
break;
case SPRN_IVOR15:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
break;
default:
return EMULATE_FAIL;
}
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
return EMULATE_DONE;
}

View File

@ -22,20 +22,103 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <asm/tlbflush.h>
#include <asm/mmu-44x.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_44x.h>
#include "timing.h"
#include "44x_tlb.h"
#ifndef PPC44x_TLBE_SIZE
#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
#endif
#define PAGE_SIZE_4K (1<<12)
#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
#define PPC44x_TLB_UATTR_MASK \
(PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
static unsigned int kvmppc_tlb_44x_pos;
#ifdef DEBUG
void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
{
struct kvmppc_44x_tlbe *tlbe;
int i;
printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
printk("| %2s | %3s | %8s | %8s | %8s |\n",
"nr", "tid", "word0", "word1", "word2");
for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
tlbe = &vcpu_44x->guest_tlb[i];
if (tlbe->word0 & PPC44x_TLB_VALID)
printk(" G%2d | %02X | %08X | %08X | %08X |\n",
i, tlbe->tid, tlbe->word0, tlbe->word1,
tlbe->word2);
}
}
#endif
static inline void kvmppc_44x_tlbie(unsigned int index)
{
/* 0 <= index < 64, so the V bit is clear and we can use the index as
* word0. */
asm volatile(
"tlbwe %[index], %[index], 0\n"
:
: [index] "r"(index)
);
}
static inline void kvmppc_44x_tlbre(unsigned int index,
struct kvmppc_44x_tlbe *tlbe)
{
asm volatile(
"tlbre %[word0], %[index], 0\n"
"mfspr %[tid], %[sprn_mmucr]\n"
"andi. %[tid], %[tid], 0xff\n"
"tlbre %[word1], %[index], 1\n"
"tlbre %[word2], %[index], 2\n"
: [word0] "=r"(tlbe->word0),
[word1] "=r"(tlbe->word1),
[word2] "=r"(tlbe->word2),
[tid] "=r"(tlbe->tid)
: [index] "r"(index),
[sprn_mmucr] "i"(SPRN_MMUCR)
: "cc"
);
}
static inline void kvmppc_44x_tlbwe(unsigned int index,
struct kvmppc_44x_tlbe *stlbe)
{
unsigned long tmp;
asm volatile(
"mfspr %[tmp], %[sprn_mmucr]\n"
"rlwimi %[tmp], %[tid], 0, 0xff\n"
"mtspr %[sprn_mmucr], %[tmp]\n"
"tlbwe %[word0], %[index], 0\n"
"tlbwe %[word1], %[index], 1\n"
"tlbwe %[word2], %[index], 2\n"
: [tmp] "=&r"(tmp)
: [word0] "r"(stlbe->word0),
[word1] "r"(stlbe->word1),
[word2] "r"(stlbe->word2),
[tid] "r"(stlbe->tid),
[index] "r"(index),
[sprn_mmucr] "i"(SPRN_MMUCR)
);
}
static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
{
/* Mask off reserved bits. */
attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
/* We only care about the guest's permission and user bits. */
attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
if (!usermode) {
/* Guest is in supervisor mode, so we need to translate guest
@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
/* Make sure host can always access this memory. */
attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
/* WIMGE = 0b00100 */
attrib |= PPC44x_TLB_M;
return attrib;
}
/* Load shadow TLB back into hardware. */
void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
for (i = 0; i <= tlb_44x_hwater; i++) {
struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
kvmppc_44x_tlbwe(i, stlbe);
}
}
static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
unsigned int i)
{
vcpu_44x->shadow_tlb_mod[i] = 1;
}
/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
for (i = 0; i <= tlb_44x_hwater; i++) {
struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
if (vcpu_44x->shadow_tlb_mod[i])
kvmppc_44x_tlbre(i, stlbe);
if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
kvmppc_44x_tlbie(i);
}
}
/* Search the guest TLB for a matching entry. */
int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
unsigned int as)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
/* XXX Replace loop with fancy data structures. */
for (i = 0; i < PPC44x_TLB_SIZE; i++) {
struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
unsigned int tid;
if (eaddr < get_tlb_eaddr(tlbe))
@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
return -1;
}
struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
unsigned int index;
index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
if (index == -1)
return NULL;
return &vcpu->arch.guest_tlb[index];
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
unsigned int index;
index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
if (index == -1)
return NULL;
return &vcpu->arch.guest_tlb[index];
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
unsigned int stlb_index)
{
return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
}
struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
unsigned int index)
{
struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
struct page *page = vcpu->arch.shadow_pages[index];
if (!ref->page)
return;
if (get_tlb_v(stlbe)) {
if (kvmppc_44x_tlbe_is_writable(stlbe))
kvm_release_page_dirty(page);
else
kvm_release_page_clean(page);
}
/* Discard from the TLB. */
/* Note: we could actually invalidate a host mapping, if the host overwrote
* this TLB entry since we inserted a guest mapping. */
kvmppc_44x_tlbie(stlb_index);
/* Now release the page. */
if (ref->writeable)
kvm_release_page_dirty(ref->page);
else
kvm_release_page_clean(ref->page);
ref->page = NULL;
/* XXX set tlb_44x_index to stlb_index? */
KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
}
void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
for (i = 0; i <= tlb_44x_hwater; i++)
kvmppc_44x_shadow_release(vcpu, i);
kvmppc_44x_shadow_release(vcpu_44x, i);
}
void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
{
vcpu->arch.shadow_tlb_mod[i] = 1;
}
/* Caller must ensure that the specified guest TLB entry is safe to insert into
* the shadow TLB. */
void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
u32 flags)
/**
* kvmppc_mmu_map -- create a host mapping for guest memory
*
* If the guest wanted a larger page than the host supports, only the first
* host page is mapped here and the rest are demand faulted.
*
* If the guest wanted a smaller page than the host page size, we map only the
* guest-size page (i.e. not a full host page mapping).
*
* Caller must ensure that the specified guest TLB entry is safe to insert into
* the shadow TLB.
*/
void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
u32 flags, u32 max_bytes, unsigned int gtlb_index)
{
struct kvmppc_44x_tlbe stlbe;
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_shadow_ref *ref;
struct page *new_page;
struct tlbe *stlbe;
hpa_t hpaddr;
gfn_t gfn;
unsigned int victim;
/* Future optimization: don't overwrite the TLB entry containing the
* current PC (or stack?). */
victim = kvmppc_tlb_44x_pos++;
if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
kvmppc_tlb_44x_pos = 0;
stlbe = &vcpu->arch.shadow_tlb[victim];
/* Select TLB entry to clobber. Indirectly guard against races with the TLB
* miss handler by disabling interrupts. */
local_irq_disable();
victim = ++tlb_44x_index;
if (victim > tlb_44x_hwater)
victim = 0;
tlb_44x_index = victim;
local_irq_enable();
/* Get reference to new page. */
gfn = gpaddr >> PAGE_SHIFT;
new_page = gfn_to_page(vcpu->kvm, gfn);
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
}
hpaddr = page_to_phys(new_page);
/* Drop reference to old page. */
kvmppc_44x_shadow_release(vcpu, victim);
vcpu->arch.shadow_pages[victim] = new_page;
/* Invalidate any previous shadow mappings. */
kvmppc_44x_shadow_release(vcpu_44x, victim);
/* XXX Make sure (va, size) doesn't overlap any other
* entries. 440x6 user manual says the result would be
@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
/* XXX what about AS? */
stlbe->tid = !(asid & 0xff);
/* Force TS=1 for all guest mappings. */
/* For now we hardcode 4KB mappings, but it will be important to
* use host large pages in the future. */
stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
| PPC44x_TLB_4K;
stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
vcpu->arch.msr & MSR_PR);
kvmppc_tlbe_set_modified(vcpu, victim);
stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
KVMTRACE_5D(STLB_WRITE, vcpu, victim,
stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
handler);
if (max_bytes >= PAGE_SIZE) {
/* Guest mapping is larger than or equal to host page size. We can use
* a "native" host mapping. */
stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
} else {
/* Guest mapping is smaller than host page size. We must restrict the
* size of the mapping to be at most the smaller of the two, but for
* simplicity we fall back to a 4K mapping (this is probably what the
* guest is using anyways). */
stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
/* 'hpaddr' is a host page, which is larger than the mapping we're
* inserting here. To compensate, we must add the in-page offset to the
* sub-page. */
hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
}
stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
vcpu->arch.msr & MSR_PR);
stlbe.tid = !(asid & 0xff);
/* Keep track of the reference so we can properly release it later. */
ref = &vcpu_44x->shadow_refs[victim];
ref->page = new_page;
ref->gtlb_index = gtlb_index;
ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
ref->tid = stlbe.tid;
/* Insert shadow mapping into hardware TLB. */
kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
kvmppc_44x_tlbwe(victim, &stlbe);
KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
stlbe.word2, handler);
}
void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
gva_t eend, u32 asid)
/* For a particular guest TLB entry, invalidate the corresponding host TLB
* mappings and release the host pages. */
static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
unsigned int gtlb_index)
{
unsigned int pid = !(asid & 0xff);
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
/* XXX Replace loop with fancy data structures. */
for (i = 0; i <= tlb_44x_hwater; i++) {
struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
unsigned int tid;
if (!get_tlb_v(stlbe))
continue;
if (eend < get_tlb_eaddr(stlbe))
continue;
if (eaddr > get_tlb_end(stlbe))
continue;
tid = get_tlb_tid(stlbe);
if (tid && (tid != pid))
continue;
kvmppc_44x_shadow_release(vcpu, i);
stlbe->word0 = 0;
kvmppc_tlbe_set_modified(vcpu, i);
KVMTRACE_5D(STLB_INVAL, vcpu, i,
stlbe->tid, stlbe->word0, stlbe->word1,
stlbe->word2, handler);
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
if (ref->gtlb_index == gtlb_index)
kvmppc_44x_shadow_release(vcpu_44x, i);
}
}
/* Invalidate all mappings on the privilege switch after PID has been changed.
* The guest always runs with PID=1, so we must clear the entire TLB when
* switching address spaces. */
void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
{
int i;
if (vcpu->arch.swap_pid) {
/* XXX Replace loop with fancy data structures. */
for (i = 0; i <= tlb_44x_hwater; i++) {
struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
/* Future optimization: clear only userspace mappings. */
kvmppc_44x_shadow_release(vcpu, i);
stlbe->word0 = 0;
kvmppc_tlbe_set_modified(vcpu, i);
KVMTRACE_5D(STLB_INVAL, vcpu, i,
stlbe->tid, stlbe->word0, stlbe->word1,
stlbe->word2, handler);
}
vcpu->arch.swap_pid = 0;
}
vcpu->arch.shadow_pid = !usermode;
}
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
if (unlikely(vcpu->arch.pid == new_pid))
return;
vcpu->arch.pid = new_pid;
/* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
* can't access guest kernel mappings (TID=1). When we switch to a new
* guest PID, which will also use host PID=0, we must discard the old guest
* userspace mappings. */
for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
if (ref->tid == 0)
kvmppc_44x_shadow_release(vcpu_44x, i);
}
}
static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
const struct kvmppc_44x_tlbe *tlbe)
{
gpa_t gpa;
if (!get_tlb_v(tlbe))
return 0;
/* Does it match current guest AS? */
/* XXX what about IS != DS? */
if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
return 0;
gpa = get_tlb_raddr(tlbe);
if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
/* Mapping is not for RAM. */
return 0;
return 1;
}
int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *tlbe;
unsigned int gtlb_index;
gtlb_index = vcpu->arch.gpr[ra];
if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
printk("%s: index %d\n", __func__, gtlb_index);
kvmppc_dump_vcpu(vcpu);
return EMULATE_FAIL;
}
tlbe = &vcpu_44x->guest_tlb[gtlb_index];
/* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
if (tlbe->word0 & PPC44x_TLB_VALID)
kvmppc_44x_invalidate(vcpu, gtlb_index);
switch (ws) {
case PPC44x_TLB_PAGEID:
tlbe->tid = get_mmucr_stid(vcpu);
tlbe->word0 = vcpu->arch.gpr[rs];
break;
case PPC44x_TLB_XLAT:
tlbe->word1 = vcpu->arch.gpr[rs];
break;
case PPC44x_TLB_ATTRIB:
tlbe->word2 = vcpu->arch.gpr[rs];
break;
default:
return EMULATE_FAIL;
}
if (tlbe_is_host_safe(vcpu, tlbe)) {
u64 asid;
gva_t eaddr;
gpa_t gpaddr;
u32 flags;
u32 bytes;
eaddr = get_tlb_eaddr(tlbe);
gpaddr = get_tlb_raddr(tlbe);
/* Use the advertised page size to mask effective and real addrs. */
bytes = get_tlb_bytes(tlbe);
eaddr &= ~(bytes - 1);
gpaddr &= ~(bytes - 1);
asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
flags = tlbe->word2 & 0xffff;
kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
}
KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
tlbe->word1, tlbe->word2, handler);
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
return EMULATE_DONE;
}
int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
{
u32 ea;
int gtlb_index;
unsigned int as = get_mmucr_sts(vcpu);
unsigned int pid = get_mmucr_stid(vcpu);
ea = vcpu->arch.gpr[rb];
if (ra)
ea += vcpu->arch.gpr[ra];
gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
if (rc) {
if (gtlb_index < 0)
vcpu->arch.cr &= ~0x20000000;
else
vcpu->arch.cr |= 0x20000000;
}
vcpu->arch.gpr[rt] = gtlb_index;
kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
return EMULATE_DONE;
}

View File

@ -25,48 +25,52 @@
extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned int pid, unsigned int as);
extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
u8 rc);
extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
/* TLB helper functions */
static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 4) & 0xf;
}
static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
{
return tlbe->word0 & 0xfffffc00;
}
static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
{
unsigned int pgsize = get_tlb_size(tlbe);
return 1 << 10 << (pgsize << 1);
}
static inline gva_t get_tlb_end(const struct tlbe *tlbe)
static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
{
return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
}
static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
{
u64 word1 = tlbe->word1;
return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
}
static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
{
return tlbe->tid & 0xff;
}
static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 8) & 0x1;
}
static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
{
return (tlbe->word0 >> 9) & 0x1;
}
@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
return (vcpu->arch.mmucr >> 16) & 0x1;
}
static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
{
unsigned int pgmask = get_tlb_bytes(tlbe) - 1;

View File

@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on 44x && EXPERIMENTAL
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
# We can only run on Book E hosts so far
select KVM_BOOKE_HOST
config KVM_440
bool "KVM support for PowerPC 440 processors"
depends on EXPERIMENTAL && 44x
select KVM
---help---
Support hosting virtualized guest machines. You will also
need to select one or more of the processor modules below.
Support running unmodified 440 guest kernels in virtual machines on
440 host processors.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
config KVM_BOOKE_HOST
bool "KVM host support for Book E PowerPC processors"
depends on KVM && 44x
config KVM_EXIT_TIMING
bool "Detailed exit timing"
depends on KVM
---help---
Provides host support for KVM on Book E PowerPC processors. Currently
this works on 440 processors only.
Calculate elapsed time for every exit/enter cycle. A per-vcpu
report is available in debugfs kvm/vm#_vcpu#_timing.
The overhead is relatively small, however it is not recommended for
production environments.
If unsure, say N.
config KVM_TRACE
bool "KVM trace support"

View File

@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
kvm-objs := $(common-objs-y) powerpc.o emulate.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
obj-$(CONFIG_KVM) += kvm.o
AFLAGS_booke_interrupts.o := -I$(obj)
kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
kvm-440-objs := \
booke.o \
booke_interrupts.o \
44x.o \
44x_tlb.o \
44x_emulate.o
obj-$(CONFIG_KVM_440) += kvm-440.o

View File

@ -24,21 +24,26 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include "timing.h"
#include <asm/cacheflush.h>
#include <asm/kvm_44x.h>
#include "booke.h"
#include "44x_tlb.h"
unsigned long kvmppc_booke_handlers;
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exits", VCPU_STAT(sum_exits) },
{ "mmio", VCPU_STAT(mmio_exits) },
{ "dcr", VCPU_STAT(dcr_exits) },
{ "sig", VCPU_STAT(signal_exits) },
{ "light", VCPU_STAT(light_exits) },
{ "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
{ "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
{ "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
static const u32 interrupt_msr_mask[16] = {
[BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
[BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
[BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
[BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
[BOOKE_INTERRUPT_DEBUG] = MSR_ME,
};
const unsigned char exception_priority[] = {
[BOOKE_INTERRUPT_DATA_STORAGE] = 0,
[BOOKE_INTERRUPT_INST_STORAGE] = 1,
[BOOKE_INTERRUPT_ALIGNMENT] = 2,
[BOOKE_INTERRUPT_PROGRAM] = 3,
[BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
[BOOKE_INTERRUPT_SYSCALL] = 5,
[BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
[BOOKE_INTERRUPT_DTLB_MISS] = 7,
[BOOKE_INTERRUPT_ITLB_MISS] = 8,
[BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
[BOOKE_INTERRUPT_DEBUG] = 10,
[BOOKE_INTERRUPT_CRITICAL] = 11,
[BOOKE_INTERRUPT_WATCHDOG] = 12,
[BOOKE_INTERRUPT_EXTERNAL] = 13,
[BOOKE_INTERRUPT_FIT] = 14,
[BOOKE_INTERRUPT_DECREMENTER] = 15,
};
const unsigned char priority_exception[] = {
BOOKE_INTERRUPT_DATA_STORAGE,
BOOKE_INTERRUPT_INST_STORAGE,
BOOKE_INTERRUPT_ALIGNMENT,
BOOKE_INTERRUPT_PROGRAM,
BOOKE_INTERRUPT_FP_UNAVAIL,
BOOKE_INTERRUPT_SYSCALL,
BOOKE_INTERRUPT_AP_UNAVAIL,
BOOKE_INTERRUPT_DTLB_MISS,
BOOKE_INTERRUPT_ITLB_MISS,
BOOKE_INTERRUPT_MACHINE_CHECK,
BOOKE_INTERRUPT_DEBUG,
BOOKE_INTERRUPT_CRITICAL,
BOOKE_INTERRUPT_WATCHDOG,
BOOKE_INTERRUPT_EXTERNAL,
BOOKE_INTERRUPT_FIT,
BOOKE_INTERRUPT_DECREMENTER,
};
void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
{
struct tlbe *tlbe;
int i;
printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
printk("| %2s | %3s | %8s | %8s | %8s |\n",
"nr", "tid", "word0", "word1", "word2");
for (i = 0; i < PPC44x_TLB_SIZE; i++) {
tlbe = &vcpu->arch.guest_tlb[i];
if (tlbe->word0 & PPC44x_TLB_VALID)
printk(" G%2d | %02X | %08X | %08X | %08X |\n",
i, tlbe->tid, tlbe->word0, tlbe->word1,
tlbe->word2);
}
for (i = 0; i < PPC44x_TLB_SIZE; i++) {
tlbe = &vcpu->arch.shadow_tlb[i];
if (tlbe->word0 & PPC44x_TLB_VALID)
printk(" S%2d | %02X | %08X | %08X | %08X |\n",
i, tlbe->tid, tlbe->word0, tlbe->word1,
tlbe->word2);
}
}
/* TODO: use vcpu_printf() */
void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
{
int i;
printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr);
printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
for (i = 0; i < 32; i += 4) {
printk("gpr%02d: %08x %08x %08x %08x\n", i,
printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
vcpu->arch.gpr[i],
vcpu->arch.gpr[i+1],
vcpu->arch.gpr[i+2],
@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
}
}
/* Check if we are ready to deliver the interrupt */
static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
unsigned int priority)
{
int r;
switch (interrupt) {
case BOOKE_INTERRUPT_CRITICAL:
r = vcpu->arch.msr & MSR_CE;
break;
case BOOKE_INTERRUPT_MACHINE_CHECK:
r = vcpu->arch.msr & MSR_ME;
break;
case BOOKE_INTERRUPT_EXTERNAL:
r = vcpu->arch.msr & MSR_EE;
break;
case BOOKE_INTERRUPT_DECREMENTER:
r = vcpu->arch.msr & MSR_EE;
break;
case BOOKE_INTERRUPT_FIT:
r = vcpu->arch.msr & MSR_EE;
break;
case BOOKE_INTERRUPT_WATCHDOG:
r = vcpu->arch.msr & MSR_CE;
break;
case BOOKE_INTERRUPT_DEBUG:
r = vcpu->arch.msr & MSR_DE;
break;
default:
r = 1;
}
return r;
set_bit(priority, &vcpu->arch.pending_exceptions);
}
static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
{
switch (interrupt) {
case BOOKE_INTERRUPT_DECREMENTER:
vcpu->arch.tsr |= TSR_DIS;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
}
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
}
int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
{
return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
}
/* Deliver the interrupt of the corresponding priority, if possible. */
static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
unsigned int priority)
{
int allowed = 0;
ulong msr_mask;
switch (priority) {
case BOOKE_IRQPRIO_PROGRAM:
case BOOKE_IRQPRIO_DTLB_MISS:
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_DATA_STORAGE:
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_FP_UNAVAIL:
case BOOKE_IRQPRIO_AP_UNAVAIL:
case BOOKE_IRQPRIO_ALIGNMENT:
allowed = 1;
msr_mask = MSR_CE|MSR_ME|MSR_DE;
break;
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_WATCHDOG:
allowed = vcpu->arch.msr & MSR_CE;
msr_mask = MSR_ME;
break;
case BOOKE_IRQPRIO_MACHINE_CHECK:
allowed = vcpu->arch.msr & MSR_ME;
msr_mask = 0;
break;
case BOOKE_IRQPRIO_EXTERNAL:
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
allowed = vcpu->arch.msr & MSR_EE;
msr_mask = MSR_CE|MSR_ME|MSR_DE;
break;
case BOOKE_IRQPRIO_DEBUG:
allowed = vcpu->arch.msr & MSR_DE;
msr_mask = MSR_ME;
break;
}
vcpu->arch.srr0 = vcpu->arch.pc;
vcpu->arch.srr1 = vcpu->arch.msr;
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
if (allowed) {
vcpu->arch.srr0 = vcpu->arch.pc;
vcpu->arch.srr1 = vcpu->arch.msr;
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
clear_bit(priority, &vcpu->arch.pending_exceptions);
}
return allowed;
}
/* Check pending exceptions and deliver one, if possible. */
void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int exception;
unsigned int priority;
priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
priority = __ffs(*pending);
while (priority <= BOOKE_MAX_INTERRUPT) {
exception = priority_exception[priority];
if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
kvmppc_clear_exception(vcpu, exception);
kvmppc_deliver_interrupt(vcpu, exception);
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
}
priority = find_next_bit(pending,
BITS_PER_BYTE * sizeof(*pending),
@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
enum emulation_result er;
int r = RESUME_HOST;
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_EXTERNAL:
kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
if (need_resched())
cond_resched();
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DECREMENTER:
/* Since we switched IVPR back to the host's value, the host
* handled this interrupt the moment we enabled interrupts.
* Now we just offer it a chance to reschedule the guest. */
/* XXX At this point the TLB still holds our shadow TLB, so if
* we do reschedule the host will fault over it. Perhaps we
* should politely restore the host's entries to minimize
* misses before ceding control. */
kvmppc_account_exit(vcpu, DEC_EXITS);
if (need_resched())
cond_resched();
if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
vcpu->stat.dec_exits++;
else
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Program traps generated by user-level software must be handled
* by the guest kernel. */
vcpu->arch.esr = vcpu->arch.fault_esr;
kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
r = RESUME_GUEST;
kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
er = kvmppc_emulate_instruction(run, vcpu);
switch (er) {
case EMULATE_DONE:
/* don't overwrite subtypes, just account kvm_stats */
kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
/* Future optimization: only reload non-volatiles if
* they were actually modified by emulation. */
vcpu->stat.emulated_inst_exits++;
r = RESUME_GUEST_NV;
break;
case EMULATE_DO_DCR:
@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case EMULATE_FAIL:
/* XXX Deliver Program interrupt to guest. */
printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
__func__, vcpu->arch.pc, vcpu->arch.last_inst);
/* For debugging, encode the failing instruction and
* report it to userspace. */
@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
kvmppc_queue_exception(vcpu, exit_nr);
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
kvmppc_account_exit(vcpu, FP_UNAVAIL);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
kvmppc_queue_exception(vcpu, exit_nr);
vcpu->stat.dsi_exits++;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
kvmppc_account_exit(vcpu, DSI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_INST_STORAGE:
vcpu->arch.esr = vcpu->arch.fault_esr;
kvmppc_queue_exception(vcpu, exit_nr);
vcpu->stat.isi_exits++;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
kvmppc_account_exit(vcpu, ISI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_SYSCALL:
kvmppc_queue_exception(vcpu, exit_nr);
vcpu->stat.syscall_exits++;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
kvmppc_account_exit(vcpu, SYSCALL_EXITS);
r = RESUME_GUEST;
break;
/* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_DTLB_MISS: {
struct tlbe *gtlbe;
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.fault_dear;
int gtlb_index;
gfn_t gfn;
/* Check the guest TLB. */
gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
if (!gtlbe) {
gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
kvmppc_queue_exception(vcpu, exit_nr);
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
vcpu->stat.dtlb_real_miss_exits++;
kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
r = RESUME_GUEST;
break;
}
gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
gtlbe->word2);
vcpu->stat.dtlb_virt_miss_exits++;
kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
r = RESUME_GUEST;
} else {
/* Guest has mapped and accessed a page which is not
* actually RAM. */
r = kvmppc_emulate_mmio(run, vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
break;
}
/* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_ITLB_MISS: {
struct tlbe *gtlbe;
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.pc;
gpa_t gpaddr;
gfn_t gfn;
int gtlb_index;
r = RESUME_GUEST;
/* Check the guest TLB. */
gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
if (!gtlbe) {
gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
kvmppc_queue_exception(vcpu, exit_nr);
vcpu->stat.itlb_real_miss_exits++;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
break;
}
vcpu->stat.itlb_virt_miss_exits++;
kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
gpaddr = tlb_xlate(gtlbe, eaddr);
gfn = gpaddr >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
/* The guest TLB had a mapping, but the shadow TLB
@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
gtlbe->word2);
kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
} else {
/* Guest mapped and leaped at non-RAM! */
kvmppc_queue_exception(vcpu,
BOOKE_INTERRUPT_MACHINE_CHECK);
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
}
break;
@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
mtspr(SPRN_DBSR, dbsr);
run->exit_reason = KVM_EXIT_DEBUG;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
r = RESUME_HOST;
break;
}
@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_disable();
kvmppc_check_and_deliver_interrupts(vcpu);
kvmppc_core_deliver_interrupts(vcpu);
/* Do some exit accounting. */
vcpu->stat.sum_exits++;
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
vcpu->stat.signal_exits++;
} else {
vcpu->stat.light_exits++;
}
} else {
switch (run->exit_reason) {
case KVM_EXIT_MMIO:
vcpu->stat.mmio_exits++;
break;
case KVM_EXIT_DCR:
vcpu->stat.dcr_exits++;
break;
case KVM_EXIT_INTR:
vcpu->stat.signal_exits++;
break;
kvmppc_account_exit(vcpu, SIGNAL_EXITS);
}
}
@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
tlbe->tid = 0;
tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
tlbe->word1 = 0;
tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
tlbe++;
tlbe->tid = 0;
tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
tlbe->word1 = 0xef600000;
tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
| PPC44x_TLB_I | PPC44x_TLB_G;
vcpu->arch.pc = 0;
vcpu->arch.msr = 0;
vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
* before it's programmed its own IVPR. */
vcpu->arch.ivpr = 0x55550000;
/* Since the guest can directly access the timebase, it must know the
* real timebase frequency. Accordingly, it must see the state of
* CCR1[TCS]. */
vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
kvmppc_init_timing_stats(vcpu);
return 0;
return kvmppc_core_vcpu_setup(vcpu);
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
vcpu->arch.xer = regs->xer;
vcpu->arch.msr = regs->msr;
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
vcpu->arch.sprg0 = regs->sprg0;
@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
struct tlbe *gtlbe;
int index;
gva_t eaddr;
u8 pid;
u8 as;
return kvmppc_core_vcpu_translate(vcpu, tr);
}
eaddr = tr->linear_address;
pid = (tr->linear_address >> 32) & 0xff;
as = (tr->linear_address >> 40) & 0x1;
int kvmppc_booke_init(void)
{
unsigned long ivor[16];
unsigned long max_ivor = 0;
int i;
index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
if (index == -1) {
tr->valid = 0;
return 0;
/* We install our own exception handlers by hijacking IVPR. IVPR must
* be 16-bit aligned, so we need a 64KB allocation. */
kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
VCPU_SIZE_ORDER);
if (!kvmppc_booke_handlers)
return -ENOMEM;
/* XXX make sure our handlers are smaller than Linux's */
/* Copy our interrupt handlers to match host IVORs. That way we don't
* have to swap the IVORs on every guest/host transition. */
ivor[0] = mfspr(SPRN_IVOR0);
ivor[1] = mfspr(SPRN_IVOR1);
ivor[2] = mfspr(SPRN_IVOR2);
ivor[3] = mfspr(SPRN_IVOR3);
ivor[4] = mfspr(SPRN_IVOR4);
ivor[5] = mfspr(SPRN_IVOR5);
ivor[6] = mfspr(SPRN_IVOR6);
ivor[7] = mfspr(SPRN_IVOR7);
ivor[8] = mfspr(SPRN_IVOR8);
ivor[9] = mfspr(SPRN_IVOR9);
ivor[10] = mfspr(SPRN_IVOR10);
ivor[11] = mfspr(SPRN_IVOR11);
ivor[12] = mfspr(SPRN_IVOR12);
ivor[13] = mfspr(SPRN_IVOR13);
ivor[14] = mfspr(SPRN_IVOR14);
ivor[15] = mfspr(SPRN_IVOR15);
for (i = 0; i < 16; i++) {
if (ivor[i] > max_ivor)
max_ivor = ivor[i];
memcpy((void *)kvmppc_booke_handlers + ivor[i],
kvmppc_handlers_start + i * kvmppc_handler_len,
kvmppc_handler_len);
}
gtlbe = &vcpu->arch.guest_tlb[index];
tr->physical_address = tlb_xlate(gtlbe, eaddr);
/* XXX what does "writeable" and "usermode" even mean? */
tr->valid = 1;
flush_icache_range(kvmppc_booke_handlers,
kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
return 0;
}
void __exit kvmppc_booke_exit(void)
{
free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
kvm_exit();
}

60
arch/powerpc/kvm/booke.h Normal file
View File

@ -0,0 +1,60 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#ifndef __KVM_BOOKE_H__
#define __KVM_BOOKE_H__
#include <linux/types.h>
#include <linux/kvm_host.h>
#include "timing.h"
/* interrupt priortity ordering */
#define BOOKE_IRQPRIO_DATA_STORAGE 0
#define BOOKE_IRQPRIO_INST_STORAGE 1
#define BOOKE_IRQPRIO_ALIGNMENT 2
#define BOOKE_IRQPRIO_PROGRAM 3
#define BOOKE_IRQPRIO_FP_UNAVAIL 4
#define BOOKE_IRQPRIO_SYSCALL 5
#define BOOKE_IRQPRIO_AP_UNAVAIL 6
#define BOOKE_IRQPRIO_DTLB_MISS 7
#define BOOKE_IRQPRIO_ITLB_MISS 8
#define BOOKE_IRQPRIO_MACHINE_CHECK 9
#define BOOKE_IRQPRIO_DEBUG 10
#define BOOKE_IRQPRIO_CRITICAL 11
#define BOOKE_IRQPRIO_WATCHDOG 12
#define BOOKE_IRQPRIO_EXTERNAL 13
#define BOOKE_IRQPRIO_FIT 14
#define BOOKE_IRQPRIO_DECREMENTER 15
/* Helper function for "full" MSR writes. No need to call this if only EE is
* changing. */
static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
{
if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
vcpu->arch.msr = new_msr;
if (vcpu->arch.msr & MSR_WE) {
kvm_vcpu_block(vcpu);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
};
}
#endif /* __KVM_BOOKE_H__ */

View File

@ -1,83 +0,0 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#include <linux/errno.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <asm/cacheflush.h>
#include <asm/kvm_ppc.h>
unsigned long kvmppc_booke_handlers;
static int kvmppc_booke_init(void)
{
unsigned long ivor[16];
unsigned long max_ivor = 0;
int i;
/* We install our own exception handlers by hijacking IVPR. IVPR must
* be 16-bit aligned, so we need a 64KB allocation. */
kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
VCPU_SIZE_ORDER);
if (!kvmppc_booke_handlers)
return -ENOMEM;
/* XXX make sure our handlers are smaller than Linux's */
/* Copy our interrupt handlers to match host IVORs. That way we don't
* have to swap the IVORs on every guest/host transition. */
ivor[0] = mfspr(SPRN_IVOR0);
ivor[1] = mfspr(SPRN_IVOR1);
ivor[2] = mfspr(SPRN_IVOR2);
ivor[3] = mfspr(SPRN_IVOR3);
ivor[4] = mfspr(SPRN_IVOR4);
ivor[5] = mfspr(SPRN_IVOR5);
ivor[6] = mfspr(SPRN_IVOR6);
ivor[7] = mfspr(SPRN_IVOR7);
ivor[8] = mfspr(SPRN_IVOR8);
ivor[9] = mfspr(SPRN_IVOR9);
ivor[10] = mfspr(SPRN_IVOR10);
ivor[11] = mfspr(SPRN_IVOR11);
ivor[12] = mfspr(SPRN_IVOR12);
ivor[13] = mfspr(SPRN_IVOR13);
ivor[14] = mfspr(SPRN_IVOR14);
ivor[15] = mfspr(SPRN_IVOR15);
for (i = 0; i < 16; i++) {
if (ivor[i] > max_ivor)
max_ivor = ivor[i];
memcpy((void *)kvmppc_booke_handlers + ivor[i],
kvmppc_handlers_start + i * kvmppc_handler_len,
kvmppc_handler_len);
}
flush_icache_range(kvmppc_booke_handlers,
kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
}
static void __exit kvmppc_booke_exit(void)
{
free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
kvm_exit();
}
module_init(kvmppc_booke_init)
module_exit(kvmppc_booke_exit)

View File

@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host)
li r6, 1
slw r6, r6, r5
#ifdef CONFIG_KVM_EXIT_TIMING
/* save exit time */
1:
mfspr r7, SPRN_TBRU
mfspr r8, SPRN_TBRL
mfspr r9, SPRN_TBRU
cmpw r9, r7
bne 1b
stw r8, VCPU_TIMING_EXIT_TBL(r4)
stw r9, VCPU_TIMING_EXIT_TBU(r4)
#endif
/* Save the faulting instruction and all GPRs for emulation. */
andi. r7, r6, NEED_INST_MASK
beq ..skip_inst_copy
@ -335,54 +347,6 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
/* Prevent all asynchronous TLB updates. */
mfmsr r5
lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
andc r6, r5, r6
mtmsr r6
/* Load the guest mappings, leaving the host's "pinned" kernel mappings
* in place. */
mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
li r5, PPC44x_TLB_SIZE
lis r5, tlb_44x_hwater@ha
lwz r5, tlb_44x_hwater@l(r5)
mtctr r5
addi r9, r4, VCPU_SHADOW_TLB
addi r5, r4, VCPU_SHADOW_MOD
li r3, 0
1:
lbzx r7, r3, r5
cmpwi r7, 0
beq 3f
/* Load guest entry. */
mulli r11, r3, TLBE_BYTES
add r11, r11, r9
lwz r7, 0(r11)
mtspr SPRN_MMUCR, r7
lwz r7, 4(r11)
tlbwe r7, r3, PPC44x_TLB_PAGEID
lwz r7, 8(r11)
tlbwe r7, r3, PPC44x_TLB_XLAT
lwz r7, 12(r11)
tlbwe r7, r3, PPC44x_TLB_ATTRIB
3:
addi r3, r3, 1 /* Increment index. */
bdnz 1b
mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
/* Clear bitmap of modified TLB entries */
li r5, PPC44x_TLB_SIZE>>2
mtctr r5
addi r5, r4, VCPU_SHADOW_MOD - 4
li r6, 0
1:
stwu r6, 4(r5)
bdnz 1b
iccci 0, 0 /* XXX hack */
/* Load some guest volatiles. */
@ -423,6 +387,18 @@ lightweight_exit:
lwz r3, VCPU_SPRG7(r4)
mtspr SPRN_SPRG7, r3
#ifdef CONFIG_KVM_EXIT_TIMING
/* save enter time */
1:
mfspr r6, SPRN_TBRU
mfspr r7, SPRN_TBRL
mfspr r8, SPRN_TBRU
cmpw r8, r6
bne 1b
stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
#endif
/* Finish loading guest volatiles and jump to guest. */
lwz r3, VCPU_CTR(r4)
mtctr r3

View File

@ -23,161 +23,14 @@
#include <linux/string.h>
#include <linux/kvm_host.h>
#include <asm/dcr.h>
#include <asm/dcr-regs.h>
#include <asm/reg.h>
#include <asm/time.h>
#include <asm/byteorder.h>
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
#include "timing.h"
#include "44x_tlb.h"
/* Instruction decoding */
static inline unsigned int get_op(u32 inst)
{
return inst >> 26;
}
static inline unsigned int get_xop(u32 inst)
{
return (inst >> 1) & 0x3ff;
}
static inline unsigned int get_sprn(u32 inst)
{
return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}
static inline unsigned int get_dcrn(u32 inst)
{
return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}
static inline unsigned int get_rt(u32 inst)
{
return (inst >> 21) & 0x1f;
}
static inline unsigned int get_rs(u32 inst)
{
return (inst >> 21) & 0x1f;
}
static inline unsigned int get_ra(u32 inst)
{
return (inst >> 16) & 0x1f;
}
static inline unsigned int get_rb(u32 inst)
{
return (inst >> 11) & 0x1f;
}
static inline unsigned int get_rc(u32 inst)
{
return inst & 0x1;
}
static inline unsigned int get_ws(u32 inst)
{
return (inst >> 11) & 0x1f;
}
static inline unsigned int get_d(u32 inst)
{
return inst & 0xffff;
}
static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
const struct tlbe *tlbe)
{
gpa_t gpa;
if (!get_tlb_v(tlbe))
return 0;
/* Does it match current guest AS? */
/* XXX what about IS != DS? */
if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
return 0;
gpa = get_tlb_raddr(tlbe);
if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
/* Mapping is not for RAM. */
return 0;
return 1;
}
static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
{
u64 eaddr;
u64 raddr;
u64 asid;
u32 flags;
struct tlbe *tlbe;
unsigned int ra;
unsigned int rs;
unsigned int ws;
unsigned int index;
ra = get_ra(inst);
rs = get_rs(inst);
ws = get_ws(inst);
index = vcpu->arch.gpr[ra];
if (index > PPC44x_TLB_SIZE) {
printk("%s: index %d\n", __func__, index);
kvmppc_dump_vcpu(vcpu);
return EMULATE_FAIL;
}
tlbe = &vcpu->arch.guest_tlb[index];
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe->word0 & PPC44x_TLB_VALID) {
eaddr = get_tlb_eaddr(tlbe);
asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
}
switch (ws) {
case PPC44x_TLB_PAGEID:
tlbe->tid = vcpu->arch.mmucr & 0xff;
tlbe->word0 = vcpu->arch.gpr[rs];
break;
case PPC44x_TLB_XLAT:
tlbe->word1 = vcpu->arch.gpr[rs];
break;
case PPC44x_TLB_ATTRIB:
tlbe->word2 = vcpu->arch.gpr[rs];
break;
default:
return EMULATE_FAIL;
}
if (tlbe_is_host_safe(vcpu, tlbe)) {
eaddr = get_tlb_eaddr(tlbe);
raddr = get_tlb_raddr(tlbe);
asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
flags = tlbe->word2 & 0xffff;
/* Create a 4KB mapping on the host. If the guest wanted a
* large page, only the first 4KB is mapped here and the rest
* are mapped on the fly. */
kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
}
KVMTRACE_5D(GTLB_WRITE, vcpu, index,
tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
handler);
return EMULATE_DONE;
}
static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.tcr & TCR_DIE) {
/* The decrementer ticks at the same rate as the timebase, so
@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
}
}
static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
{
vcpu->arch.pc = vcpu->arch.srr0;
kvmppc_set_msr(vcpu, vcpu->arch.srr1);
}
/* XXX to do:
* lhax
* lhaux
@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
*
* XXX is_bigendian should depend on MMU mapping or MSR[LE]
*/
/* XXX Should probably auto-generate instruction decoding for a particular core
* from opcode tables in the future. */
int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 inst = vcpu->arch.last_inst;
u32 ea;
int ra;
int rb;
int rc;
int rs;
int rt;
int sprn;
int dcrn;
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
switch (get_op(inst)) {
case 3: /* trap */
printk("trap!\n");
kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
case 3: /* trap */
vcpu->arch.esr |= ESR_PTR;
kvmppc_core_queue_program(vcpu);
advance = 0;
break;
case 19:
switch (get_xop(inst)) {
case 50: /* rfi */
kvmppc_emul_rfi(vcpu);
advance = 0;
break;
default:
emulated = EMULATE_FAIL;
break;
}
break;
case 31:
switch (get_xop(inst)) {
@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
case 83: /* mfmsr */
rt = get_rt(inst);
vcpu->arch.gpr[rt] = vcpu->arch.msr;
break;
case 87: /* lbzx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
case 131: /* wrtee */
rs = get_rs(inst);
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
| (vcpu->arch.gpr[rs] & MSR_EE);
break;
case 146: /* mtmsr */
rs = get_rs(inst);
kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
break;
case 151: /* stwx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 1);
break;
case 163: /* wrteei */
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
| (inst & MSR_EE);
break;
case 215: /* stbx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
case 323: /* mfdcr */
dcrn = get_dcrn(inst);
rt = get_rt(inst);
/* The guest may access CPR0 registers to determine the timebase
* frequency, and it must know the real host frequency because it
* can directly access the timebase registers.
*
* It would be possible to emulate those accesses in userspace,
* but userspace can really only figure out the end frequency.
* We could decompose that into the factors that compute it, but
* that's tricky math, and it's easier to just report the real
* CPR0 values.
*/
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
break;
case DCRN_CPR0_CONFIG_DATA:
local_irq_disable();
mtdcr(DCRN_CPR0_CONFIG_ADDR,
vcpu->arch.cpr0_cfgaddr);
vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
local_irq_enable();
break;
default:
run->dcr.dcrn = dcrn;
run->dcr.data = 0;
run->dcr.is_write = 0;
vcpu->arch.io_gpr = rt;
vcpu->arch.dcr_needed = 1;
emulated = EMULATE_DO_DCR;
}
break;
case 339: /* mfspr */
sprn = get_sprn(inst);
rt = get_rt(inst);
@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
case SPRN_SRR1:
vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
case SPRN_MMUCR:
vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
case SPRN_PID:
vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
case SPRN_IVPR:
vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
case SPRN_CCR0:
vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
case SPRN_CCR1:
vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
case SPRN_PVR:
vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
case SPRN_DEAR:
vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
case SPRN_ESR:
vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
case SPRN_DBCR0:
vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
case SPRN_DBCR1:
vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
case SPRN_IVOR0:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
case SPRN_IVOR1:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
case SPRN_IVOR2:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
case SPRN_IVOR3:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
case SPRN_IVOR4:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
case SPRN_IVOR5:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
case SPRN_IVOR6:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
case SPRN_IVOR7:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
case SPRN_IVOR8:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
case SPRN_IVOR9:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
case SPRN_IVOR10:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
case SPRN_IVOR11:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
case SPRN_IVOR12:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
case SPRN_IVOR13:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
case SPRN_IVOR14:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
case SPRN_IVOR15:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
default:
printk("mfspr: unknown spr %x\n", sprn);
vcpu->arch.gpr[rt] = 0;
emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
if (emulated == EMULATE_FAIL) {
printk("mfspr: unknown spr %x\n", sprn);
vcpu->arch.gpr[rt] = 0;
}
break;
}
break;
@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
case 451: /* mtdcr */
dcrn = get_dcrn(inst);
rs = get_rs(inst);
/* emulate some access in kernel */
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
break;
default:
run->dcr.dcrn = dcrn;
run->dcr.data = vcpu->arch.gpr[rs];
run->dcr.is_write = 1;
vcpu->arch.dcr_needed = 1;
emulated = EMULATE_DO_DCR;
}
break;
case 467: /* mtspr */
sprn = get_sprn(inst);
rs = get_rs(inst);
@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
case SPRN_SRR1:
vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
case SPRN_MMUCR:
vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
case SPRN_PID:
kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
case SPRN_CCR0:
vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
case SPRN_CCR1:
vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
case SPRN_DEAR:
vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
case SPRN_ESR:
vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
case SPRN_DBCR0:
vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
case SPRN_DBCR1:
vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
/* XXX We need to context-switch the timebase for
* watchdog and FIT. */
@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_emulate_dec(vcpu);
break;
case SPRN_TSR:
vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
case SPRN_TCR:
vcpu->arch.tcr = vcpu->arch.gpr[rs];
kvmppc_emulate_dec(vcpu);
break;
case SPRN_SPRG0:
vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG1:
@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_SPRG3:
vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
/* Note: SPRG4-7 are user-readable. These values are
* loaded into the real SPRGs when resuming the
* guest. */
case SPRN_SPRG4:
vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG5:
vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG6:
vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG7:
vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
case SPRN_IVPR:
vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR0:
vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR1:
vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR2:
vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR3:
vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR4:
vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR5:
vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR6:
vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR7:
vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR8:
vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR9:
vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR10:
vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR11:
vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR12:
vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR13:
vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR14:
vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
case SPRN_IVOR15:
vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
default:
printk("mtspr: unknown spr %x\n", sprn);
emulated = EMULATE_FAIL;
emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
if (emulated == EMULATE_FAIL)
printk("mtspr: unknown spr %x\n", sprn);
break;
}
break;
@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 0);
break;
case 978: /* tlbwe */
emulated = kvmppc_emul_tlbwe(vcpu, inst);
break;
case 914: { /* tlbsx */
int index;
unsigned int as = get_mmucr_sts(vcpu);
unsigned int pid = get_mmucr_stid(vcpu);
rt = get_rt(inst);
ra = get_ra(inst);
rb = get_rb(inst);
rc = get_rc(inst);
ea = vcpu->arch.gpr[rb];
if (ra)
ea += vcpu->arch.gpr[ra];
index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
if (rc) {
if (index < 0)
vcpu->arch.cr &= ~0x20000000;
else
vcpu->arch.cr |= 0x20000000;
}
vcpu->arch.gpr[rt] = index;
}
break;
case 790: /* lhbrx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
2, 0);
break;
case 966: /* iccci */
break;
default:
printk("unknown: op %d xop %d\n", get_op(inst),
get_xop(inst));
/* Attempt core-specific emulation below. */
emulated = EMULATE_FAIL;
break;
}
break;
@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
default:
printk("unknown op %d\n", get_op(inst));
emulated = EMULATE_FAIL;
break;
}
KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
if (emulated == EMULATE_FAIL) {
emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
if (emulated == EMULATE_FAIL) {
advance = 0;
printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
"(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
}
}
KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
if (advance)
vcpu->arch.pc += 4; /* Advance past emulated instruction. */

View File

@ -28,9 +28,9 @@
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
#include "timing.h"
#include "../mm/mmu_decl.h"
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
return gfn;
@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void)
void kvm_arch_check_processor_compat(void *rtn)
{
int r;
if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
r = 0;
else
r = -ENOTSUPP;
*(int *)rtn = r;
*(int *)rtn = kvmppc_core_check_processor_compat();
}
struct kvm *kvm_arch_create_vm(void)
@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext)
int r;
switch (ext) {
case KVM_CAP_USER_MEMORY:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
int err;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu) {
err = -ENOMEM;
goto out;
}
err = kvm_vcpu_init(vcpu, kvm, id);
if (err)
goto free_vcpu;
vcpu = kvmppc_core_vcpu_create(kvm, id);
kvmppc_create_vcpu_debugfs(vcpu, id);
return vcpu;
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
return ERR_PTR(err);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
kvmppc_remove_vcpu_debugfs(vcpu);
kvmppc_core_vcpu_free(vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
return test_bit(priority, &vcpu->arch.pending_exceptions);
return kvmppc_core_pending_dec(vcpu);
}
static void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
kvmppc_core_queue_dec(vcpu);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kvmppc_core_destroy_mmu(vcpu);
}
/* Note: clearing MSR[DE] just means that the debug interrupt will not be
* delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
* If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
* will be delivered as an "imprecise debug event" (which is indicated by
* DBSR[IDE].
*/
static void kvmppc_disable_debug_interrupts(void)
{
mtmsr(mfmsr() & ~MSR_DE);
}
static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
{
kvmppc_disable_debug_interrupts();
mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
mtmsr(vcpu->arch.host_msr);
}
static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
{
struct kvm_guest_debug *dbg = &vcpu->guest_debug;
u32 dbcr0 = 0;
vcpu->arch.host_msr = mfmsr();
kvmppc_disable_debug_interrupts();
/* Save host debug register state. */
vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
/* set registers up for guest */
if (dbg->bp[0]) {
mtspr(SPRN_IAC1, dbg->bp[0]);
dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
}
if (dbg->bp[1]) {
mtspr(SPRN_IAC2, dbg->bp[1]);
dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
}
if (dbg->bp[2]) {
mtspr(SPRN_IAC3, dbg->bp[2]);
dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
}
if (dbg->bp[3]) {
mtspr(SPRN_IAC4, dbg->bp[3]);
dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
}
mtspr(SPRN_DBCR0, dbcr0);
mtspr(SPRN_DBCR1, 0);
mtspr(SPRN_DBCR2, 0);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
int i;
if (vcpu->guest_debug.enabled)
kvmppc_load_guest_debug_registers(vcpu);
kvmppc_core_load_guest_debugstate(vcpu);
/* Mark every guest entry in the shadow TLB entry modified, so that they
* will all be reloaded on the next vcpu run (instead of being
* demand-faulted). */
for (i = 0; i <= tlb_44x_hwater; i++)
kvmppc_tlbe_set_modified(vcpu, i);
kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_debug.enabled)
kvmppc_restore_host_debug_state(vcpu);
kvmppc_core_load_host_debugstate(vcpu);
/* Don't leave guest TLB entries resident when being de-scheduled. */
/* XXX It would be nice to differentiate between heavyweight exit and
* sched_out here, since we could avoid the TLB flush for heavyweight
* exits. */
_tlbil_all();
kvmppc_core_vcpu_put(vcpu);
}
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
*gpr = run->dcr.data;
}
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
if (run->mmio.len > sizeof(*gpr)) {
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->arch.dcr_needed = 0;
}
kvmppc_check_and_deliver_interrupts(vcpu);
kvmppc_core_deliver_interrupts(vcpu);
local_irq_disable();
kvm_guest_enter();
@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
kvmppc_core_queue_external(vcpu, irq);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);

239
arch/powerpc/kvm/timing.c Normal file
View File

@ -0,0 +1,239 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
* Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
*/
#include <linux/kvm_host.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <asm/time.h>
#include <asm-generic/div64.h>
#include "timing.h"
void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
{
int i;
/* pause guest execution to avoid concurrent updates */
local_irq_disable();
mutex_lock(&vcpu->mutex);
vcpu->arch.last_exit_type = 0xDEAD;
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
vcpu->arch.timing_count_type[i] = 0;
vcpu->arch.timing_max_duration[i] = 0;
vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
vcpu->arch.timing_sum_duration[i] = 0;
vcpu->arch.timing_sum_quad_duration[i] = 0;
}
vcpu->arch.timing_last_exit = 0;
vcpu->arch.timing_exit.tv64 = 0;
vcpu->arch.timing_last_enter.tv64 = 0;
mutex_unlock(&vcpu->mutex);
local_irq_enable();
}
static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
{
u64 old;
do_div(duration, tb_ticks_per_usec);
if (unlikely(duration > 0xFFFFFFFF)) {
printk(KERN_ERR"%s - duration too big -> overflow"
" duration %lld type %d exit #%d\n",
__func__, duration, type,
vcpu->arch.timing_count_type[type]);
return;
}
vcpu->arch.timing_count_type[type]++;
/* sum */
old = vcpu->arch.timing_sum_duration[type];
vcpu->arch.timing_sum_duration[type] += duration;
if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
printk(KERN_ERR"%s - wrap adding sum of durations"
" old %lld new %lld type %d exit # of type %d\n",
__func__, old, vcpu->arch.timing_sum_duration[type],
type, vcpu->arch.timing_count_type[type]);
}
/* square sum */
old = vcpu->arch.timing_sum_quad_duration[type];
vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
printk(KERN_ERR"%s - wrap adding sum of squared durations"
" old %lld new %lld type %d exit # of type %d\n",
__func__, old,
vcpu->arch.timing_sum_quad_duration[type],
type, vcpu->arch.timing_count_type[type]);
}
/* set min/max */
if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
vcpu->arch.timing_min_duration[type] = duration;
if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
vcpu->arch.timing_max_duration[type] = duration;
}
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
{
u64 exit = vcpu->arch.timing_last_exit;
u64 enter = vcpu->arch.timing_last_enter.tv64;
/* save exit time, used next exit when the reenter time is known */
vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
return; /* skip incomplete cycle (e.g. after reset) */
/* update statistics for average and standard deviation */
add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
/* enter -> timing_last_exit is time spent in guest - log this too */
add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
TIMEINGUEST);
}
static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
[MMIO_EXITS] = "MMIO",
[DCR_EXITS] = "DCR",
[SIGNAL_EXITS] = "SIGNAL",
[ITLB_REAL_MISS_EXITS] = "ITLBREAL",
[ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
[DTLB_REAL_MISS_EXITS] = "DTLBREAL",
[DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
[SYSCALL_EXITS] = "SYSCALL",
[ISI_EXITS] = "ISI",
[DSI_EXITS] = "DSI",
[EMULATED_INST_EXITS] = "EMULINST",
[EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
[EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
[EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
[EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
[EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
[EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
[EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
[EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
[EMULATED_RFI_EXITS] = "EMUL_RFI",
[DEC_EXITS] = "DEC",
[EXT_INTR_EXITS] = "EXTINT",
[HALT_WAKEUP] = "HALT",
[USR_PR_INST] = "USR_PR_INST",
[FP_UNAVAIL] = "FP_UNAVAIL",
[DEBUG_EXITS] = "DEBUG",
[TIMEINGUEST] = "TIMEINGUEST"
};
static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
{
struct kvm_vcpu *vcpu = m->private;
int i;
seq_printf(m, "%s", "type count min max sum sum_squared\n");
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
kvm_exit_names[i],
vcpu->arch.timing_count_type[i],
vcpu->arch.timing_min_duration[i],
vcpu->arch.timing_max_duration[i],
vcpu->arch.timing_sum_duration[i],
vcpu->arch.timing_sum_quad_duration[i]);
}
return 0;
}
/* Write 'c' to clear the timing statistics. */
static ssize_t kvmppc_exit_timing_write(struct file *file,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
int err = -EINVAL;
char c;
if (count > 1) {
goto done;
}
if (get_user(c, user_buf)) {
err = -EFAULT;
goto done;
}
if (c == 'c') {
struct seq_file *seqf = (struct seq_file *)file->private_data;
struct kvm_vcpu *vcpu = seqf->private;
/* Write does not affect our buffers previously generated with
* show. seq_file is locked here to prevent races of init with
* a show call */
mutex_lock(&seqf->lock);
kvmppc_init_timing_stats(vcpu);
mutex_unlock(&seqf->lock);
err = count;
}
done:
return err;
}
static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
{
return single_open(file, kvmppc_exit_timing_show, inode->i_private);
}
static struct file_operations kvmppc_exit_timing_fops = {
.owner = THIS_MODULE,
.open = kvmppc_exit_timing_open,
.read = seq_read,
.write = kvmppc_exit_timing_write,
.llseek = seq_lseek,
.release = single_release,
};
void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
{
static char dbg_fname[50];
struct dentry *debugfs_file;
snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
current->pid, id);
debugfs_file = debugfs_create_file(dbg_fname, 0666,
kvm_debugfs_dir, vcpu,
&kvmppc_exit_timing_fops);
if (!debugfs_file) {
printk(KERN_ERR"%s: error creating debugfs file %s\n",
__func__, dbg_fname);
return;
}
vcpu->arch.debugfs_exit_timing = debugfs_file;
}
void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.debugfs_exit_timing) {
debugfs_remove(vcpu->arch.debugfs_exit_timing);
vcpu->arch.debugfs_exit_timing = NULL;
}
}

102
arch/powerpc/kvm/timing.h Normal file
View File

@ -0,0 +1,102 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
* Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
*/
#ifndef __POWERPC_KVM_EXITTIMING_H__
#define __POWERPC_KVM_EXITTIMING_H__
#include <linux/kvm_host.h>
#include <asm/kvm_host.h>
#ifdef CONFIG_KVM_EXIT_TIMING
void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
{
vcpu->arch.last_exit_type = type;
}
#else
/* if exit timing is not configured there is no need to build the c file */
static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
unsigned int id) {}
static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
#endif /* CONFIG_KVM_EXIT_TIMING */
/* account the exit in kvm_stats */
static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
{
/* type has to be known at build time for optimization */
BUILD_BUG_ON(__builtin_constant_p(type));
switch (type) {
case EXT_INTR_EXITS:
vcpu->stat.ext_intr_exits++;
break;
case DEC_EXITS:
vcpu->stat.dec_exits++;
break;
case EMULATED_INST_EXITS:
vcpu->stat.emulated_inst_exits++;
break;
case DCR_EXITS:
vcpu->stat.dcr_exits++;
break;
case DSI_EXITS:
vcpu->stat.dsi_exits++;
break;
case ISI_EXITS:
vcpu->stat.isi_exits++;
break;
case SYSCALL_EXITS:
vcpu->stat.syscall_exits++;
break;
case DTLB_REAL_MISS_EXITS:
vcpu->stat.dtlb_real_miss_exits++;
break;
case DTLB_VIRT_MISS_EXITS:
vcpu->stat.dtlb_virt_miss_exits++;
break;
case MMIO_EXITS:
vcpu->stat.mmio_exits++;
break;
case ITLB_REAL_MISS_EXITS:
vcpu->stat.itlb_real_miss_exits++;
break;
case ITLB_VIRT_MISS_EXITS:
vcpu->stat.itlb_virt_miss_exits++;
break;
case SIGNAL_EXITS:
vcpu->stat.signal_exits++;
break;
}
}
/* wrapper to set exit time and account for it in kvm_stats */
static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
{
kvmppc_set_exit_type(vcpu, type);
kvmppc_account_exit_stat(vcpu, type);
}
#endif /* __POWERPC_KVM_EXITTIMING_H__ */

View File

@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_dev_ioctl_check_extension(long ext)
{
switch (ext) {
case KVM_CAP_USER_MEMORY:
return 1;
default:
return 0;
}
@ -185,8 +183,6 @@ struct kvm *kvm_arch_create_vm(void)
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
try_module_get(THIS_MODULE);
return kvm;
out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
@ -196,13 +192,33 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(rc);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
kfree(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
unsigned int i;
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_arch_vcpu_destroy(kvm->vcpus[i]);
kvm->vcpus[i] = NULL;
}
}
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
debug_unregister(kvm->arch.dbf);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
kfree(kvm);
module_put(THIS_MODULE);
}
/* Section: vcpu related */
@ -213,8 +229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but does'nt call it */
BUG();
/* Nothing todo */
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@ -308,8 +323,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
try_module_get(THIS_MODULE);
return vcpu;
out_free_cpu:
kfree(vcpu);
@ -317,14 +330,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
return ERR_PTR(rc);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
free_page((unsigned long)(vcpu->arch.sie_block));
kfree(vcpu);
module_put(THIS_MODULE);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */

View File

@ -21,6 +21,7 @@
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#define KVM_MAX_VCPUS 16
#define KVM_MEMORY_SLOTS 32
@ -86,6 +87,7 @@
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
extern spinlock_t kvm_lock;
@ -180,6 +182,8 @@ struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
struct list_head oos_link;
/*
* The following two entries are used to key the shadow page in the
* hash table.
@ -190,13 +194,16 @@ struct kvm_mmu_page {
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
/*
* One bit set per slot which has memory
* in this shadow page.
*/
DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
bool unsync;
bool unsync_children;
bool global;
unsigned int unsync_children;
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@ -327,8 +334,10 @@ struct kvm_vcpu_arch {
bool nmi_pending;
bool nmi_injected;
bool nmi_window_open;
u64 mtrr[0x100];
struct mtrr_state_type mtrr_state;
u32 pat;
};
struct kvm_mem_alias {
@ -350,11 +359,13 @@ struct kvm_arch{
*/
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
struct list_head oos_global_pages;
struct dmar_domain *intel_iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
int round_robin_prev_vcpu;
unsigned int tss_addr;
@ -378,6 +389,7 @@ struct kvm_vm_stat {
u32 mmu_recycled;
u32 mmu_cache_miss;
u32 mmu_unsync;
u32 mmu_unsync_global;
u32 remote_tlb_flush;
u32 lpages;
};
@ -397,6 +409,7 @@ struct kvm_vcpu_stat {
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
u32 request_nmi_exits;
u32 irq_exits;
u32 host_state_reload;
u32 efer_reload;
@ -405,6 +418,7 @@ struct kvm_vcpu_stat {
u32 insn_emulation_fail;
u32 hypercalls;
u32 irq_injections;
u32 nmi_injections;
};
struct descriptor_table {
@ -477,6 +491,7 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
int (*get_mt_mask_shift)(void);
};
extern struct kvm_x86_ops *kvm_x86_ops;
@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector);
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
const u8 *new, int bytes,
bool guest_initiated);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
@ -607,6 +624,8 @@ void kvm_disable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
#define MSR_IA32_TIME_STAMP_COUNTER 0x010
#define TSS_IOPB_BASE_OFFSET 0x66

View File

@ -123,6 +123,7 @@ struct decode_cache {
u8 ad_bytes;
u8 rex_prefix;
struct operand src;
struct operand src2;
struct operand dst;
bool has_seg_override;
u8 seg_override;
@ -146,22 +147,18 @@ struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
/* Linear faulting address (if emulating a page-faulting instruction) */
unsigned long eflags;
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode;
u32 cs_base;
/* decode cache */
struct decode_cache decode;
};
/* Repeat String Operation Prefix */
#define REPE_PREFIX 1
#define REPNE_PREFIX 2
#define REPE_PREFIX 1
#define REPNE_PREFIX 2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
@ -170,7 +167,7 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
/* Host execution mode. */
#if defined(__i386__)
#if defined(CONFIG_X86_32)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
#elif defined(CONFIG_X86_64)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64

View File

@ -57,6 +57,31 @@ struct mtrr_gentry {
};
#endif /* !__i386__ */
struct mtrr_var_range {
u32 base_lo;
u32 base_hi;
u32 mask_lo;
u32 mask_hi;
};
/* In the Intel processor's MTRR interface, the MTRR type is always held in
an 8 bit field: */
typedef u8 mtrr_type;
#define MTRR_NUM_FIXED_RANGES 88
#define MTRR_MAX_VAR_RANGES 256
struct mtrr_state_type {
struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
unsigned char enabled;
unsigned char have_fixed;
mtrr_type def_type;
};
#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
/* These are the various ioctls */
#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)

View File

@ -0,0 +1,132 @@
/* CPU virtualization extensions handling
*
* This should carry the code for handling CPU virtualization extensions
* that needs to live in the kernel core.
*
* Author: Eduardo Habkost <ehabkost@redhat.com>
*
* Copyright (C) 2008, Red Hat Inc.
*
* Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#ifndef _ASM_X86_VIRTEX_H
#define _ASM_X86_VIRTEX_H
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/vmx.h>
#include <asm/svm.h>
/*
* VMX functions:
*/
static inline int cpu_has_vmx(void)
{
unsigned long ecx = cpuid_ecx(1);
return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
}
/** Disable VMX on the current CPU
*
* vmxoff causes a undefined-opcode exception if vmxon was not run
* on the CPU previously. Only call this function if you know VMX
* is enabled.
*/
static inline void cpu_vmxoff(void)
{
asm volatile (ASM_VMX_VMXOFF : : : "cc");
write_cr4(read_cr4() & ~X86_CR4_VMXE);
}
static inline int cpu_vmx_enabled(void)
{
return read_cr4() & X86_CR4_VMXE;
}
/** Disable VMX if it is enabled on the current CPU
*
* You shouldn't call this if cpu_has_vmx() returns 0.
*/
static inline void __cpu_emergency_vmxoff(void)
{
if (cpu_vmx_enabled())
cpu_vmxoff();
}
/** Disable VMX if it is supported and enabled on the current CPU
*/
static inline void cpu_emergency_vmxoff(void)
{
if (cpu_has_vmx())
__cpu_emergency_vmxoff();
}
/*
* SVM functions:
*/
/** Check if the CPU has SVM support
*
* You can use the 'msg' arg to get a message describing the problem,
* if the function returns zero. Simply pass NULL if you are not interested
* on the messages; gcc should take care of not generating code for
* the messages on this case.
*/
static inline int cpu_has_svm(const char **msg)
{
uint32_t eax, ebx, ecx, edx;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
if (msg)
*msg = "not amd";
return 0;
}
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
if (eax < SVM_CPUID_FUNC) {
if (msg)
*msg = "can't execute cpuid_8000000a";
return 0;
}
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
if (msg)
*msg = "svm not available";
return 0;
}
return 1;
}
/** Disable SVM on the current CPU
*
* You should call this only if cpu_has_svm() returned true.
*/
static inline void cpu_svm_disable(void)
{
uint64_t efer;
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
}
/** Makes sure SVM is disabled, if it is supported on the CPU
*/
static inline void cpu_emergency_svm_disable(void)
{
if (cpu_has_svm(NULL))
cpu_svm_disable();
}
#endif /* _ASM_X86_VIRTEX_H */

View File

@ -63,10 +63,13 @@
#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
#define VM_EXIT_SAVE_IA32_PAT 0x00040000
#define VM_EXIT_LOAD_IA32_PAT 0x00080000
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
/* VMCS Encodings */
enum vmcs_field {
@ -112,6 +115,8 @@ enum vmcs_field {
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
GUEST_IA32_PAT = 0x00002804,
GUEST_IA32_PAT_HIGH = 0x00002805,
GUEST_PDPTR0 = 0x0000280a,
GUEST_PDPTR0_HIGH = 0x0000280b,
GUEST_PDPTR1 = 0x0000280c,
@ -120,6 +125,8 @@ enum vmcs_field {
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
HOST_IA32_PAT = 0x00002c00,
HOST_IA32_PAT_HIGH = 0x00002c01,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
@ -331,8 +338,9 @@ enum vmcs_field {
#define AR_RESERVD_MASK 0xfffe0f00
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0)
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1)
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2)
#define VMX_NR_VPIDS (1 << 16)
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
@ -356,4 +364,19 @@ enum vmcs_field {
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
#endif

View File

@ -14,14 +14,6 @@
#include <asm/pat.h>
#include "mtrr.h"
struct mtrr_state {
struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
mtrr_type fixed_ranges[NUM_FIXED_RANGES];
unsigned char enabled;
unsigned char have_fixed;
mtrr_type def_type;
};
struct fixed_range_block {
int base_msr; /* start address of an MTRR block */
int ranges; /* number of MTRRs in this block */
@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
};
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
u64 mtrr_tom2;
struct mtrr_state_type mtrr_state = {};
EXPORT_SYMBOL_GPL(mtrr_state);
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."

View File

@ -49,7 +49,7 @@
u32 num_var_ranges = 0;
unsigned int mtrr_usage_table[MAX_VAR_RANGES];
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
@ -574,7 +574,7 @@ struct mtrr_value {
unsigned long lsize;
};
static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{

View File

@ -8,11 +8,6 @@
#define MTRRcap_MSR 0x0fe
#define MTRRdefType_MSR 0x2ff
#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
#define NUM_FIXED_RANGES 88
#define MAX_VAR_RANGES 256
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@ -29,11 +24,7 @@
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
/* In the Intel processor's MTRR interface, the MTRR type is always held in
an 8 bit field: */
typedef u8 mtrr_type;
extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
struct mtrr_ops {
u32 vendor;
@ -70,13 +61,6 @@ struct set_mtrr_context {
u32 ccr3;
};
struct mtrr_var_range {
u32 base_lo;
u32 base_hi;
u32 mask_lo;
u32 mask_hi;
};
void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);

View File

@ -26,6 +26,7 @@
#include <linux/kdebug.h>
#include <asm/smp.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
#include <mach_ipi.h>
@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
#endif
crash_save_cpu(regs, cpu);
/* Disable VMX or SVM if needed.
*
* We need to disable virtualization on all CPUs.
* Having VMX or SVM enabled on any CPU may break rebooting
* after the kdump kernel has finished its task.
*/
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
disable_local_APIC();
}
@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
local_irq_disable();
kdump_nmi_shootdown_cpus();
/* Booting kdump kernel with VMX or SVM enabled won't work,
* because (among other limitations) we can't disable paging
* with the virt flags.
*/
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();

View File

@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
*/
static unsigned long kvm_get_tsc_khz(void)
{
return preset_lpj;
struct pvclock_vcpu_time_info *src;
src = &per_cpu(hv_clock, 0);
return pvclock_tsc_khz(src);
}
static void kvm_get_preset_lpj(void)
{
struct pvclock_vcpu_time_info *src;
unsigned long khz;
u64 lpj;
src = &per_cpu(hv_clock, 0);
khz = pvclock_tsc_khz(src);
khz = kvm_get_tsc_khz();
lpj = ((u64)khz * 1000);
do_div(lpj, HZ);
@ -194,5 +194,7 @@ void __init kvmclock_init(void)
#endif
kvm_get_preset_lpj();
clocksource_register(&kvm_clock);
pv_info.paravirt_enabled = 1;
pv_info.name = "KVM";
}
}

View File

@ -12,6 +12,7 @@
#include <asm/proto.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
#ifdef CONFIG_X86_32
# include <linux/dmi.h>
@ -39,6 +40,12 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
/* This is set if we need to go through the 'emergency' path.
* When machine_emergency_restart() is called, we may be on
* an inconsistent state and won't be able to do a clean cleanup
*/
static int reboot_emergency;
/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
bool port_cf9_safe = false;
@ -368,6 +375,48 @@ static inline void kb_wait(void)
}
}
static void vmxoff_nmi(int cpu, struct die_args *args)
{
cpu_emergency_vmxoff();
}
/* Use NMIs as IPIs to tell all CPUs to disable virtualization
*/
static void emergency_vmx_disable_all(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/* We need to disable VMX on all CPUs before rebooting, otherwise
* we risk hanging up the machine, because the CPU ignore INIT
* signals when VMX is enabled.
*
* We can't take any locks and we may be on an inconsistent
* state, so we use NMIs as IPIs to tell the other CPUs to disable
* VMX and halt.
*
* For safety, we will avoid running the nmi_shootdown_cpus()
* stuff unnecessarily, but we don't have a way to check
* if other CPUs have VMX enabled. So we will call it only if the
* CPU we are running on has VMX enabled.
*
* We will miss cases where VMX is not enabled on all CPUs. This
* shouldn't do much harm because KVM always enable VMX on all
* CPUs anyway. But we can miss it on the small window where KVM
* is still enabling VMX.
*/
if (cpu_has_vmx() && cpu_vmx_enabled()) {
/* Disable VMX on this CPU.
*/
cpu_vmxoff();
/* Halt and disable VMX on the other CPUs */
nmi_shootdown_cpus(vmxoff_nmi);
}
}
void __attribute__((weak)) mach_reboot_fixups(void)
{
}
@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
{
int i;
if (reboot_emergency)
emergency_vmx_disable_all();
/* Tell the BIOS if we want cold or warm reboot */
*((unsigned short *)__va(0x472)) = reboot_mode;
@ -482,13 +534,19 @@ void native_machine_shutdown(void)
#endif
}
static void __machine_emergency_restart(int emergency)
{
reboot_emergency = emergency;
machine_ops.emergency_restart();
}
static void native_machine_restart(char *__unused)
{
printk("machine restart\n");
if (!reboot_force)
machine_shutdown();
machine_emergency_restart();
__machine_emergency_restart(0);
}
static void native_machine_halt(void)
@ -532,7 +590,7 @@ void machine_shutdown(void)
void machine_emergency_restart(void)
{
machine_ops.emergency_restart();
__machine_emergency_restart(1);
}
void machine_restart(char *cmd)

View File

@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm)
static void __inject_pit_timer_intr(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int i;
mutex_lock(&kvm->lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->lock);
/*
* Provides NMI watchdog support via Virtual Wire mode.
* The route is: PIT -> PIC -> LVT0 in NMI mode.
*
* Note: Our Virtual Wire implementation is simplified, only
* propagating PIT interrupts to all VCPUs when they have set
* LVT0 to NMI delivery. Other PIC interrupts are just sent to
* VCPU0, and only if its LVT0 is in EXTINT mode.
*/
if (kvm->arch.vapics_in_nmi_mode > 0)
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (vcpu)
kvm_apic_nmi_wd_deliver(vcpu);
}
}
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)

View File

@ -26,10 +26,40 @@
* Port from Qemu.
*/
#include <linux/mm.h>
#include <linux/bitops.h>
#include "irq.h"
#include <linux/kvm_host.h>
static void pic_lock(struct kvm_pic *s)
{
spin_lock(&s->lock);
}
static void pic_unlock(struct kvm_pic *s)
{
struct kvm *kvm = s->kvm;
unsigned acks = s->pending_acks;
bool wakeup = s->wakeup_needed;
struct kvm_vcpu *vcpu;
s->pending_acks = 0;
s->wakeup_needed = false;
spin_unlock(&s->lock);
while (acks) {
kvm_notify_acked_irq(kvm, __ffs(acks));
acks &= acks - 1;
}
if (wakeup) {
vcpu = s->kvm->vcpus[0];
if (vcpu)
kvm_vcpu_kick(vcpu);
}
}
static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
{
s->isr &= ~(1 << irq);
@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s)
void kvm_pic_update_irq(struct kvm_pic *s)
{
pic_lock(s);
pic_update_irq(s);
pic_unlock(s);
}
void kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
pic_lock(s);
if (irq >= 0 && irq < PIC_NUM_PINS) {
pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
}
pic_unlock(s);
}
/*
@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
pic_lock(s);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
pic_intack(&s->pics[0], irq);
@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
pic_unlock(s);
kvm_notify_acked_irq(kvm, irq);
return intno;
@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, irqbase;
int irq, irqbase, n;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->vcpus[0];
@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
if (s->irr & (1 << irq) || s->isr & (1 << irq))
kvm_notify_acked_irq(kvm, irq+irqbase);
if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
n = irq + irqbase;
s->pics_state->pending_acks |= 1 << n;
}
}
s->last_irr = 0;
s->irr = 0;
@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this,
printk(KERN_ERR "PIC: non byte write\n");
return;
}
pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this,
elcr_ioport_write(&s->pics[addr & 1], addr, data);
break;
}
pic_unlock(s);
}
static void picdev_read(struct kvm_io_device *this,
@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this,
printk(KERN_ERR "PIC: non byte read\n");
return;
}
pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this,
break;
}
*(unsigned char *)val = data;
pic_unlock(s);
}
/*
@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level)
s->output = level;
if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
s->pics[0].isr_ack &= ~(1 << irq);
kvm_vcpu_kick(vcpu);
s->wakeup_needed = true;
}
}
@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
spin_lock_init(&s->lock);
s->kvm = kvm;
s->pics[0].elcr_mask = 0xf8;
s->pics[1].elcr_mask = 0xde;
s->irq_request = pic_irq_request;

View File

@ -25,6 +25,7 @@
#include <linux/mm_types.h>
#include <linux/hrtimer.h>
#include <linux/kvm_host.h>
#include <linux/spinlock.h>
#include "iodev.h"
#include "ioapic.h"
@ -59,6 +60,10 @@ struct kvm_kpic_state {
};
struct kvm_pic {
spinlock_t lock;
bool wakeup_needed;
unsigned pending_acks;
struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
irq_request_func *irq_request;
void *irq_request_opaque;
@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);

View File

@ -7,7 +7,7 @@
#include <linux/kvm_host.h>
#include <asm/msr.h>
#include "svm.h"
#include <asm/svm.h>
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64

View File

@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
}
static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}
static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_NMI:
kvm_inject_nmi(vcpu);
kvm_vcpu_kick(vcpu);
break;
case APIC_DM_INIT:
@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
}
break;
case APIC_DM_EXTINT:
/*
* Should only be called by kvm_apic_local_deliver() with LVT0,
* before NMI watchdog was enabled. Already handled by
* kvm_apic_accept_pic_intr().
*/
break;
default:
printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
delivery_mode);
@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic)
apic->timer.period)));
}
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
if (apic_lvt_nmi_mode(lvt0_val)) {
if (!nmi_wd_enabled) {
apic_debug("Receive NMI setting on APIC_LVT0 "
"for cpu %d\n", apic->vcpu->vcpu_id);
apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
}
} else if (nmi_wd_enabled)
apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}
static void apic_mmio_write(struct kvm_io_device *this,
gpa_t address, int len, const void *data)
{
@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this,
apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
break;
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
case APIC_LVTT:
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT0:
case APIC_LVT1:
case APIC_LVTERR:
/* TODO: Check vector */
@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
}
static int __inject_apic_timer_irq(struct kvm_lapic *apic)
static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
int vector;
u32 reg = apic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
vector = apic_lvt_vector(apic, APIC_LVTT);
return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
}
return 0;
}
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (apic)
kvm_apic_local_deliver(apic, APIC_LVT0);
}
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
atomic_read(&apic->timer.pending) > 0) {
if (__inject_apic_timer_irq(apic))
if (apic && atomic_read(&apic->timer.pending) > 0) {
if (kvm_apic_local_deliver(apic, APIC_LVTT))
atomic_dec(&apic->timer.pending);
}
}

View File

@ -17,7 +17,6 @@
*
*/
#include "vmx.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@ -33,6 +32,7 @@
#include <asm/page.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/vmx.h>
/*
* When setting this variable to true it enables Two-Dimensional-Paging
@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mt_mask;
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{
@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask)
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
{
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask;
shadow_mt_mask = mt_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
{
int *write_count;
write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
gfn = unalias_gfn(kvm, gfn);
write_count = slot_largepage_idx(gfn,
gfn_to_memslot_unaliased(kvm, gfn));
*write_count += 1;
}
@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
{
int *write_count;
write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
gfn = unalias_gfn(kvm, gfn);
write_count = slot_largepage_idx(gfn,
gfn_to_memslot_unaliased(kvm, gfn));
*write_count -= 1;
WARN_ON(*write_count < 0);
}
static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
struct kvm_memory_slot *slot;
int *largepage_idx;
gfn = unalias_gfn(kvm, gfn);
slot = gfn_to_memslot_unaliased(kvm, gfn);
if (slot) {
largepage_idx = slot_largepage_idx(gfn, slot);
return *largepage_idx;
@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
return NULL;
}
static void rmap_write_protect(struct kvm *kvm, u64 gfn)
static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
unsigned long *rmapp;
u64 *spte;
@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
spte = rmap_next(kvm, rmapp, spte);
}
if (write_protected)
kvm_flush_remote_tlbs(kvm);
return write_protected;
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&sp->oos_link);
ASSERT(is_empty_shadow_page(sp->spt));
sp->slot_bitmap = 0;
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
struct kvm_mmu_page *sp = page_header(__pa(spte));
index = spte - sp->spt;
__set_bit(index, sp->unsync_child_bitmap);
sp->unsync_children = 1;
if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
sp->unsync_children++;
WARN_ON(!sp->unsync_children);
}
static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
sp->unsync_children = 1;
kvm_mmu_update_parents_unsync(sp);
return 1;
}
@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
}
#define KVM_PAGE_ARRAY_NR 16
struct kvm_mmu_pages {
struct mmu_page_and_offset {
struct kvm_mmu_page *sp;
unsigned int idx;
} page[KVM_PAGE_ARRAY_NR];
unsigned int nr;
};
#define for_each_unsync_children(bitmap, idx) \
for (idx = find_first_bit(bitmap, 512); \
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
static int mmu_unsync_walk(struct kvm_mmu_page *sp,
struct kvm_unsync_walk *walker)
int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
int idx)
{
int i, ret;
int i;
if (!sp->unsync_children)
return 0;
if (sp->unsync)
for (i=0; i < pvec->nr; i++)
if (pvec->page[i].sp == sp)
return 0;
pvec->page[pvec->nr].sp = sp;
pvec->page[pvec->nr].idx = idx;
pvec->nr++;
return (pvec->nr == KVM_PAGE_ARRAY_NR);
}
static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
struct kvm_mmu_pages *pvec)
{
int i, ret, nr_unsync_leaf = 0;
for_each_unsync_children(sp->unsync_child_bitmap, i) {
u64 ent = sp->spt[i];
if (is_shadow_present_pte(ent)) {
if (is_shadow_present_pte(ent) && !is_large_pte(ent)) {
struct kvm_mmu_page *child;
child = page_header(ent & PT64_BASE_ADDR_MASK);
if (child->unsync_children) {
ret = mmu_unsync_walk(child, walker);
if (ret)
if (mmu_pages_add(pvec, child, i))
return -ENOSPC;
ret = __mmu_unsync_walk(child, pvec);
if (!ret)
__clear_bit(i, sp->unsync_child_bitmap);
else if (ret > 0)
nr_unsync_leaf += ret;
else
return ret;
__clear_bit(i, sp->unsync_child_bitmap);
}
if (child->unsync) {
ret = walker->entry(child, walker);
__clear_bit(i, sp->unsync_child_bitmap);
if (ret)
return ret;
nr_unsync_leaf++;
if (mmu_pages_add(pvec, child, i))
return -ENOSPC;
}
}
}
@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
sp->unsync_children = 0;
return 0;
return nr_unsync_leaf;
}
static int mmu_unsync_walk(struct kvm_mmu_page *sp,
struct kvm_mmu_pages *pvec)
{
if (!sp->unsync_children)
return 0;
mmu_pages_add(pvec, sp, 0);
return __mmu_unsync_walk(sp, pvec);
}
static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
return NULL;
}
static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
{
list_del(&sp->oos_link);
--kvm->stat.mmu_unsync_global;
}
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
WARN_ON(!sp->unsync);
sp->unsync = 0;
if (sp->global)
kvm_unlink_unsync_global(kvm, sp);
--kvm->stat.mmu_unsync;
}
@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 1;
}
rmap_write_protect(vcpu->kvm, sp->gfn);
if (rmap_write_protect(vcpu->kvm, sp->gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 0;
}
struct sync_walker {
struct kvm_vcpu *vcpu;
struct kvm_unsync_walk walker;
struct mmu_page_path {
struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
unsigned int idx[PT64_ROOT_LEVEL-1];
};
static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
{
struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
walker);
struct kvm_vcpu *vcpu = sync_walk->vcpu;
#define for_each_sp(pvec, sp, parents, i) \
for (i = mmu_pages_next(&pvec, &parents, -1), \
sp = pvec.page[i].sp; \
i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
i = mmu_pages_next(&pvec, &parents, i))
kvm_sync_page(vcpu, sp);
return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
int i)
{
int n;
for (n = i+1; n < pvec->nr; n++) {
struct kvm_mmu_page *sp = pvec->page[n].sp;
if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
parents->idx[0] = pvec->page[n].idx;
return n;
}
parents->parent[sp->role.level-2] = sp;
parents->idx[sp->role.level-1] = pvec->page[n].idx;
}
return n;
}
static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
void mmu_pages_clear_parents(struct mmu_page_path *parents)
{
struct sync_walker walker = {
.walker = { .entry = mmu_sync_fn, },
.vcpu = vcpu,
};
struct kvm_mmu_page *sp;
unsigned int level = 0;
while (mmu_unsync_walk(sp, &walker.walker))
do {
unsigned int idx = parents->idx[level];
sp = parents->parent[level];
if (!sp)
return;
--sp->unsync_children;
WARN_ON((int)sp->unsync_children < 0);
__clear_bit(idx, sp->unsync_child_bitmap);
level++;
} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
}
static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
struct mmu_page_path *parents,
struct kvm_mmu_pages *pvec)
{
parents->parent[parent->role.level-1] = NULL;
pvec->nr = 0;
}
static void mmu_sync_children(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *parent)
{
int i;
struct kvm_mmu_page *sp;
struct mmu_page_path parents;
struct kvm_mmu_pages pages;
kvm_mmu_pages_init(parent, &parents, &pages);
while (mmu_unsync_walk(parent, &pages)) {
int protected = 0;
for_each_sp(pages, sp, parents, i)
protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
if (protected)
kvm_flush_remote_tlbs(vcpu->kvm);
for_each_sp(pages, sp, parents, i) {
kvm_sync_page(vcpu, sp);
mmu_pages_clear_parents(&parents);
}
cond_resched_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_pages_init(parent, &parents, &pages);
}
}
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
sp->role = role;
hlist_add_head(&sp->hash_link, bucket);
if (!metaphysical) {
rmap_write_protect(vcpu->kvm, gfn);
if (rmap_write_protect(vcpu->kvm, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
}
if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
if (level == PT32E_ROOT_LEVEL) {
shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
shadow_addr &= PT64_BASE_ADDR_MASK;
if (!shadow_addr)
return 1;
--level;
}
@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
}
}
struct zap_walker {
struct kvm_unsync_walk walker;
struct kvm *kvm;
int zapped;
};
static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
static int mmu_zap_unsync_children(struct kvm *kvm,
struct kvm_mmu_page *parent)
{
struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
walker);
kvm_mmu_zap_page(zap_walk->kvm, sp);
zap_walk->zapped = 1;
return 0;
}
int i, zapped = 0;
struct mmu_page_path parents;
struct kvm_mmu_pages pages;
static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct zap_walker walker = {
.walker = { .entry = mmu_zap_fn, },
.kvm = kvm,
.zapped = 0,
};
if (sp->role.level == PT_PAGE_TABLE_LEVEL)
if (parent->role.level == PT_PAGE_TABLE_LEVEL)
return 0;
mmu_unsync_walk(sp, &walker.walker);
return walker.zapped;
kvm_mmu_pages_init(parent, &parents, &pages);
while (mmu_unsync_walk(parent, &pages)) {
struct kvm_mmu_page *sp;
for_each_sp(pages, sp, parents, i) {
kvm_mmu_zap_page(kvm, sp);
mmu_pages_clear_parents(&parents);
}
zapped += pages.nr;
kvm_mmu_pages_init(parent, &parents, &pages);
}
return zapped;
}
static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
struct kvm_mmu_page *sp = page_header(__pa(pte));
__set_bit(slot, &sp->slot_bitmap);
__set_bit(slot, sp->slot_bitmap);
}
static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
return page;
}
/*
* The function is based on mtrr_type_lookup() in
* arch/x86/kernel/cpu/mtrr/generic.c
*/
static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
u64 start, u64 end)
{
int i;
u64 base, mask;
u8 prev_match, curr_match;
int num_var_ranges = KVM_NR_VAR_MTRR;
if (!mtrr_state->enabled)
return 0xFF;
/* Make end inclusive end, instead of exclusive */
end--;
/* Look in fixed ranges. Just return the type as per start */
if (mtrr_state->have_fixed && (start < 0x100000)) {
int idx;
if (start < 0x80000) {
idx = 0;
idx += (start >> 16);
return mtrr_state->fixed_ranges[idx];
} else if (start < 0xC0000) {
idx = 1 * 8;
idx += ((start - 0x80000) >> 14);
return mtrr_state->fixed_ranges[idx];
} else if (start < 0x1000000) {
idx = 3 * 8;
idx += ((start - 0xC0000) >> 12);
return mtrr_state->fixed_ranges[idx];
}
}
/*
* Look in variable ranges
* Look of multiple ranges matching this address and pick type
* as per MTRR precedence
*/
if (!(mtrr_state->enabled & 2))
return mtrr_state->def_type;
prev_match = 0xFF;
for (i = 0; i < num_var_ranges; ++i) {
unsigned short start_state, end_state;
if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
continue;
base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
(mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
(mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
start_state = ((start & mask) == (base & mask));
end_state = ((end & mask) == (base & mask));
if (start_state != end_state)
return 0xFE;
if ((start & mask) != (base & mask))
continue;
curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
if (prev_match == 0xFF) {
prev_match = curr_match;
continue;
}
if (prev_match == MTRR_TYPE_UNCACHABLE ||
curr_match == MTRR_TYPE_UNCACHABLE)
return MTRR_TYPE_UNCACHABLE;
if ((prev_match == MTRR_TYPE_WRBACK &&
curr_match == MTRR_TYPE_WRTHROUGH) ||
(prev_match == MTRR_TYPE_WRTHROUGH &&
curr_match == MTRR_TYPE_WRBACK)) {
prev_match = MTRR_TYPE_WRTHROUGH;
curr_match = MTRR_TYPE_WRTHROUGH;
}
if (prev_match != curr_match)
return MTRR_TYPE_UNCACHABLE;
}
if (prev_match != 0xFF)
return prev_match;
return mtrr_state->def_type;
}
static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
u8 mtrr;
mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
(gfn << PAGE_SHIFT) + PAGE_SIZE);
if (mtrr == 0xfe || mtrr == 0xff)
mtrr = MTRR_TYPE_WRBACK;
return mtrr;
}
static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
unsigned index;
@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (s->role.word != sp->role.word)
return 1;
}
kvm_mmu_mark_parents_unsync(vcpu, sp);
++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1;
if (sp->global) {
list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
++vcpu->kvm->stat.mmu_unsync_global;
} else
kvm_mmu_mark_parents_unsync(vcpu, sp);
mmu_convert_notrap(sp);
return 0;
}
@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage,
gfn_t gfn, pfn_t pfn, bool speculative,
int global, gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync)
{
u64 spte;
int ret = 0;
u64 mt_mask = shadow_mt_mask;
struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
if (!(vcpu->arch.cr4 & X86_CR4_PGE))
global = 0;
if (!global && sp->global) {
sp->global = 0;
if (sp->unsync) {
kvm_unlink_unsync_global(vcpu->kvm, sp);
kvm_mmu_mark_parents_unsync(vcpu, sp);
}
}
/*
* We don't set the accessed bit, since we sometimes want to see
* whether the guest actually used the pte (in order to detect
@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
if (mt_mask) {
mt_mask = get_memory_type(vcpu, gfn) <<
kvm_x86_ops->get_mt_mask_shift();
spte |= mt_mask;
}
spte |= (u64)pfn << PAGE_SHIFT;
@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= PT_WRITABLE_MASK;
/*
* Optimization: for pte sync, if spte was writable the hash
* lookup is unnecessary (and expensive). Write protection
* is responsibility of mmu_get_page / kvm_sync_page.
* Same reasoning can be applied to dirty page accounting.
*/
if (!can_unsync && is_writeble_pte(*shadow_pte))
goto set_pte;
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__func__, gfn);
@ -1495,8 +1746,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
int *ptwrite, int largepage, gfn_t gfn,
pfn_t pfn, bool speculative)
int *ptwrite, int largepage, int global,
gfn_t gfn, pfn_t pfn, bool speculative)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
}
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
dirty, largepage, gfn, pfn, speculative, true)) {
dirty, largepage, global, gfn, pfn, speculative, true)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
|| (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
0, walk->write, 1, &walk->pt_write,
walk->largepage, gfn, walk->pfn, false);
walk->largepage, 0, gfn, walk->pfn, false);
++vcpu->stat.pf_fixed;
return 1;
}
@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
}
}
static void mmu_sync_global(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_page *sp, *n;
list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
kvm_sync_page(vcpu, sp);
}
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock);
}
void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
mmu_sync_global(vcpu);
spin_unlock(&vcpu->kvm->mmu_lock);
}
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
return vaddr;
@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes)
const u8 *new, int bytes,
bool guest_initiated)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, "pre pte write");
if (gfn == vcpu->arch.last_pt_write_gfn
&& !last_updated_pte_accessed(vcpu)) {
++vcpu->arch.last_pt_write_count;
if (vcpu->arch.last_pt_write_count >= 3)
flooded = 1;
} else {
vcpu->arch.last_pt_write_gfn = gfn;
vcpu->arch.last_pt_write_count = 1;
vcpu->arch.last_pte_updated = NULL;
if (guest_initiated) {
if (gfn == vcpu->arch.last_pt_write_gfn
&& !last_updated_pte_accessed(vcpu)) {
++vcpu->arch.last_pt_write_count;
if (vcpu->arch.last_pt_write_count >= 3)
flooded = 1;
} else {
vcpu->arch.last_pt_write_gfn = gfn;
vcpu->arch.last_pt_write_count = 1;
vcpu->arch.last_pte_updated = NULL;
}
}
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
spin_lock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.invlpg(vcpu, gva);
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_mmu_flush_tlb(vcpu);
++vcpu->stat.invlpg;
}
@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
int i;
u64 *pt;
if (!test_bit(slot, &sp->slot_bitmap))
if (!test_bit(slot, sp->slot_bitmap))
continue;
pt = sp->spt;
@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
if (sp->role.metaphysical)
continue;
slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
gfn = unalias_gfn(vcpu->kvm, sp->gfn);
slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
rmapp = &slot->rmap[gfn - slot->base_gfn];
if (*rmapp)
printk(KERN_ERR "%s: (%s) shadow page has writable"

View File

@ -82,6 +82,7 @@ struct shadow_walker {
int *ptwrite;
pfn_t pfn;
u64 *sptep;
gpa_t pte_gpa;
};
static gfn_t gpte_to_gfn(pt_element_t gpte)
@ -222,7 +223,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
if (ret)
goto walk;
pte |= PT_DIRTY_MASK;
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
walker->ptes[walker->level - 1] = pte;
}
@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
return;
kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
gpte & PT_DIRTY_MASK, NULL, largepage,
gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
pfn, true);
}
@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
sw->user_fault, sw->write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
false);
sw->ptwrite, sw->largepage,
gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
gw->gfn, sw->pfn, false);
sw->sptep = sptep;
return 1;
}
@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
struct kvm_vcpu *vcpu, u64 addr,
u64 *sptep, int level)
{
struct shadow_walker *sw =
container_of(_sw, struct shadow_walker, walker);
if (level == PT_PAGE_TABLE_LEVEL) {
if (is_shadow_present_pte(*sptep))
/* FIXME: properly handle invlpg on large guest pages */
if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
struct kvm_mmu_page *sp = page_header(__pa(sptep));
sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
if (is_shadow_present_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
if (is_large_pte(*sptep))
--vcpu->kvm->stat.lpages;
}
set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
return 1;
}
@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
pt_element_t gpte;
struct shadow_walker walker = {
.walker = { .entry = FNAME(shadow_invlpg_entry), },
.pte_gpa = -1,
};
spin_lock(&vcpu->kvm->mmu_lock);
walk_shadow(&walker.walker, vcpu, gva);
spin_unlock(&vcpu->kvm->mmu_lock);
if (walker.pte_gpa == -1)
return;
if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
sizeof(pt_element_t)))
return;
if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
if (mmu_topup_memory_caches(vcpu))
return;
kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
sizeof(pt_element_t), 0);
}
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_pte(gpte), 0, gfn,
is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
spte_to_pfn(sp->spt[i]), true, false);
}

View File

@ -28,6 +28,8 @@
#include <asm/desc.h>
#include <asm/virtext.h>
#define __ex(x) __kvm_handle_fault_on_reboot(x)
MODULE_AUTHOR("Qumranet");
@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
static int has_svm(void)
{
uint32_t eax, ebx, ecx, edx;
const char *msg;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
printk(KERN_INFO "has_svm: not amd\n");
if (!cpu_has_svm(&msg)) {
printk(KERN_INFO "has_svn: %s\n", msg);
return 0;
}
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
if (eax < SVM_CPUID_FUNC) {
printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
return 0;
}
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
printk(KERN_DEBUG "has_svm: svm not available\n");
return 0;
}
return 1;
}
static void svm_hardware_disable(void *garbage)
{
uint64_t efer;
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
cpu_svm_disable();
}
static void svm_hardware_enable(void *garbage)
@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
/*
* SVM always stores 0 for the 'G' bit in the CS selector in
* the VMCB on a VMEXIT. This hurts cross-vendor migration:
* Intel's VMENTRY has a check on the 'G' bit.
*/
if (seg == VCPU_SREG_CS)
var->g = s->limit > 0xfffff;
/*
* Work around a bug where the busy flag in the tr selector
* isn't exposed
*/
if (seg == VCPU_SREG_TR)
var->type |= 0x2;
var->unusable = !var->present;
}
@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
rep = (io_info & SVM_IOIO_REP_MASK) != 0;
down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
}
@ -1912,6 +1916,11 @@ static int get_npt_level(void)
#endif
}
static int svm_get_mt_mask_shift(void)
{
return 0;
}
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
.get_mt_mask_shift = svm_get_mt_mask_shift,
};
static int __init svm_init(void)

View File

@ -16,7 +16,6 @@
*/
#include "irq.h"
#include "vmx.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@ -31,6 +30,8 @@
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/vmx.h>
#include <asm/virtext.h>
#define __ex(x) __kvm_handle_fault_on_reboot(x)
@ -90,6 +91,11 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@ -122,7 +128,7 @@ static struct vmcs_config {
u32 vmentry_ctrl;
} vmcs_config;
struct vmx_capability {
static struct vmx_capability {
u32 ept;
u32 vpid;
} vmx_capability;
@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
}
/* Otherwise falls through to kvm_set_msr_common */
default:
vmx_load_host_state(vmx);
msr = find_msr_entry(vmx, msr_index);
@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
static __init int cpu_has_kvm_support(void)
{
unsigned long ecx = cpuid_ecx(1);
return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
return cpu_has_vmx();
}
static __init int vmx_disabled_by_bios(void)
@ -1079,11 +1091,20 @@ static void vmclear_local_vcpus(void)
__vcpu_clear(vmx);
}
/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
* tricks.
*/
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
write_cr4(read_cr4() & ~X86_CR4_VMXE);
}
static void hardware_disable(void *garbage)
{
vmclear_local_vcpus();
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
write_cr4(read_cr4() & ~X86_CR4_VMXE);
kvm_cpu_vmxoff();
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
opt = 0;
opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
min = opt = 0;
min = 0;
opt = VM_ENTRY_LOAD_IA32_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
*/
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
u32 host_sysenter_cs;
u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
u64 host_pat;
unsigned long a;
struct descriptor_table dt;
int i;
@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
rdmsrl(MSR_IA32_SYSENTER_EIP, a);
vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
host_pat = msr_low | ((u64) msr_high << 32);
vmcs_write64(HOST_IA32_PAT, host_pat);
}
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
host_pat = msr_low | ((u64) msr_high << 32);
/* Write the default value follow host pat */
vmcs_write64(GUEST_IA32_PAT, host_pat);
/* Keep arch.pat sync with GUEST_IA32_PAT */
vmx->vcpu.arch.pat = host_pat;
}
for (i = 0; i < NR_VMX_MSR; ++i) {
u32 index = vmx_msr_index[i];
u32 data_low, data_high;
@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.rmode.active = 0;
vmx->soft_vnmi_blocked = 0;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@ -2335,6 +2374,29 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
return ret;
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
if (!cpu_has_virtual_nmis()) {
enable_irq_window(vcpu);
return;
}
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!cpu_has_virtual_nmis()) {
/*
* Tracking the NMI-blocked state in software is built upon
* finding the next open IRQ window. This, in turn, depends on
* well-behaving guests: They have to keep IRQs disabled at
* least as long as the NMI handler runs. Otherwise we may
* cause NMI nesting, maybe breaking the guest. But as this is
* highly unlikely, we can live with the residual risk.
*/
vmx->soft_vnmi_blocked = 1;
vmx->vnmi_blocked_time = 0;
}
++vcpu->stat.nmi_injections;
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = NMI_VECTOR;
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
NMI_VECTOR | INTR_TYPE_SOFT_INTR |
INTR_INFO_VALID_MASK);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
}
static void vmx_update_window_states(struct kvm_vcpu *vcpu)
{
u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
vcpu->arch.nmi_window_open =
!(guest_intr & (GUEST_INTR_STATE_STI |
GUEST_INTR_STATE_MOV_SS |
GUEST_INTR_STATE_NMI));
if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
vcpu->arch.nmi_window_open = 0;
vcpu->arch.interrupt_window_open =
((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
!(guest_intr & (GUEST_INTR_STATE_STI |
GUEST_INTR_STATE_MOV_SS)));
}
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
int word_index = __ffs(vcpu->arch.irq_summary);
@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
kvm_queue_interrupt(vcpu, irq);
}
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
u32 cpu_based_vm_exec_control;
vmx_update_window_states(vcpu);
vcpu->arch.interrupt_window_open =
((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
} else if (vcpu->arch.nmi_window_open) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
enable_nmi_window(vcpu);
return;
}
}
if (vcpu->arch.nmi_injected) {
vmx_inject_nmi(vcpu);
if (vcpu->arch.nmi_pending)
enable_nmi_window(vcpu);
else if (vcpu->arch.irq_summary
|| kvm_run->request_interrupt_window)
enable_irq_window(vcpu);
return;
}
if (vcpu->arch.interrupt_window_open &&
vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
kvm_do_inject_irq(vcpu);
if (vcpu->arch.interrupt_window_open) {
if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
kvm_do_inject_irq(vcpu);
if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
if (vcpu->arch.interrupt.pending)
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
}
if (!vcpu->arch.interrupt_window_open &&
(vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
/*
* Interrupts blocked. Wait for unblock.
*/
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
else
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
enable_irq_window(vcpu);
}
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
struct kvm_userspace_memory_region tss_mem = {
.slot = 8,
.slot = TSS_PRIVATE_MEMSLOT,
.guest_phys_addr = addr,
.memory_size = PAGE_SIZE * 3,
.flags = 0,
@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
}
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) {
@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rep = (exit_qualification & 32) != 0;
port = exit_qualification >> 16;
skip_emulated_instruction(vcpu);
return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
}
@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
KVMTRACE_0D(PEND_INTR, vcpu, handler);
++vcpu->stat.irq_window_exits;
/*
* If the user space waits to inject interrupts, exit as soon as
@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
if (kvm_run->request_interrupt_window &&
!vcpu->arch.irq_summary) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++vcpu->stat.irq_window_exits;
return 0;
}
return 1;
@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification;
u16 tss_selector;
int reason;
@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
reason = (u32)exit_qualification >> 30;
if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
(vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
(vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
== INTR_TYPE_NMI_INTR) {
vcpu->arch.nmi_injected = false;
if (cpu_has_virtual_nmis())
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
}
tss_selector = exit_qualification;
return kvm_task_switch(vcpu, tss_selector, reason);
@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
while (!guest_state_valid(vcpu)) {
err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
switch (err) {
case EMULATE_DONE:
break;
case EMULATE_DO_MMIO:
kvm_report_emulation_failure(vcpu, "mmio");
/* TODO: Handle MMIO */
return;
default:
kvm_report_emulation_failure(vcpu, "emulation failure");
return;
if (err == EMULATE_DO_MMIO)
break;
if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, "emulation failure");
return;
}
if (signal_pending(current))
@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
local_irq_disable();
preempt_disable();
/* Guest state should be valid now, no more emulation should be needed */
vmx->emulation_required = 0;
/* Guest state should be valid now except if we need to
* emulate an MMIO */
if (guest_state_valid(vcpu))
vmx->emulation_required = 0;
}
/*
@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
(u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
/* If we need to emulate an MMIO from handle_invalid_guest_state
* we just return 0 */
if (vmx->emulation_required && emulate_invalid_guest_state)
return 0;
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
if (vm_need_ept() && is_paging(vcpu)) {
@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION))
printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
"exit reason is 0x%x\n", __func__, exit_reason);
exit_reason != EXIT_REASON_EPT_VIOLATION &&
exit_reason != EXIT_REASON_TASK_SWITCH))
printk(KERN_WARNING "%s: unexpected, valid vectoring info "
"(0x%x) and exit reason is 0x%x\n",
__func__, vectoring_info, exit_reason);
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
if (vcpu->arch.interrupt_window_open) {
vmx->soft_vnmi_blocked = 0;
vcpu->arch.nmi_window_open = 1;
} else if (vmx->vnmi_blocked_time > 1000000000LL &&
vcpu->arch.nmi_pending) {
/*
* This CPU don't support us in finding the end of an
* NMI-blocked window if the guest runs with IRQs
* disabled. So we pull the trigger after 1 s of
* futile waiting, but inform the user about this.
*/
printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
"state on VCPU %d after 1 s timeout\n",
__func__, vcpu->vcpu_id);
vmx->soft_vnmi_blocked = 0;
vmx->vcpu.arch.nmi_window_open = 1;
}
}
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
if (!cpu_has_virtual_nmis())
return;
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
{
u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
return !(guest_intr & (GUEST_INTR_STATE_NMI |
GUEST_INTR_STATE_MOV_SS |
GUEST_INTR_STATE_STI));
}
static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
{
u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
GUEST_INTR_STATE_STI)) &&
(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
}
static void enable_intr_window(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.nmi_pending)
enable_nmi_window(vcpu);
else if (kvm_cpu_has_interrupt(vcpu))
enable_irq_window(vcpu);
}
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
if (unblock_nmi && vector != DF_VECTOR)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
}
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
idt_vectoring_info = vmx->idt_vectoring_info;
idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
{
update_tpr_threshold(vcpu);
if (cpu_has_virtual_nmis()) {
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
} else if (vmx_nmi_enabled(vcpu)) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
enable_intr_window(vcpu);
return;
}
}
if (vcpu->arch.nmi_injected) {
vmx_inject_nmi(vcpu);
enable_intr_window(vcpu);
vmx_update_window_states(vcpu);
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
} else if (vcpu->arch.nmi_window_open) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
enable_nmi_window(vcpu);
return;
}
}
if (vcpu->arch.nmi_injected) {
vmx_inject_nmi(vcpu);
if (vcpu->arch.nmi_pending)
enable_nmi_window(vcpu);
else if (kvm_cpu_has_interrupt(vcpu))
enable_irq_window(vcpu);
return;
}
if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
if (vmx_irq_enabled(vcpu))
if (vcpu->arch.interrupt_window_open)
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
else
enable_irq_window(vcpu);
@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
if (vcpu->arch.interrupt.pending) {
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
if (kvm_cpu_has_interrupt(vcpu))
enable_irq_window(vcpu);
}
}
@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
vmx->entry_time = ktime_get();
/* Handle invalid guest state instead of entering VMX */
if (vmx->emulation_required && emulate_invalid_guest_state) {
handle_invalid_guest_state(vcpu, kvm_run);
@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
vcpu->arch.interrupt_window_open =
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
vmx_update_window_states(vcpu);
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
(intr_info & INTR_INFO_VALID_MASK)) {
KVMTRACE_0D(NMI, vcpu, handler);
asm("int $2");
@ -3455,6 +3571,11 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
}
static int vmx_get_mt_mask_shift(void)
{
return VMX_EPT_MT_EPTE_SHIFT;
}
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
.get_mt_mask_shift = vmx_get_mt_mask_shift,
};
static int __init vmx_init(void)
@ -3566,10 +3688,10 @@ static int __init vmx_init(void)
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
VMX_EPT_IGMT_BIT);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
VMX_EPT_EXECUTABLE_MASK,
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
kvm_enable_tdp();
} else
kvm_disable_tdp();

View File

@ -39,6 +39,7 @@
#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@ -86,6 +87,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
{ "request_nmi", VCPU_STAT(request_nmi_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
{ "efer_reload", VCPU_STAT(efer_reload) },
@ -93,6 +95,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
{ "irq_injections", VCPU_STAT(irq_injections) },
{ "nmi_injections", VCPU_STAT(nmi_injections) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@ -101,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
{ "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ NULL }
@ -312,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
kvm_x86_ops->set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
return;
}
@ -355,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
@ -449,7 +455,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_IA32_PERF_STATUS,
MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
};
static unsigned num_msrs_to_save;
@ -648,10 +654,38 @@ static bool msr_mtrr_valid(unsigned msr)
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
if (!msr_mtrr_valid(msr))
return 1;
vcpu->arch.mtrr[msr - 0x200] = data;
if (msr == MSR_MTRRdefType) {
vcpu->arch.mtrr_state.def_type = data;
vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
} else if (msr == MSR_MTRRfix64K_00000)
p[0] = data;
else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
p[1 + msr - MSR_MTRRfix16K_80000] = data;
else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
p[3 + msr - MSR_MTRRfix4K_C0000] = data;
else if (msr == MSR_IA32_CR_PAT)
vcpu->arch.pat = data;
else { /* Variable MTRRs */
int idx, is_mtrr_mask;
u64 *pt;
idx = (msr - 0x200) / 2;
is_mtrr_mask = msr - 0x200 - 2 * idx;
if (!is_mtrr_mask)
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
else
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
*pt = data;
}
kvm_mmu_reset_context(vcpu);
return 0;
}
@ -747,10 +781,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
if (!msr_mtrr_valid(msr))
return 1;
*pdata = vcpu->arch.mtrr[msr - 0x200];
if (msr == MSR_MTRRdefType)
*pdata = vcpu->arch.mtrr_state.def_type +
(vcpu->arch.mtrr_state.enabled << 10);
else if (msr == MSR_MTRRfix64K_00000)
*pdata = p[0];
else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
else if (msr == MSR_IA32_CR_PAT)
*pdata = vcpu->arch.pat;
else { /* Variable MTRRs */
int idx, is_mtrr_mask;
u64 *pt;
idx = (msr - 0x200) / 2;
is_mtrr_mask = msr - 0x200 - 2 * idx;
if (!is_mtrr_mask)
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
else
pt =
(u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
*pdata = *pt;
}
return 0;
}
@ -903,7 +964,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
@ -1188,6 +1248,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
int t, times = entry->eax & 0xff;
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times && *nent < maxnent; ++t) {
do_cpuid_1_ent(&entry[t], function, 0);
entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@ -1218,7 +1279,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; *nent < maxnent; ++i) {
level_type = entry[i - 1].ecx & 0xff;
level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
do_cpuid_1_ent(&entry[i], function, i);
@ -1318,6 +1379,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return 0;
}
static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
kvm_inject_nmi(vcpu);
vcpu_put(vcpu);
return 0;
}
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
@ -1377,6 +1447,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
case KVM_NMI: {
r = kvm_vcpu_ioctl_nmi(vcpu);
if (r)
goto out;
r = 0;
break;
}
case KVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
@ -1968,7 +2045,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0)
return 0;
kvm_mmu_pte_write(vcpu, gpa, val, bytes);
kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
return 1;
}
@ -2404,8 +2481,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
kvm_x86_ops->skip_emulated_instruction(vcpu);
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@ -2541,7 +2616,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
return 0;
out:
@ -2729,7 +2804,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
for (j = i + 1; j == i; j = (j + 1) % nent) {
for (j = i + 1; ; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@ -2973,7 +3048,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
r = kvm_x86_ops->vcpu_reset(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@ -3275,9 +3350,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
kvm_desct->padding = 0;
}
static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
u16 selector,
struct descriptor_table *dtable)
static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
u16 selector,
struct descriptor_table *dtable)
{
if (selector & 1 << 2) {
struct kvm_segment kvm_seg;
@ -3302,7 +3377,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct descriptor_table dtable;
u16 index = selector >> 3;
get_segment_descritptor_dtable(vcpu, selector, &dtable);
get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7) {
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@ -3321,7 +3396,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct descriptor_table dtable;
u16 index = selector >> 3;
get_segment_descritptor_dtable(vcpu, selector, &dtable);
get_segment_descriptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7)
return 1;
@ -3900,6 +3975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
vcpu->arch.mtrr_state.have_fixed = 1;
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
@ -3925,6 +4001,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = false;
return kvm_x86_ops->vcpu_reset(vcpu);
}
@ -4012,6 +4091,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@ -4048,8 +4128,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
kvm_free_all_assigned_devices(kvm);
kvm_iommu_unmap_guest(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
@ -4127,7 +4207,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
|| vcpu->arch.nmi_pending;
}
static void vcpu_kick_intr(void *info)

View File

@ -58,6 +58,7 @@
#define SrcMem32 (4<<4) /* Memory operand (32-bit). */
#define SrcImm (5<<4) /* Immediate operand. */
#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
#define SrcOne (7<<4) /* Implied '1' */
#define SrcMask (7<<4)
/* Generic ModRM decode. */
#define ModRM (1<<7)
@ -70,17 +71,23 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
/* Source 2 operand type */
#define Src2None (0<<29)
#define Src2CL (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One (3<<29)
#define Src2Mask (7<<29)
enum {
Group1_80, Group1_81, Group1_82, Group1_83,
Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
};
static u16 opcode_table[256] = {
static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@ -195,7 +202,7 @@ static u16 opcode_table[256] = {
ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};
static u16 twobyte_table[256] = {
static u32 twobyte_table[256] = {
/* 0x00 - 0x0F */
0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@ -230,9 +237,14 @@ static u16 twobyte_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM, 0, 0,
/* 0xA8 - 0xAF */
0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM,
ModRM, 0,
/* 0xB0 - 0xB7 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
DstMem | SrcReg | ModRM | BitOp,
@ -253,7 +265,7 @@ static u16 twobyte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static u16 group_table[] = {
static u32 group_table[] = {
[Group1_80*8] =
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@ -297,9 +309,9 @@ static u16 group_table[] = {
SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
};
static u16 group2_table[] = {
static u32 group2_table[] = {
[Group7*8] =
SrcNone | ModRM, 0, 0, 0,
SrcNone | ModRM, 0, 0, SrcNone | ModRM,
SrcNone | ModRM | DstMem | Mov, 0,
SrcMem16 | ModRM | Mov, 0,
};
@ -359,49 +371,48 @@ static u16 group2_table[] = {
"andl %"_msk",%"_LO32 _tmp"; " \
"orl %"_LO32 _tmp",%"_sav"; "
#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif
#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
do { \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "4", "2") \
_op _suffix " %"_x"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
"=&r" (_tmp) \
: _y ((_src).val), "i" (EFLAGS_MASK)); \
} while (0)
/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
do { \
unsigned long _tmp; \
\
switch ((_dst).bytes) { \
case 2: \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "4", "2") \
_op"w %"_wx"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
"=&r" (_tmp) \
: _wy ((_src).val), "i" (EFLAGS_MASK)); \
break; \
case 4: \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "4", "2") \
_op"l %"_lx"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
"=&r" (_tmp) \
: _ly ((_src).val), "i" (EFLAGS_MASK)); \
break; \
case 8: \
__emulate_2op_8byte(_op, _src, _dst, \
_eflags, _qx, _qy); \
break; \
} \
do { \
unsigned long _tmp; \
\
switch ((_dst).bytes) { \
case 2: \
____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
break; \
case 4: \
____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
break; \
case 8: \
ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
break; \
} \
} while (0)
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
do { \
unsigned long __tmp; \
unsigned long _tmp; \
switch ((_dst).bytes) { \
case 1: \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "4", "2") \
_op"b %"_bx"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
"=&r" (__tmp) \
: _by ((_src).val), "i" (EFLAGS_MASK)); \
____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
break; \
default: \
__emulate_2op_nobyte(_op, _src, _dst, _eflags, \
@ -425,72 +436,69 @@ static u16 group2_table[] = {
__emulate_2op_nobyte(_op, _src, _dst, _eflags, \
"w", "r", _LO32, "r", "", "r")
/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags) \
/* Instruction has three operands and one operand is stored in ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
do { \
unsigned long _tmp; \
_type _clv = (_cl).val; \
_type _srcv = (_src).val; \
_type _dstv = (_dst).val; \
\
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "5", "2") \
_op _suffix " %4,%1 \n" \
_POST_EFLAGS("0", "5", "2") \
: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
); \
\
(_cl).val = (unsigned long) _clv; \
(_src).val = (unsigned long) _srcv; \
(_dst).val = (unsigned long) _dstv; \
} while (0)
#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
do { \
switch ((_dst).bytes) { \
case 2: \
__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
"w", unsigned short); \
break; \
case 4: \
__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
"l", unsigned int); \
break; \
case 8: \
ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
"q", unsigned long)); \
break; \
} \
} while (0)
#define __emulate_1op(_op, _dst, _eflags, _suffix) \
do { \
unsigned long _tmp; \
\
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "3", "2") \
_op _suffix " %1; " \
_POST_EFLAGS("0", "3", "2") \
: "=m" (_eflags), "+m" ((_dst).val), \
"=&r" (_tmp) \
: "i" (EFLAGS_MASK)); \
} while (0)
/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags) \
do { \
switch ((_dst).bytes) { \
case 1: \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "3", "2") \
_op"b %1; " \
_POST_EFLAGS("0", "3", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
"=&r" (_tmp) \
: "i" (EFLAGS_MASK)); \
break; \
case 2: \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "3", "2") \
_op"w %1; " \
_POST_EFLAGS("0", "3", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
"=&r" (_tmp) \
: "i" (EFLAGS_MASK)); \
break; \
case 4: \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "3", "2") \
_op"l %1; " \
_POST_EFLAGS("0", "3", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
"=&r" (_tmp) \
: "i" (EFLAGS_MASK)); \
break; \
case 8: \
__emulate_1op_8byte(_op, _dst, _eflags); \
break; \
case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
} \
} while (0)
/* Emulate an instruction with quadword operands (x86/64 only). */
#if defined(CONFIG_X86_64)
#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \
do { \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "4", "2") \
_op"q %"_qx"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
: _qy ((_src).val), "i" (EFLAGS_MASK)); \
} while (0)
#define __emulate_1op_8byte(_op, _dst, _eflags) \
do { \
__asm__ __volatile__ ( \
_PRE_EFLAGS("0", "3", "2") \
_op"q %1; " \
_POST_EFLAGS("0", "3", "2") \
: "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
: "i" (EFLAGS_MASK)); \
} while (0)
#elif defined(__i386__)
#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
#define __emulate_1op_8byte(_op, _dst, _eflags)
#endif /* __i386__ */
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip) \
({ unsigned long _x; \
@ -1041,6 +1049,33 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->src.bytes = 1;
c->src.val = insn_fetch(s8, 1, c->eip);
break;
case SrcOne:
c->src.bytes = 1;
c->src.val = 1;
break;
}
/*
* Decode and fetch the second source operand: register, memory
* or immediate.
*/
switch (c->d & Src2Mask) {
case Src2None:
break;
case Src2CL:
c->src2.bytes = 1;
c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
break;
case Src2ImmByte:
c->src2.type = OP_IMM;
c->src2.ptr = (unsigned long *)c->eip;
c->src2.bytes = 1;
c->src2.val = insn_fetch(u8, 1, c->eip);
break;
case Src2One:
c->src2.bytes = 1;
c->src2.val = 1;
break;
}
/* Decode and fetch the destination operand: register or memory. */
@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
c->regs[VCPU_REGS_RSP]);
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
int rc;
rc = ops->read_emulated(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]),
&c->src.val, c->src.bytes, ctxt->vcpu);
if (rc != 0)
return rc;
register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
return rc;
}
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
int rc;
rc = ops->read_std(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]),
&c->dst.val, c->dst.bytes, ctxt->vcpu);
c->src.bytes = c->dst.bytes;
rc = emulate_pop(ctxt, ops);
if (rc != 0)
return rc;
register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
c->dst.val = c->src.val;
return 0;
}
@ -1415,24 +1463,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
emulate_1op("dec", c->dst, ctxt->eflags);
break;
case 0x50 ... 0x57: /* push reg */
c->dst.type = OP_MEM;
c->dst.bytes = c->op_bytes;
c->dst.val = c->src.val;
register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-c->op_bytes);
c->dst.ptr = (void *) register_address(
c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
emulate_push(ctxt);
break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]), c->dst.ptr,
c->op_bytes, ctxt->vcpu)) != 0)
c->src.bytes = c->op_bytes;
rc = emulate_pop(ctxt, ops);
if (rc != 0)
goto done;
register_address_increment(c, &c->regs[VCPU_REGS_RSP],
c->op_bytes);
c->dst.type = OP_NONE; /* Disable writeback. */
c->dst.val = c->src.val;
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
@ -1591,7 +1630,9 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
emulate_push(ctxt);
break;
case 0x9d: /* popf */
c->dst.type = OP_REG;
c->dst.ptr = (unsigned long *) &ctxt->eflags;
c->dst.bytes = c->op_bytes;
goto pop_instruction;
case 0xa0 ... 0xa1: /* mov */
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@ -1689,7 +1730,9 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
emulate_grp2(ctxt);
break;
case 0xc3: /* ret */
c->dst.type = OP_REG;
c->dst.ptr = &c->eip;
c->dst.bytes = c->op_bytes;
goto pop_instruction;
case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
mov:
@ -1778,7 +1821,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->eip = saved_eip;
goto cannot_emulate;
}
return 0;
break;
case 0xf4: /* hlt */
ctxt->vcpu->arch.halt_request = 1;
break;
@ -1999,12 +2042,20 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
break;
case 0xa4: /* shld imm8, r, r/m */
case 0xa5: /* shld cl, r, r/m */
emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
break;
case 0xab:
bts: /* bts */
/* only subword offset */
c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
break;
case 0xac: /* shrd imm8, r, r/m */
case 0xad: /* shrd cl, r, r/m */
emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
break;
case 0xae: /* clflush */
break;
case 0xb0 ... 0xb1: /* cmpxchg */

View File

@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
if (IS_ERR(anon_inode_inode))
return -ENODEV;
if (fops->owner && !try_module_get(fops->owner))
return -ENOENT;
error = get_unused_fd_flags(flags);
if (error < 0)
return error;
goto err_module;
fd = error;
/*
@ -128,6 +131,8 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
dput(dentry);
err_put_unused_fd:
put_unused_fd(fd);
err_module:
module_put(fops->owner);
return error;
}
EXPORT_SYMBOL_GPL(anon_inode_getfd);

View File

@ -83,6 +83,7 @@ struct kvm_irqchip {
#define KVM_EXIT_S390_SIEIC 13
#define KVM_EXIT_S390_RESET 14
#define KVM_EXIT_DCR 15
#define KVM_EXIT_NMI 16
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
@ -387,6 +388,14 @@ struct kvm_trace_rec {
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18
#if defined(CONFIG_X86)
#define KVM_CAP_DEVICE_MSI 20
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
#if defined(CONFIG_X86)
#define KVM_CAP_USER_NMI 22
#endif
/*
* ioctls for VM fds
@ -458,6 +467,8 @@ struct kvm_trace_rec {
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
/* Available with KVM_CAP_NMI */
#define KVM_NMI _IO(KVMIO, 0x9a)
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
@ -500,10 +511,17 @@ struct kvm_assigned_irq {
__u32 guest_irq;
__u32 flags;
union {
struct {
__u32 addr_lo;
__u32 addr_hi;
__u32 data;
} guest_msi;
__u32 reserved[12];
};
};
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
#endif

View File

@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/preempt.h>
#include <linux/marker.h>
#include <linux/msi.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@ -306,8 +307,14 @@ struct kvm_assigned_dev_kernel {
int host_busnr;
int host_devfn;
int host_irq;
bool host_irq_disabled;
int guest_irq;
int irq_requested;
struct msi_msg guest_msi;
#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
unsigned long irq_requested_type;
int irq_source_id;
struct pci_dev *dev;
struct kvm *kvm;
@ -316,8 +323,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);

View File

@ -150,10 +150,11 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
{
kvm_inject_nmi(vcpu);
kvm_vcpu_kick(vcpu);
}
static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
u8 dest_mode)
u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
u8 dest_mode)
{
u32 mask = 0;
int i;
@ -207,7 +208,8 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
"vector=%x trig_mode=%x\n",
dest, dest_mode, delivery_mode, vector, trig_mode);
deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
dest_mode);
if (!deliver_bitmask) {
ioapic_debug("no target on destination\n");
return 0;

View File

@ -85,5 +85,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
u8 dest_mode);
#endif

View File

@ -61,10 +61,9 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
{
hlist_del(&kian->link);
hlist_del_init(&kian->link);
}
/* The caller must hold kvm->lock mutex */
@ -73,11 +72,15 @@ int kvm_request_irq_source_id(struct kvm *kvm)
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
int irq_source_id = find_first_zero_bit(bitmap,
sizeof(kvm->arch.irq_sources_bitmap));
if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
irq_source_id = -EFAULT;
} else
set_bit(irq_source_id, bitmap);
return -EFAULT;
}
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
set_bit(irq_source_id, bitmap);
return irq_source_id;
}
@ -85,7 +88,9 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
{
int i;
if (irq_source_id <= 0 ||
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
if (irq_source_id < 0 ||
irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
return;

View File

@ -47,6 +47,10 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#ifdef CONFIG_X86
#include <asm/msidef.h>
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
#endif
@ -60,10 +64,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
static int msi2intx = 1;
module_param(msi2intx, bool, 0);
DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);
static cpumask_t cpus_hardware_enabled;
static cpumask_var_t cpus_hardware_enabled;
struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@ -75,9 +82,60 @@ struct dentry *kvm_debugfs_dir;
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
bool kvm_rebooting;
static bool kvm_rebooting;
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
#ifdef CONFIG_X86
static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
{
int vcpu_id;
struct kvm_vcpu *vcpu;
struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
>> MSI_ADDR_DEST_ID_SHIFT;
int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
>> MSI_DATA_VECTOR_SHIFT;
int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
(unsigned long *)&dev->guest_msi.address_lo);
int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
(unsigned long *)&dev->guest_msi.data);
int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
(unsigned long *)&dev->guest_msi.data);
u32 deliver_bitmask;
BUG_ON(!ioapic);
deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
dest_id, dest_mode);
/* IOAPIC delivery mode value is the same as MSI here */
switch (delivery_mode) {
case IOAPIC_LOWEST_PRIORITY:
vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
deliver_bitmask);
if (vcpu != NULL)
kvm_apic_set_irq(vcpu, vector, trig_mode);
else
printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
break;
case IOAPIC_FIXED:
for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
if (!(deliver_bitmask & (1 << vcpu_id)))
continue;
deliver_bitmask &= ~(1 << vcpu_id);
vcpu = ioapic->kvm->vcpus[vcpu_id];
if (vcpu)
kvm_apic_set_irq(vcpu, vector, trig_mode);
}
break;
default:
printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
}
}
#else
static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
#endif
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
int assigned_dev_id)
{
@ -104,9 +162,16 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
* finer-grained lock, update this
*/
mutex_lock(&assigned_dev->kvm->lock);
kvm_set_irq(assigned_dev->kvm,
assigned_dev->irq_source_id,
assigned_dev->guest_irq, 1);
if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
kvm_set_irq(assigned_dev->kvm,
assigned_dev->irq_source_id,
assigned_dev->guest_irq, 1);
else if (assigned_dev->irq_requested_type &
KVM_ASSIGNED_DEV_GUEST_MSI) {
assigned_device_msi_dispatch(assigned_dev);
enable_irq(assigned_dev->host_irq);
assigned_dev->host_irq_disabled = false;
}
mutex_unlock(&assigned_dev->kvm->lock);
kvm_put_kvm(assigned_dev->kvm);
}
@ -117,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
(struct kvm_assigned_dev_kernel *) dev_id;
kvm_get_kvm(assigned_dev->kvm);
schedule_work(&assigned_dev->interrupt_work);
disable_irq_nosync(irq);
assigned_dev->host_irq_disabled = true;
return IRQ_HANDLED;
}
@ -132,19 +201,32 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
dev = container_of(kian, struct kvm_assigned_dev_kernel,
ack_notifier);
kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
enable_irq(dev->host_irq);
/* The guest irq may be shared so this ack may be
* from another device.
*/
if (dev->host_irq_disabled) {
enable_irq(dev->host_irq);
dev->host_irq_disabled = false;
}
}
static void kvm_free_assigned_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel
*assigned_dev)
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
if (!irqchip_in_kernel(kvm))
return;
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
if (assigned_dev->irq_source_id != -1)
kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
assigned_dev->irq_source_id = -1;
if (!assigned_dev->irq_requested_type)
return;
if (cancel_work_sync(&assigned_dev->interrupt_work))
/* We had pending work. That means we will have to take
@ -152,6 +234,23 @@ static void kvm_free_assigned_device(struct kvm *kvm,
*/
kvm_put_kvm(kvm);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
pci_disable_msi(assigned_dev->dev);
assigned_dev->irq_requested_type = 0;
}
static void kvm_free_assigned_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel
*assigned_dev)
{
kvm_free_assigned_irq(kvm, assigned_dev);
pci_reset_function(assigned_dev->dev);
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
@ -174,6 +273,95 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
}
}
static int assigned_device_update_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *adev,
struct kvm_assigned_irq *airq)
{
adev->guest_irq = airq->guest_irq;
adev->ack_notifier.gsi = airq->guest_irq;
if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
return 0;
if (irqchip_in_kernel(kvm)) {
if (!msi2intx &&
adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
free_irq(adev->host_irq, (void *)kvm);
pci_disable_msi(adev->dev);
}
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
if (airq->host_irq)
adev->host_irq = airq->host_irq;
else
adev->host_irq = adev->dev->irq;
/* Even though this is PCI, we don't want to use shared
* interrupts. Sharing host devices with guest-assigned devices
* on the same interrupt line is not a happy situation: there
* are going to be long delays in accepting, acking, etc.
*/
if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
0, "kvm_assigned_intx_device", (void *)adev))
return -EIO;
}
adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
KVM_ASSIGNED_DEV_HOST_INTX;
return 0;
}
#ifdef CONFIG_X86
static int assigned_device_update_msi(struct kvm *kvm,
struct kvm_assigned_dev_kernel *adev,
struct kvm_assigned_irq *airq)
{
int r;
if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
/* x86 don't care upper address of guest msi message addr */
adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
adev->guest_msi.data = airq->guest_msi.data;
adev->ack_notifier.gsi = -1;
} else if (msi2intx) {
adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
adev->guest_irq = airq->guest_irq;
adev->ack_notifier.gsi = airq->guest_irq;
}
if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
return 0;
if (irqchip_in_kernel(kvm)) {
if (!msi2intx) {
if (adev->irq_requested_type &
KVM_ASSIGNED_DEV_HOST_INTX)
free_irq(adev->host_irq, (void *)adev);
r = pci_enable_msi(adev->dev);
if (r)
return r;
}
adev->host_irq = adev->dev->irq;
if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
"kvm_assigned_msi_device", (void *)adev))
return -EIO;
}
if (!msi2intx)
adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
return 0;
}
#endif
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
@ -190,49 +378,68 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
return -EINVAL;
}
if (match->irq_requested) {
match->guest_irq = assigned_irq->guest_irq;
match->ack_notifier.gsi = assigned_irq->guest_irq;
mutex_unlock(&kvm->lock);
return 0;
if (!match->irq_requested_type) {
INIT_WORK(&match->interrupt_work,
kvm_assigned_dev_interrupt_work_handler);
if (irqchip_in_kernel(kvm)) {
/* Register ack nofitier */
match->ack_notifier.gsi = -1;
match->ack_notifier.irq_acked =
kvm_assigned_dev_ack_irq;
kvm_register_irq_ack_notifier(kvm,
&match->ack_notifier);
/* Request IRQ source ID */
r = kvm_request_irq_source_id(kvm);
if (r < 0)
goto out_release;
else
match->irq_source_id = r;
#ifdef CONFIG_X86
/* Determine host device irq type, we can know the
* result from dev->msi_enabled */
if (msi2intx)
pci_enable_msi(match->dev);
#endif
}
}
INIT_WORK(&match->interrupt_work,
kvm_assigned_dev_interrupt_work_handler);
if (irqchip_in_kernel(kvm)) {
if (!capable(CAP_SYS_RAWIO)) {
r = -EPERM;
if ((!msi2intx &&
(assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
(msi2intx && match->dev->msi_enabled)) {
#ifdef CONFIG_X86
r = assigned_device_update_msi(kvm, match, assigned_irq);
if (r) {
printk(KERN_WARNING "kvm: failed to enable "
"MSI device!\n");
goto out_release;
}
if (assigned_irq->host_irq)
match->host_irq = assigned_irq->host_irq;
else
match->host_irq = match->dev->irq;
match->guest_irq = assigned_irq->guest_irq;
match->ack_notifier.gsi = assigned_irq->guest_irq;
match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
r = kvm_request_irq_source_id(kvm);
if (r < 0)
#else
r = -ENOTTY;
#endif
} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
/* Host device IRQ 0 means don't support INTx */
if (!msi2intx) {
printk(KERN_WARNING
"kvm: wait device to enable MSI!\n");
r = 0;
} else {
printk(KERN_WARNING
"kvm: failed to enable MSI device!\n");
r = -ENOTTY;
goto out_release;
else
match->irq_source_id = r;
/* Even though this is PCI, we don't want to use shared
* interrupts. Sharing host devices with guest-assigned devices
* on the same interrupt line is not a happy situation: there
* are going to be long delays in accepting, acking, etc.
*/
if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
"kvm_assigned_device", (void *)match)) {
r = -EIO;
}
} else {
/* Non-sharing INTx mode */
r = assigned_device_update_intx(kvm, match, assigned_irq);
if (r) {
printk(KERN_WARNING "kvm: failed to enable "
"INTx device!\n");
goto out_release;
}
}
match->irq_requested = true;
mutex_unlock(&kvm->lock);
return r;
out_release:
@ -283,11 +490,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
__func__);
goto out_disable;
}
pci_reset_function(dev);
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->dev = dev;
match->irq_source_id = -1;
match->kvm = kvm;
list_add(&match->list, &kvm->arch.assigned_dev_head);
@ -355,58 +565,49 @@ static void ack_flush(void *_completed)
{
}
void kvm_flush_remote_tlbs(struct kvm *kvm)
static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
int i, cpu, me;
cpumask_t cpus;
cpumask_var_t cpus;
bool called = true;
struct kvm_vcpu *vcpu;
if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
cpumask_clear(cpus);
me = get_cpu();
cpus_clear(cpus);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
if (test_and_set_bit(req, &vcpu->requests))
continue;
cpu = vcpu->cpu;
if (cpu != -1 && cpu != me)
cpu_set(cpu, cpus);
if (cpus != NULL && cpu != -1 && cpu != me)
cpumask_set_cpu(cpu, cpus);
}
if (cpus_empty(cpus))
goto out;
++kvm->stat.remote_tlb_flush;
smp_call_function_mask(cpus, ack_flush, NULL, 1);
out:
if (unlikely(cpus == NULL))
smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
else if (!cpumask_empty(cpus))
smp_call_function_many(cpus, ack_flush, NULL, 1);
else
called = false;
put_cpu();
free_cpumask_var(cpus);
return called;
}
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
}
void kvm_reload_remote_mmus(struct kvm *kvm)
{
int i, cpu, me;
cpumask_t cpus;
struct kvm_vcpu *vcpu;
me = get_cpu();
cpus_clear(cpus);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
continue;
cpu = vcpu->cpu;
if (cpu != -1 && cpu != me)
cpu_set(cpu, cpus);
}
if (cpus_empty(cpus))
goto out;
smp_call_function_mask(cpus, ack_flush, NULL, 1);
out:
put_cpu();
make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
struct page *page;
@ -710,6 +911,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
goto out;
if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
@ -821,7 +1024,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
kvm_free_physmem_slot(&old, &new);
kvm_free_physmem_slot(&old, npages ? &new : NULL);
/* Slot deletion case: we have to update the current slot */
if (!npages)
*memslot = old;
#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@ -918,7 +1124,7 @@ int kvm_is_error_hva(unsigned long addr)
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
{
int i;
@ -931,11 +1137,12 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
}
return NULL;
}
EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
gfn = unalias_gfn(kvm, gfn);
return __gfn_to_memslot(kvm, gfn);
return gfn_to_memslot_unaliased(kvm, gfn);
}
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@ -959,7 +1166,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *slot;
gfn = unalias_gfn(kvm, gfn);
slot = __gfn_to_memslot(kvm, gfn);
slot = gfn_to_memslot_unaliased(kvm, gfn);
if (!slot)
return bad_hva();
return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@ -1210,7 +1417,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *memslot;
gfn = unalias_gfn(kvm, gfn);
memslot = __gfn_to_memslot(kvm, gfn);
memslot = gfn_to_memslot_unaliased(kvm, gfn);
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
@ -1295,7 +1502,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
return 0;
}
static const struct file_operations kvm_vcpu_fops = {
static struct file_operations kvm_vcpu_fops = {
.release = kvm_vcpu_release,
.unlocked_ioctl = kvm_vcpu_ioctl,
.compat_ioctl = kvm_vcpu_ioctl,
@ -1689,7 +1896,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
static const struct file_operations kvm_vm_fops = {
static struct file_operations kvm_vm_fops = {
.release = kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
.compat_ioctl = kvm_vm_ioctl,
@ -1711,6 +1918,18 @@ static int kvm_dev_ioctl_create_vm(void)
return fd;
}
static long kvm_dev_ioctl_check_extension_generic(long arg)
{
switch (arg) {
case KVM_CAP_USER_MEMORY:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
return 1;
default:
break;
}
return kvm_dev_ioctl_check_extension(arg);
}
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@ -1730,7 +1949,7 @@ static long kvm_dev_ioctl(struct file *filp,
r = kvm_dev_ioctl_create_vm();
break;
case KVM_CHECK_EXTENSION:
r = kvm_dev_ioctl_check_extension(arg);
r = kvm_dev_ioctl_check_extension_generic(arg);
break;
case KVM_GET_VCPU_MMAP_SIZE:
r = -EINVAL;
@ -1771,9 +1990,9 @@ static void hardware_enable(void *junk)
{
int cpu = raw_smp_processor_id();
if (cpu_isset(cpu, cpus_hardware_enabled))
if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
cpu_set(cpu, cpus_hardware_enabled);
cpumask_set_cpu(cpu, cpus_hardware_enabled);
kvm_arch_hardware_enable(NULL);
}
@ -1781,9 +2000,9 @@ static void hardware_disable(void *junk)
{
int cpu = raw_smp_processor_id();
if (!cpu_isset(cpu, cpus_hardware_enabled))
if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
cpu_clear(cpu, cpus_hardware_enabled);
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
kvm_arch_hardware_disable(NULL);
}
@ -2017,9 +2236,14 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
bad_pfn = page_to_pfn(bad_page);
if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
goto out_free_0;
}
r = kvm_arch_hardware_setup();
if (r < 0)
goto out_free_0;
goto out_free_0a;
for_each_online_cpu(cpu) {
smp_call_function_single(cpu,
@ -2053,6 +2277,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
}
kvm_chardev_ops.owner = module;
kvm_vm_fops.owner = module;
kvm_vcpu_fops.owner = module;
r = misc_register(&kvm_dev);
if (r) {
@ -2062,6 +2288,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
#ifndef CONFIG_X86
msi2intx = 0;
#endif
return 0;
@ -2078,6 +2307,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
on_each_cpu(hardware_disable, NULL, 1);
out_free_1:
kvm_arch_hardware_unsetup();
out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
__free_page(bad_page);
out:
@ -2101,6 +2332,7 @@ void kvm_exit(void)
kvm_arch_hardware_unsetup();
kvm_arch_exit();
kvm_exit_debug();
free_cpumask_var(cpus_hardware_enabled);
__free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);

View File

@ -252,6 +252,7 @@ void kvm_trace_cleanup(void)
struct kvm_trace_probe *p = &kvm_trace_probes[i];
marker_probe_unregister(p->name, p->probe_func, p);
}
marker_synchronize_unregister();
relay_close(kt->rchan);
debugfs_remove(kt->lost_file);