From 5dc832628229d2736fab10523566855c3cda622d Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Sun, 16 Mar 2008 18:48:26 +0200
Subject: [PATCH 1/5] KVM: VMX: Restore tss even on x86_64

The vmx hardware state restore restores the tss selector and base
address, but not its length.  Usually, this does not matter since most
of the tss contents is within the default length of 0x67.  However, if
a process is using ioperm() to grant itself I/O port permissions, an
additional bitmap within the tss, but outside the default length is
consulted.  The effect is that the process will receive a SIGSEGV
instead of transparently accessing the port.

Fix by restoring the tss length.  Note that i386 had this working
already.

Closes bugzilla 10246.

Signed-off-by: Avi Kivity
---
 arch/x86/kvm/vmx.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 94ea724638fd..f2df03c12f16 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -349,8 +349,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
 static void reload_tss(void)
 {
-#ifndef CONFIG_X86_64
-
 	/*
 	 * VT restores TR but not its size.  Useless.
 	 */
@@ -361,7 +359,6 @@ static void reload_tss(void)
 	descs = (void *)gdt.base;
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
-#endif
 }
 
 static void load_transition_efer(struct vcpu_vmx *vmx)
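To make the ioperm() failure mode above concrete, here is a minimal userspace reproducer sketch (not part of the patch; the port number 0x80 and the build line are arbitrary choices for illustration). Run as root on an x86_64 host that is also running VMX guests: without the fix, the host TSS limit restored after a VM exit stops at 0x67 and hides the I/O permission bitmap, so the port access raises SIGSEGV instead of completing transparently.

/* Hypothetical reproducer sketch; build with: gcc -O2 -o ioperm-test ioperm-test.c */
#include <stdio.h>
#include <stdlib.h>
#include <sys/io.h>

int main(void)
{
	unsigned long port = 0x80;	/* POST/io-delay port, chosen arbitrarily */

	/* Grant this process access to one port via the TSS I/O permission bitmap. */
	if (ioperm(port, 1, 1) < 0) {
		perror("ioperm");
		return EXIT_FAILURE;
	}

	/* Without the fix, this access can fault (SIGSEGV) on an x86_64 KVM host,
	 * because the truncated host TSS limit hides the permission bitmap. */
	outb(0x00, port);
	printf("read back 0x%02x from port 0x%lx\n", inb(port), port);

	ioperm(port, 1, 0);	/* drop the permission again */
	return EXIT_SUCCESS;
}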
From 4b1a80fa65aa9e2ec5696998856136c886385538 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Sun, 23 Mar 2008 12:18:19 +0200
Subject: [PATCH 2/5] KVM: MMU: Fix is_rmap_pte() with io ptes

is_rmap_pte() doesn't take into account io ptes, which have the avail
bit set.

Signed-off-by: Avi Kivity
---
 arch/x86/kvm/mmu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d8172aabc660..e49c4d433c0f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -222,8 +222,7 @@ static int is_io_pte(unsigned long pte)
 
 static int is_rmap_pte(u64 pte)
 {
-	return pte != shadow_trap_nonpresent_pte
-		&& pte != shadow_notrap_nonpresent_pte;
+	return is_shadow_present_pte(pte);
 }
 
 static gfn_t pse36_gfn_delta(u32 gpte)

From 15aaa819e20cb183f26392ed8ea16020630ef142 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Mon, 17 Mar 2008 10:08:18 -0300
Subject: [PATCH 3/5] KVM: MMU: handle page removal with shadow mapping

Do not assume that a shadow mapping will always point to the same host
frame number.  Fixes crash with madvise(MADV_DONTNEED).

[avi: move after first printk(), add another printk()]

Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/mmu.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e49c4d433c0f..4ba85d95bd29 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -892,14 +892,25 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 int *ptwrite, gfn_t gfn, struct page *page)
 {
 	u64 spte;
-	int was_rmapped = is_rmap_pte(*shadow_pte);
+	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
+	hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
 		 __FUNCTION__, *shadow_pte, pt_access, write_fault,
 		 user_fault, gfn);
 
+	if (is_rmap_pte(*shadow_pte)) {
+		if (host_pfn != page_to_pfn(page)) {
+			pgprintk("hfn old %lx new %lx\n",
+				 host_pfn, page_to_pfn(page));
+			rmap_remove(vcpu->kvm, shadow_pte);
+		}
+		else
+			was_rmapped = 1;
+	}
+
 	/*
 	 * We don't set the accessed bit, since we sometimes want to see
 	 * whether the guest actually used the pte (in order to detect
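The madvise(MADV_DONTNEED) crash fixed above happens because the host page backing a guest frame can change while an existing shadow pte still points at the old frame. The following is an illustrative userspace sketch only, not KVM code: it shows that MADV_DONTNEED discards the anonymous page behind a mapping, so the next touch is served by a different (zero-filled) page, which is exactly the stability assumption mmu_set_spte() can no longer make.

/* Illustrative sketch only; shows the userspace behaviour that invalidates
 * the old "same host frame forever" assumption in the shadow MMU. */
#define _GNU_SOURCE
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = (size_t)sysconf(_SC_PAGESIZE);
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);

	memset(buf, 0xaa, len);		/* dirty the page so it is actually instantiated */

	/* Throw the page away; the mapping stays valid but loses its backing page. */
	assert(madvise(buf, len, MADV_DONTNEED) == 0);

	/* The next touch faults in a fresh zero page, typically at a different pfn;
	 * a stale shadow pte pointing at the old frame would now be wrong. */
	printf("first byte after MADV_DONTNEED: 0x%02x\n", buf[0]);

	munmap(buf, len);
	return 0;
}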
From 707a18a51d83d9180a63b3cbaad8eda7764a8689 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Tue, 18 Mar 2008 17:42:34 -0300
Subject: [PATCH 4/5] KVM: VMX: convert init_rmode_tss() to slots_lock

init_rmode_tss was forgotten during the conversion from mmap_sem to
slots_lock.

INFO: task qemu-system-x86:3748 blocked for more than 120 seconds.
Call Trace:
 [] __down_read+0x86/0x9e
 [] do_page_fault+0x346/0x78e
 [] trace_hardirqs_on_thunk+0x35/0x3a
 [] error_exit+0x0/0xa9
 [] copy_user_generic_string+0x17/0x40
 [] :kvm:kvm_write_guest_page+0x3e/0x5f
 [] :kvm_intel:init_rmode_tss+0xa7/0xf9
 [] :kvm_intel:vmx_vcpu_reset+0x10/0x38a
 [] :kvm:kvm_arch_vcpu_setup+0x20/0x53
 [] :kvm:kvm_vm_ioctl+0xad/0x1cf
 [] __lock_acquire+0x4f7/0xc28
 [] vfs_ioctl+0x21/0x6b
 [] do_vfs_ioctl+0x252/0x26b
 [] sys_ioctl+0x3c/0x5e
 [] system_call_after_swapgs+0x7b/0x80

Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f2df03c12f16..8e1462880d1f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1433,7 +1433,7 @@ static int init_rmode_tss(struct kvm *kvm)
 	int ret = 0;
 	int r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&kvm->slots_lock);
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -1456,7 +1456,7 @@ static int init_rmode_tss(struct kvm *kvm)
 
 	ret = 1;
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&kvm->slots_lock);
 	return ret;
 }
 

From e48bb497b95a0f7127f9ff596a6b4c4b206f7dcf Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Sun, 23 Mar 2008 14:21:08 +0200
Subject: [PATCH 5/5] KVM: MMU: Fix memory leak on guest demand faults

While backporting 72dc67a69690288538142df73a7e3ac66fea68dc, a
gfn_to_page() call was duplicated instead of moved (due to an unrelated
patch not being present in mainline).  This caused a page reference
leak, resulting in a fairly massive memory leak.

Fix by removing the extraneous gfn_to_page() call.

Signed-off-by: Avi Kivity
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4ba85d95bd29..e55af12e11b7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1412,7 +1412,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	up_read(&current->mm->mmap_sem);
 
 	vcpu->arch.update_pte.gfn = gfn;
-	vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
+	vcpu->arch.update_pte.page = page;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
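For the leak above: gfn_to_page() hands back the page with its reference count raised, and the caller is expected to drop that reference later, so keeping the duplicated call leaked one page reference on every guest demand fault. A small self-contained analogue of the bug pattern, with hypothetical obj_get()/obj_put() helpers standing in for the KVM page reference calls (illustrative only, not kernel code):

/* Analogue of "duplicated instead of moved" reference acquisition;
 * obj_get()/obj_put() are hypothetical stand-ins, not KVM APIs. */
#include <stdio.h>

struct obj { int refcount; };

static struct obj pool = { .refcount = 1 };

static struct obj *obj_get(void)		{ pool.refcount++; return &pool; }
static void obj_put(struct obj *o)		{ o->refcount--; }

static struct obj *cached;

static void guess_page(void)
{
	struct obj *o = obj_get();	/* reference taken once here ... */

	cached = obj_get();		/* BUG: duplicated instead of moved; should be "cached = o;" */
	(void)o;			/* ... so one reference is leaked on every call */
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		guess_page();
		obj_put(cached);	/* the consumer drops only the reference it knows about */
	}
	printf("leaked references: %d\n", pool.refcount - 1);	/* prints 3 */
	return 0;
}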