KVM: PPC: Allow use of small pages to back Book3S HV guests

This relaxes the requirement that the guest memory be provided as
16MB huge pages, allowing it to be provided as normal memory, i.e.
in pages of PAGE_SIZE bytes (4k or 64k).  To allow this, we index
the kvm->arch.slot_phys[] arrays with a small page index, even if
huge pages are being used, and use the low-order 5 bits of each
entry to store the order of the enclosing page with respect to
normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE).

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
Paul Mackerras 2011-12-12 12:31:41 +00:00 committed by Avi Kivity
parent c77162dee7
commit da9d1d7f28
7 changed files with 131 additions and 68 deletions

View File

@ -113,4 +113,14 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
return 0; /* error */
}
/*
 * Report whether a memslot is aligned to a given page size.
 *
 * A slot counts as aligned when both its first guest frame number and its
 * length in base pages are multiples of the number of base pages that make
 * up one page of @pagesize bytes.  Sizes no larger than PAGE_SIZE are
 * always considered aligned.
 *
 * NOTE(review): the mask arithmetic presumes @pagesize is a power of two;
 * confirm against callers.
 */
static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
				   unsigned long pagesize)
{
	unsigned long low_bits;

	/* Nothing to check for base-page-sized (or smaller) mappings. */
	if (pagesize <= PAGE_SIZE)
		return true;

	/* (base pages per large page) - 1: bits that must be clear. */
	low_bits = (pagesize >> PAGE_SHIFT) - 1;

	return ((memslot->base_gfn | memslot->npages) & low_bits) == 0;
}
#endif /* __ASM_KVM_BOOK3S_64_H__ */

View File

@ -177,14 +177,13 @@ struct revmap_entry {
};
/* Low-order bits in kvm->arch.slot_phys[][] */
/*
 * Mask for the order field of an entry: log2 of the enclosing page size
 * expressed in units of PAGE_SIZE (readers compute
 * PAGE_SIZE << (entry & KVMPPC_PAGE_ORDER_MASK)).
 */
#define KVMPPC_PAGE_ORDER_MASK 0x1f
/*
 * Flag added to an entry when the page was obtained by
 * kvmppc_get_guest_page().
 * NOTE(review): presumably marks entries holding a page reference that
 * must be dropped on unpin -- confirm against unpin_slot().
 */
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch {
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
unsigned long ram_psize;
unsigned long ram_porder;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;

View File

@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot);
struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);

View File

@ -237,6 +237,7 @@
#define LPCR_ISL (1ul << (63-2))
#define LPCR_VC_SH (63-2)
#define LPCR_DPFD_SH (63-11)
#define LPCR_VRMASD (0x1ful << (63-16))
#define LPCR_VRMA_L (1ul << (63-12))
#define LPCR_VRMA_LP0 (1ul << (63-15))
#define LPCR_VRMA_LP1 (1ul << (63-16))

View File

@ -34,8 +34,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
/* Pages in the VRMA are 16MB pages */
#define VRMA_PAGE_ORDER 24
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm)
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
}
void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
/*
 * Page-size bits for the first HPTE doubleword (4k, 64k or 16M pages):
 * any size above 4k sets HPTE_V_LARGE, a 4k page contributes no bits.
 */
static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
{
	if (pgsize <= 0x1000)
		return 0;

	return HPTE_V_LARGE;
}
/*
 * Page-size bits for the second HPTE doubleword (4k, 64k or 16M pages):
 * a 64k page contributes 0x1000; every other size contributes no bits.
 */
static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
{
	unsigned long bits = 0;

	if (pgsize == 0x10000)
		bits = 0x1000;

	return bits;
}
void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
unsigned long porder)
{
struct kvm *kvm = vcpu->kvm;
unsigned long i;
unsigned long npages;
unsigned long hp_v, hp_r;
unsigned long addr, hash;
unsigned long porder = kvm->arch.ram_porder;
unsigned long psize;
unsigned long hp0, hp1;
long ret;
npages = kvm->arch.slot_npages[memslot->id];
psize = 1ul << porder;
npages = memslot->npages >> (porder - PAGE_SHIFT);
/* VRMA can't be > 1TB */
if (npages > 1ul << (40 - porder))
@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
if (npages > HPT_NPTEG)
npages = HPT_NPTEG;
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
hp1 = hpte1_pgsize_encoding(psize) |
HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
* is available and use it.
*/
hash = (hash << 3) + 7;
hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
HPTE_V_LARGE | HPTE_V_VALID;
hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
hp_r = hp1 | addr;
ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
* one already in the kvm->arch.slot_phys[][] arrays.
*/
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
struct kvm_memory_slot *memslot)
struct kvm_memory_slot *memslot,
unsigned long psize)
{
unsigned long start;
long np;
struct page *page, *pages[1];
long np, err;
struct page *page, *hpage, *pages[1];
unsigned long s, pgsize;
unsigned long *physp;
unsigned long pfn, i;
unsigned int got, pgorder;
unsigned long pfn, i, npages;
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return -EINVAL;
i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
if (physp[i])
if (physp[gfn - memslot->base_gfn])
return 0;
page = NULL;
pgsize = psize;
start = gfn_to_hva_memslot(memslot, gfn);
/* Instantiate and get the page we want access to */
@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
if (np != 1)
return -EINVAL;
page = pages[0];
got = KVMPPC_GOT_PAGE;
/* Check it's a 16MB page */
if (!PageHead(page) ||
compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) {
pr_err("page at %lx isn't 16MB (o=%d)\n",
start, compound_order(page));
put_page(page);
return -EINVAL;
/* See if this is a large page */
s = PAGE_SIZE;
if (PageHuge(page)) {
hpage = compound_head(page);
s <<= compound_order(hpage);
/* Get the whole large page if slot alignment is ok */
if (s > psize && slot_is_aligned(memslot, s) &&
!(memslot->userspace_addr & (s - 1))) {
start &= ~(s - 1);
pgsize = s;
page = hpage;
}
}
err = -EINVAL;
if (s < psize)
goto out;
pfn = page_to_pfn(page);
npages = pgsize >> PAGE_SHIFT;
pgorder = __ilog2(npages);
physp += (gfn - memslot->base_gfn) & ~(npages - 1);
spin_lock(&kvm->arch.slot_phys_lock);
if (!physp[i])
physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
else
put_page(page);
for (i = 0; i < npages; ++i) {
if (!physp[i]) {
physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder;
got = 0;
}
}
spin_unlock(&kvm->arch.slot_phys_lock);
err = 0;
return 0;
out:
if (got) {
if (PageHuge(page))
page = compound_head(page);
put_page(page);
}
return err;
}
/*
@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return H_PARAMETER;
if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
if (!slot_is_aligned(memslot, psize))
return H_PARAMETER;
if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
return H_PARAMETER;
preempt_disable();
@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
struct kvm_memory_slot *memslot;
unsigned long gfn = gpa >> PAGE_SHIFT;
struct page *page;
unsigned long offset;
unsigned long pfn, pa;
unsigned long psize, offset;
unsigned long pa;
unsigned long *physp;
memslot = gfn_to_memslot(kvm, gfn);
@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return NULL;
physp += (gfn - memslot->base_gfn) >>
(kvm->arch.ram_porder - PAGE_SHIFT);
physp += gfn - memslot->base_gfn;
pa = *physp;
if (!pa) {
if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
return NULL;
pa = *physp;
}
pfn = pa >> PAGE_SHIFT;
page = pfn_to_page(pfn);
page = pfn_to_page(pa >> PAGE_SHIFT);
psize = PAGE_SIZE;
if (PageHuge(page)) {
page = compound_head(page);
psize <<= compound_order(page);
}
get_page(page);
offset = gpa & (kvm->arch.ram_psize - 1);
offset = gpa & (psize - 1);
if (nb_ret)
*nb_ret = kvm->arch.ram_psize - offset;
*nb_ret = psize - offset;
return page_address(page) + offset;
}

View File

@ -51,8 +51,6 @@
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#define LARGE_PAGE_ORDER 24 /* 16MB pages */
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
@ -1074,24 +1072,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
return fd;
}
/*
 * Return the SLB-style page-size encoding for @psize:
 *   - 4k (0x1000) or smaller: no bits set;
 *   - 64k (0x10000): SLB_VSID_L | SLB_VSID_LP_01;
 *   - any other size above 4k: SLB_VSID_L only.
 */
static unsigned long slb_pgsize_encoding(unsigned long psize)
{
	if (psize <= 0x1000)
		return 0;

	if (psize == 0x10000)
		return SLB_VSID_L | SLB_VSID_LP_01;

	return SLB_VSID_L;
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
unsigned long psize;
unsigned long npages;
unsigned long *phys;
/* For now, only allow 16MB-aligned slots */
psize = kvm->arch.ram_psize;
if ((mem->memory_size & (psize - 1)) ||
(mem->guest_phys_addr & (psize - 1))) {
pr_err("bad memory_size=%llx @ %llx\n",
mem->memory_size, mem->guest_phys_addr);
return -EINVAL;
}
/* Allocate a slot_phys array */
npages = mem->memory_size >> kvm->arch.ram_porder;
npages = mem->memory_size >> PAGE_SHIFT;
phys = kvm->arch.slot_phys[mem->slot];
if (!phys) {
phys = vzalloc(npages * sizeof(unsigned long));
@ -1119,6 +1119,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
continue;
pfn = physp[j] >> PAGE_SHIFT;
page = pfn_to_page(pfn);
if (PageHuge(page))
page = compound_head(page);
SetPageDirty(page);
put_page(page);
}
@ -1141,12 +1143,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
unsigned long lpcr;
unsigned long lpcr, senc;
unsigned long psize, porder;
unsigned long rma_size;
unsigned long rmls;
unsigned long *physp;
unsigned long i, npages, pa;
unsigned long i, npages;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done)
@ -1168,8 +1170,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto up_out;
psize = vma_kernel_pagesize(vma);
if (psize != kvm->arch.ram_psize)
goto up_out;
porder = __ilog2(psize);
/* Is this one of our preallocated RMAs? */
if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
@ -1186,13 +1187,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto out;
}
/* We can handle 4k, 64k or 16M pages in the VRMA */
err = -EINVAL;
if (!(psize == 0x1000 || psize == 0x10000 ||
psize == 0x1000000))
goto out;
/* Update VRMASD field in the LPCR */
lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH);
lpcr |= LPCR_VRMA_L;
senc = slb_pgsize_encoding(psize);
lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
lpcr |= senc << (LPCR_VRMASD_SH - 4);
kvm->arch.lpcr = lpcr;
/* Create HPTEs in the hash page table for the VRMA */
kvmppc_map_vrma(vcpu, memslot);
kvmppc_map_vrma(vcpu, memslot, porder);
} else {
/* Set up to use an RMO region */
@ -1231,13 +1239,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
/* Initialize phys addrs of pages in RMO */
porder = kvm->arch.ram_porder;
npages = rma_size >> porder;
pa = ri->base_pfn << PAGE_SHIFT;
npages = ri->npages;
porder = __ilog2(npages);
physp = kvm->arch.slot_phys[memslot->id];
spin_lock(&kvm->arch.slot_phys_lock);
for (i = 0; i < npages; ++i)
physp[i] = pa + (i << porder);
physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
spin_unlock(&kvm->arch.slot_phys_lock);
}
@ -1266,8 +1273,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
kvm->arch.ram_porder = LARGE_PAGE_ORDER;
kvm->arch.rma = NULL;
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

View File

@ -77,6 +77,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = builtin_gfn_to_memslot(kvm, gfn);
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
return H_PARAMETER;
/* Check if the requested page fits entirely in the memslot. */
if (!slot_is_aligned(memslot, psize))
return H_PARAMETER;
slot_fn = gfn - memslot->base_gfn;
physp = kvm->arch.slot_phys[memslot->id];
@ -88,9 +92,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pa = *physp;
if (!pa)
return H_TOO_HARD;
pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
pte_size = kvm->arch.ram_psize;
if (pte_size < psize)
return H_PARAMETER;
if (pa && pte_size > psize)