mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-25 10:00:53 +07:00
f781951299
This patch introduces a new module parameter for the KVM module; when it is present, KVM attempts a bit of polling on every HLT before scheduling itself out via kvm_vcpu_block. This parameter helps a lot for latency-bound workloads; in particular, I tested it with O_DSYNC writes with a battery-backed disk in the host. In this case, writes are fast (because the data doesn't have to go all the way to the platters) but they cannot be merged by either the host or the guest. KVM's performance here is usually around 30% of bare metal, or 50% if you use cache=directsync or cache=writethrough (these parameters keep the guest from sending pointless flush requests, and at the same time they are not slow because of the battery-backed cache). The bad performance happens because on every halt the host CPU decides to halt itself too. When the interrupt comes, the vCPU thread is then migrated to a new physical CPU, and in general the latency is horrible because the vCPU thread has to be scheduled back in. With this patch performance reaches 60-65% of bare metal and, more importantly, 99% of what you get if you use idle=poll in the guest. This means that the tunable gets rid of this particular bottleneck, and more work can be done to improve performance in the kernel or QEMU. Of course there is some price to pay; every time an otherwise idle vCPU is woken by an interrupt, it will poll unnecessarily and thus impose a little load on the host. The above results were obtained with a mostly random value of the parameter (500000), and the load was around 1.5-2.5% CPU usage on one of the host's cores for each idle guest vCPU. The patch also adds a new stat, /sys/kernel/debug/kvm/halt_successful_poll, that can be used to tune the parameter. It counts how many HLT instructions received an interrupt during the polling period; each successful poll saves Linux from scheduling the VCPU thread out and back in, and may also avoid a likely trip to C1 and back for the physical CPU.
While the VM is idle, a 4-VCPU Linux VM halts around 10 times per second. Of these halts, almost all are failed polls. During the benchmark, instead, basically all halts end within the polling period, except a more or less constant stream of 50 per second coming from vCPUs that are not running the benchmark. The wasted time is thus very low. Things may be slightly different for Windows VMs, which have a ~10 ms timer tick. The effect is also visible on Marcelo's recently-introduced latency test for the TSC deadline timer. Though of course a non-RT kernel has awful latency bounds, the latency of the timer is around 8000-10000 clock cycles compared to 20000-120000 without setting halt_poll_ns. For the TSC deadline timer, thus, the effect is both a smaller average latency and a smaller variance. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
869 lines
22 KiB
C
869 lines
22 KiB
C
/*
|
|
* Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
|
|
*
|
|
* Authors:
|
|
* Alexander Graf <agraf@suse.de>
|
|
* Kevin Wolf <mail@kevin-wolf.de>
|
|
*
|
|
* Description:
|
|
* This file is derived from arch/powerpc/kvm/44x.c,
|
|
* by Hollis Blanchard <hollisb@us.ibm.com>.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License, version 2, as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/kvm_host.h>
|
|
#include <linux/err.h>
|
|
#include <linux/export.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/module.h>
|
|
#include <linux/miscdevice.h>
|
|
|
|
#include <asm/reg.h>
|
|
#include <asm/cputable.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/io.h>
|
|
#include <asm/kvm_ppc.h>
|
|
#include <asm/kvm_book3s.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/page.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/highmem.h>
|
|
|
|
#include "book3s.h"
|
|
#include "trace.h"
|
|
|
|
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
|
|
|
|
/* #define EXIT_DEBUG */
|
|
|
|
struct kvm_stats_debugfs_item debugfs_entries[] = {
|
|
{ "exits", VCPU_STAT(sum_exits) },
|
|
{ "mmio", VCPU_STAT(mmio_exits) },
|
|
{ "sig", VCPU_STAT(signal_exits) },
|
|
{ "sysc", VCPU_STAT(syscall_exits) },
|
|
{ "inst_emu", VCPU_STAT(emulated_inst_exits) },
|
|
{ "dec", VCPU_STAT(dec_exits) },
|
|
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
|
|
{ "queue_intr", VCPU_STAT(queue_intr) },
|
|
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
|
|
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
|
|
{ "pf_storage", VCPU_STAT(pf_storage) },
|
|
{ "sp_storage", VCPU_STAT(sp_storage) },
|
|
{ "pf_instruc", VCPU_STAT(pf_instruc) },
|
|
{ "sp_instruc", VCPU_STAT(sp_instruc) },
|
|
{ "ld", VCPU_STAT(ld) },
|
|
{ "ld_slow", VCPU_STAT(ld_slow) },
|
|
{ "st", VCPU_STAT(st) },
|
|
{ "st_slow", VCPU_STAT(st_slow) },
|
|
{ NULL }
|
|
};
|
|
|
|
void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
|
|
{
|
|
if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
|
|
ulong pc = kvmppc_get_pc(vcpu);
|
|
if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
|
|
kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
|
|
vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
|
|
|
|
static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
|
|
{
|
|
if (!is_kvmppc_hv_enabled(vcpu->kvm))
|
|
return to_book3s(vcpu)->hior;
|
|
return 0;
|
|
}
|
|
|
|
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
|
|
unsigned long pending_now, unsigned long old_pending)
|
|
{
|
|
if (is_kvmppc_hv_enabled(vcpu->kvm))
|
|
return;
|
|
if (pending_now)
|
|
kvmppc_set_int_pending(vcpu, 1);
|
|
else if (old_pending)
|
|
kvmppc_set_int_pending(vcpu, 0);
|
|
}
|
|
|
|
static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
|
|
{
|
|
ulong crit_raw;
|
|
ulong crit_r1;
|
|
bool crit;
|
|
|
|
if (is_kvmppc_hv_enabled(vcpu->kvm))
|
|
return false;
|
|
|
|
crit_raw = kvmppc_get_critical(vcpu);
|
|
crit_r1 = kvmppc_get_gpr(vcpu, 1);
|
|
|
|
/* Truncate crit indicators in 32 bit mode */
|
|
if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
|
|
crit_raw &= 0xffffffff;
|
|
crit_r1 &= 0xffffffff;
|
|
}
|
|
|
|
/* Critical section when crit == r1 */
|
|
crit = (crit_raw == crit_r1);
|
|
/* ... and we're in supervisor mode */
|
|
crit = crit && !(kvmppc_get_msr(vcpu) & MSR_PR);
|
|
|
|
return crit;
|
|
}
|
|
|
|
/*
 * Redirect the guest to interrupt vector @vec.
 *
 * After undoing any split-real PC fixup, the current PC is saved in SRR0
 * and the current MSR OR-ed with @flags in SRR1.  The PC is then set to
 * the vcpu's interrupt offset plus @vec, and the MMU's reset_msr hook is
 * called.
 */
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
	ulong return_pc;
	u64 saved_msr;

	kvmppc_unfixup_split_real(vcpu);

	/* Sample the PC only after the fixup above has been undone. */
	return_pc = kvmppc_get_pc(vcpu);
	kvmppc_set_srr0(vcpu, return_pc);

	saved_msr = kvmppc_get_msr(vcpu) | flags;
	kvmppc_set_srr1(vcpu, saved_msr);

	kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
	vcpu->arch.mmu.reset_msr(vcpu);
}
|
|
|
|
static int kvmppc_book3s_vec2irqprio(unsigned int vec)
|
|
{
|
|
unsigned int prio;
|
|
|
|
switch (vec) {
|
|
case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break;
|
|
case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break;
|
|
case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE; break;
|
|
case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT; break;
|
|
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
|
|
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
|
|
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
|
|
case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
|
|
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
|
|
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
|
|
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
|
|
case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER; break;
|
|
case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL; break;
|
|
case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break;
|
|
case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break;
|
|
case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break;
|
|
case 0xf60: prio = BOOK3S_IRQPRIO_FAC_UNAVAIL; break;
|
|
default: prio = BOOK3S_IRQPRIO_MAX; break;
|
|
}
|
|
|
|
return prio;
|
|
}
|
|
|
|
void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
|
|
unsigned int vec)
|
|
{
|
|
unsigned long old_pending = vcpu->arch.pending_exceptions;
|
|
|
|
clear_bit(kvmppc_book3s_vec2irqprio(vec),
|
|
&vcpu->arch.pending_exceptions);
|
|
|
|
kvmppc_update_int_pending(vcpu, vcpu->arch.pending_exceptions,
|
|
old_pending);
|
|
}
|
|
|
|
void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
|
|
{
|
|
vcpu->stat.queue_intr++;
|
|
|
|
set_bit(kvmppc_book3s_vec2irqprio(vec),
|
|
&vcpu->arch.pending_exceptions);
|
|
#ifdef EXIT_DEBUG
|
|
printk(KERN_INFO "Queueing interrupt %x\n", vec);
|
|
#endif
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
|
|
|
|
/*
 * Raise a program interrupt with the given SRR1 @flags.  The interrupt is
 * not queued: it is injected into the guest immediately.
 */
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
	/* No point in queueing; deliver it right now. */
	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
|
|
|
|
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
|
|
{
|
|
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);
|
|
|
|
int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
|
|
{
|
|
return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);
|
|
|
|
void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
|
|
{
|
|
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
|
|
|
|
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
|
|
struct kvm_interrupt *irq)
|
|
{
|
|
unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
|
|
|
|
if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
|
|
vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
|
|
|
|
kvmppc_book3s_queue_irqprio(vcpu, vec);
|
|
}
|
|
|
|
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
|
|
{
|
|
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
|
|
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
|
|
}
|
|
|
|
/*
 * Queue a data storage interrupt.
 *
 * @dar:   value stored into the guest DAR
 * @flags: value stored into the guest DSISR
 */
void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
				    ulong flags)
{
	/* Set up the fault registers before the interrupt is made pending. */
	kvmppc_set_dar(vcpu, dar);
	kvmppc_set_dsisr(vcpu, flags);

	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
}
|
|
|
|
/*
 * Queue an instruction storage interrupt.
 *
 * The SRR1_ISI_NOPT, SRR1_ISI_N_OR_G and SRR1_ISI_PROT bits of the guest
 * MSR are replaced by the corresponding bits of @flags (written with
 * kvmppc_set_msr_fast()) before the interrupt is made pending.
 */
void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
{
	const u64 isi_bits = SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT;
	u64 msr = kvmppc_get_msr(vcpu);

	msr = (msr & ~isi_bits) | (flags & isi_bits);
	kvmppc_set_msr_fast(vcpu, msr);

	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
}
|
|
|
|
int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
|
|
{
|
|
int deliver = 1;
|
|
int vec = 0;
|
|
bool crit = kvmppc_critical_section(vcpu);
|
|
|
|
switch (priority) {
|
|
case BOOK3S_IRQPRIO_DECREMENTER:
|
|
deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
|
|
vec = BOOK3S_INTERRUPT_DECREMENTER;
|
|
break;
|
|
case BOOK3S_IRQPRIO_EXTERNAL:
|
|
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
|
|
deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
|
|
vec = BOOK3S_INTERRUPT_EXTERNAL;
|
|
break;
|
|
case BOOK3S_IRQPRIO_SYSTEM_RESET:
|
|
vec = BOOK3S_INTERRUPT_SYSTEM_RESET;
|
|
break;
|
|
case BOOK3S_IRQPRIO_MACHINE_CHECK:
|
|
vec = BOOK3S_INTERRUPT_MACHINE_CHECK;
|
|
break;
|
|
case BOOK3S_IRQPRIO_DATA_STORAGE:
|
|
vec = BOOK3S_INTERRUPT_DATA_STORAGE;
|
|
break;
|
|
case BOOK3S_IRQPRIO_INST_STORAGE:
|
|
vec = BOOK3S_INTERRUPT_INST_STORAGE;
|
|
break;
|
|
case BOOK3S_IRQPRIO_DATA_SEGMENT:
|
|
vec = BOOK3S_INTERRUPT_DATA_SEGMENT;
|
|
break;
|
|
case BOOK3S_IRQPRIO_INST_SEGMENT:
|
|
vec = BOOK3S_INTERRUPT_INST_SEGMENT;
|
|
break;
|
|
case BOOK3S_IRQPRIO_ALIGNMENT:
|
|
vec = BOOK3S_INTERRUPT_ALIGNMENT;
|
|
break;
|
|
case BOOK3S_IRQPRIO_PROGRAM:
|
|
vec = BOOK3S_INTERRUPT_PROGRAM;
|
|
break;
|
|
case BOOK3S_IRQPRIO_VSX:
|
|
vec = BOOK3S_INTERRUPT_VSX;
|
|
break;
|
|
case BOOK3S_IRQPRIO_ALTIVEC:
|
|
vec = BOOK3S_INTERRUPT_ALTIVEC;
|
|
break;
|
|
case BOOK3S_IRQPRIO_FP_UNAVAIL:
|
|
vec = BOOK3S_INTERRUPT_FP_UNAVAIL;
|
|
break;
|
|
case BOOK3S_IRQPRIO_SYSCALL:
|
|
vec = BOOK3S_INTERRUPT_SYSCALL;
|
|
break;
|
|
case BOOK3S_IRQPRIO_DEBUG:
|
|
vec = BOOK3S_INTERRUPT_TRACE;
|
|
break;
|
|
case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
|
|
vec = BOOK3S_INTERRUPT_PERFMON;
|
|
break;
|
|
case BOOK3S_IRQPRIO_FAC_UNAVAIL:
|
|
vec = BOOK3S_INTERRUPT_FAC_UNAVAIL;
|
|
break;
|
|
default:
|
|
deliver = 0;
|
|
printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
|
|
break;
|
|
}
|
|
|
|
#if 0
|
|
printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
|
|
#endif
|
|
|
|
if (deliver)
|
|
kvmppc_inject_interrupt(vcpu, vec, 0);
|
|
|
|
return deliver;
|
|
}
|
|
|
|
/*
|
|
* This function determines if an irqprio should be cleared once issued.
|
|
*/
|
|
static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
|
|
{
|
|
switch (priority) {
|
|
case BOOK3S_IRQPRIO_DECREMENTER:
|
|
/* DEC interrupts get cleared by mtdec */
|
|
return false;
|
|
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
|
|
/* External interrupts get cleared by userspace */
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
|
|
{
|
|
unsigned long *pending = &vcpu->arch.pending_exceptions;
|
|
unsigned long old_pending = vcpu->arch.pending_exceptions;
|
|
unsigned int priority;
|
|
|
|
#ifdef EXIT_DEBUG
|
|
if (vcpu->arch.pending_exceptions)
|
|
printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
|
|
#endif
|
|
priority = __ffs(*pending);
|
|
while (priority < BOOK3S_IRQPRIO_MAX) {
|
|
if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
|
|
clear_irqprio(vcpu, priority)) {
|
|
clear_bit(priority, &vcpu->arch.pending_exceptions);
|
|
break;
|
|
}
|
|
|
|
priority = find_next_bit(pending,
|
|
BITS_PER_BYTE * sizeof(*pending),
|
|
priority + 1);
|
|
}
|
|
|
|
/* Tell the guest about our interrupt status */
|
|
kvmppc_update_int_pending(vcpu, *pending, old_pending);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
|
|
|
|
/*
 * Translate a guest physical address to a host page frame number.
 *
 * @writing:  passed through to gfn_to_pfn_prot()
 * @writable: optional out parameter; set to true for the magic page,
 *            otherwise passed through to gfn_to_pfn_prot()
 *
 * If a magic page address is configured and the page-aligned @gpa matches
 * it (under KVM_PAM), the page backing vcpu->arch.shared is returned with
 * an extra reference taken via get_page().  Every other address is
 * resolved with gfn_to_pfn_prot().
 */
pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
			bool *writable)
{
	ulong magic_pa = vcpu->arch.magic_page_pa & KVM_PAM;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	ulong shared_va;
	pfn_t pfn;

	/* Without MSR_SF only the low 32 bits of the magic address count. */
	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
		magic_pa = (uint32_t)magic_pa;

	gpa &= ~0xFFFULL;

	/* Common case: not the magic page. */
	if (likely(!magic_pa) || likely((gpa & KVM_PAM) != magic_pa))
		return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);

	/* Magic page override */
	shared_va = ((ulong)vcpu->arch.shared) & PAGE_MASK;
	pfn = (pfn_t)virt_to_phys((void*)shared_va) >> PAGE_SHIFT;
	get_page(pfn_to_page(pfn));
	if (writable)
		*writable = true;

	return pfn;
}
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn);
|
|
|
|
/*
 * Translate effective address @eaddr into @pte.
 *
 * @xlid: XLATE_DATA for a data access, anything else is treated as an
 *        instruction access
 * @xlrw: XLATE_WRITE for a write access
 *
 * When the relevant relocation bit (MSR_DR for data, MSR_IR for
 * instructions) is set, the vcpu's mmu.xlate hook does the work and its
 * result is returned.  Otherwise an identity mapping with full
 * permissions is filled in and 0 is returned; for instruction accesses
 * with only MSR_DR set and the split hack active, the SPLIT_HACK_MASK
 * bits are removed from the real address when they match SPLIT_HACK_OFFS.
 */
int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid,
		 enum xlate_readwrite xlrw, struct kvmppc_pte *pte)
{
	bool data = (xlid == XLATE_DATA);
	bool iswrite = (xlrw == XLATE_WRITE);
	int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR));

	if (relocated)
		return vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);

	/* Relocation is off: map the address onto itself. */
	pte->eaddr = eaddr;
	pte->raddr = eaddr & KVM_PAM;
	pte->vpage = VSID_REAL | eaddr >> 12;
	pte->may_read = true;
	pte->may_write = true;
	pte->may_execute = true;

	if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR &&
	    !data &&
	    (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
	    ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
		pte->raddr &= ~SPLIT_HACK_MASK;

	return 0;
}
|
|
|
|
int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
|
|
u32 *inst)
|
|
{
|
|
ulong pc = kvmppc_get_pc(vcpu);
|
|
int r;
|
|
|
|
if (type == INST_SC)
|
|
pc -= 4;
|
|
|
|
r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false);
|
|
if (r == EMULATE_DONE)
|
|
return r;
|
|
else
|
|
return EMULATE_AGAIN;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvmppc_load_last_inst);
|
|
|
|
/* No setup work is done here; always reports success (0). */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	return 0;
}
|
|
|
|
/* No initialisation is done here; always reports success (0). */
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
	return 0;
}
|
|
|
|
/* Intentionally empty: there is nothing to tear down here. */
void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
}
|
|
|
|
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
|
|
struct kvm_sregs *sregs)
|
|
{
|
|
return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
|
|
}
|
|
|
|
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
|
struct kvm_sregs *sregs)
|
|
{
|
|
return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
|
|
}
|
|
|
|
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
|
{
|
|
int i;
|
|
|
|
regs->pc = kvmppc_get_pc(vcpu);
|
|
regs->cr = kvmppc_get_cr(vcpu);
|
|
regs->ctr = kvmppc_get_ctr(vcpu);
|
|
regs->lr = kvmppc_get_lr(vcpu);
|
|
regs->xer = kvmppc_get_xer(vcpu);
|
|
regs->msr = kvmppc_get_msr(vcpu);
|
|
regs->srr0 = kvmppc_get_srr0(vcpu);
|
|
regs->srr1 = kvmppc_get_srr1(vcpu);
|
|
regs->pid = vcpu->arch.pid;
|
|
regs->sprg0 = kvmppc_get_sprg0(vcpu);
|
|
regs->sprg1 = kvmppc_get_sprg1(vcpu);
|
|
regs->sprg2 = kvmppc_get_sprg2(vcpu);
|
|
regs->sprg3 = kvmppc_get_sprg3(vcpu);
|
|
regs->sprg4 = kvmppc_get_sprg4(vcpu);
|
|
regs->sprg5 = kvmppc_get_sprg5(vcpu);
|
|
regs->sprg6 = kvmppc_get_sprg6(vcpu);
|
|
regs->sprg7 = kvmppc_get_sprg7(vcpu);
|
|
|
|
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
|
|
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
|
{
|
|
int i;
|
|
|
|
kvmppc_set_pc(vcpu, regs->pc);
|
|
kvmppc_set_cr(vcpu, regs->cr);
|
|
kvmppc_set_ctr(vcpu, regs->ctr);
|
|
kvmppc_set_lr(vcpu, regs->lr);
|
|
kvmppc_set_xer(vcpu, regs->xer);
|
|
kvmppc_set_msr(vcpu, regs->msr);
|
|
kvmppc_set_srr0(vcpu, regs->srr0);
|
|
kvmppc_set_srr1(vcpu, regs->srr1);
|
|
kvmppc_set_sprg0(vcpu, regs->sprg0);
|
|
kvmppc_set_sprg1(vcpu, regs->sprg1);
|
|
kvmppc_set_sprg2(vcpu, regs->sprg2);
|
|
kvmppc_set_sprg3(vcpu, regs->sprg3);
|
|
kvmppc_set_sprg4(vcpu, regs->sprg4);
|
|
kvmppc_set_sprg5(vcpu, regs->sprg5);
|
|
kvmppc_set_sprg6(vcpu, regs->sprg6);
|
|
kvmppc_set_sprg7(vcpu, regs->sprg7);
|
|
|
|
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
|
|
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
|
{
|
|
return -ENOTSUPP;
|
|
}
|
|
|
|
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
|
|
{
|
|
return -ENOTSUPP;
|
|
}
|
|
|
|
/*
 * Read the register identified by @id into @val.
 *
 * The VM's kvm_ops->get_one_reg callback is tried first.  Only when it
 * answers -EINVAL are the registers handled in this file looked up.
 *
 * Returns 0 on success, -ENXIO when the register exists but the feature
 * behind it is unavailable, -EINVAL for an id nobody handles, or whatever
 * other value the callback returned.
 */
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
			union kvmppc_one_reg *val)
{
	int r;
	long int i;

	r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
	if (r != -EINVAL)
		return r;

	/* The callback did not know this id; try the common registers. */
	r = 0;
	switch (id) {
	case KVM_REG_PPC_DAR:
		*val = get_reg_val(id, kvmppc_get_dar(vcpu));
		break;
	case KVM_REG_PPC_DSISR:
		*val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
		break;
	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
		i = id - KVM_REG_PPC_FPR0;
		*val = get_reg_val(id, VCPU_FPR(vcpu, i));
		break;
	case KVM_REG_PPC_FPSCR:
		*val = get_reg_val(id, vcpu->arch.fp.fpscr);
		break;
#ifdef CONFIG_VSX
	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			i = id - KVM_REG_PPC_VSR0;
			val->vsxval[0] = vcpu->arch.fp.fpr[i][0];
			val->vsxval[1] = vcpu->arch.fp.fpr[i][1];
		} else {
			r = -ENXIO;
		}
		break;
#endif /* CONFIG_VSX */
	case KVM_REG_PPC_DEBUG_INST:
		*val = get_reg_val(id, INS_TW);
		break;
#ifdef CONFIG_KVM_XICS
	case KVM_REG_PPC_ICP_STATE:
		if (!vcpu->arch.icp) {
			r = -ENXIO;
			break;
		}
		*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
		break;
#endif /* CONFIG_KVM_XICS */
	case KVM_REG_PPC_FSCR:
		*val = get_reg_val(id, vcpu->arch.fscr);
		break;
	case KVM_REG_PPC_TAR:
		*val = get_reg_val(id, vcpu->arch.tar);
		break;
	case KVM_REG_PPC_EBBHR:
		*val = get_reg_val(id, vcpu->arch.ebbhr);
		break;
	case KVM_REG_PPC_EBBRR:
		*val = get_reg_val(id, vcpu->arch.ebbrr);
		break;
	case KVM_REG_PPC_BESCR:
		*val = get_reg_val(id, vcpu->arch.bescr);
		break;
	case KVM_REG_PPC_VTB:
		*val = get_reg_val(id, vcpu->arch.vtb);
		break;
	case KVM_REG_PPC_IC:
		*val = get_reg_val(id, vcpu->arch.ic);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}
|
|
|
|
/*
 * Write @val to the register identified by @id.
 *
 * The VM's kvm_ops->set_one_reg callback is tried first.  Only when it
 * answers -EINVAL are the registers handled in this file looked up.
 *
 * Returns 0 on success, -ENXIO when the register exists but the feature
 * behind it is unavailable, -EINVAL for an id nobody handles, or whatever
 * other value the callback (or kvmppc_xics_set_icp()) returned.
 */
int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
			union kvmppc_one_reg *val)
{
	int r;
	long int i;

	r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
	if (r != -EINVAL)
		return r;

	/* The callback did not know this id; try the common registers. */
	r = 0;
	switch (id) {
	case KVM_REG_PPC_DAR:
		kvmppc_set_dar(vcpu, set_reg_val(id, *val));
		break;
	case KVM_REG_PPC_DSISR:
		kvmppc_set_dsisr(vcpu, set_reg_val(id, *val));
		break;
	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
		i = id - KVM_REG_PPC_FPR0;
		VCPU_FPR(vcpu, i) = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_FPSCR:
		vcpu->arch.fp.fpscr = set_reg_val(id, *val);
		break;
#ifdef CONFIG_VSX
	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			i = id - KVM_REG_PPC_VSR0;
			vcpu->arch.fp.fpr[i][0] = val->vsxval[0];
			vcpu->arch.fp.fpr[i][1] = val->vsxval[1];
		} else {
			r = -ENXIO;
		}
		break;
#endif /* CONFIG_VSX */
#ifdef CONFIG_KVM_XICS
	case KVM_REG_PPC_ICP_STATE:
		if (!vcpu->arch.icp) {
			r = -ENXIO;
			break;
		}
		r = kvmppc_xics_set_icp(vcpu,
					set_reg_val(id, *val));
		break;
#endif /* CONFIG_KVM_XICS */
	case KVM_REG_PPC_FSCR:
		vcpu->arch.fscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_TAR:
		vcpu->arch.tar = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_EBBHR:
		vcpu->arch.ebbhr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_EBBRR:
		vcpu->arch.ebbrr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_BESCR:
		vcpu->arch.bescr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_VTB:
		vcpu->arch.vtb = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_IC:
		vcpu->arch.ic = set_reg_val(id, *val);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}
|
|
|
|
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|
{
|
|
vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
|
|
}
|
|
|
|
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
|
|
{
|
|
vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
|
|
}
|
|
|
|
/* Set the guest MSR to @msr through the VM's kvm_ops->set_msr callback. */
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	struct kvm *kvm = vcpu->kvm;

	kvm->arch.kvm_ops->set_msr(vcpu, msr);
}
EXPORT_SYMBOL_GPL(kvmppc_set_msr);
|
|
|
|
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
|
|
}
|
|
|
|
/* No translation is performed; @tr is left untouched and 0 is returned. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return 0;
}
|
|
|
|
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
|
struct kvm_guest_debug *dbg)
|
|
{
|
|
vcpu->guest_debug = dbg->control;
|
|
return 0;
|
|
}
|
|
|
|
/* Queue a decrementer interrupt for the vcpu, then kick the vcpu. */
void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
{
	/* Make the interrupt pending before the kick. */
	kvmppc_core_queue_dec(vcpu);

	kvm_vcpu_kick(vcpu);
}
|
|
|
|
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
|
|
{
|
|
return kvm->arch.kvm_ops->vcpu_create(kvm, id);
|
|
}
|
|
|
|
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
|
|
{
|
|
vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
|
|
}
|
|
|
|
int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
|
|
}
|
|
|
|
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
|
|
{
|
|
return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
|
|
}
|
|
|
|
void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
|
|
struct kvm_memory_slot *dont)
|
|
{
|
|
kvm->arch.kvm_ops->free_memslot(free, dont);
|
|
}
|
|
|
|
int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|
unsigned long npages)
|
|
{
|
|
return kvm->arch.kvm_ops->create_memslot(slot, npages);
|
|
}
|
|
|
|
void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
|
{
|
|
kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
|
|
}
|
|
|
|
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
|
|
struct kvm_memory_slot *memslot,
|
|
struct kvm_userspace_memory_region *mem)
|
|
{
|
|
return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
|
|
}
|
|
|
|
void kvmppc_core_commit_memory_region(struct kvm *kvm,
|
|
struct kvm_userspace_memory_region *mem,
|
|
const struct kvm_memory_slot *old)
|
|
{
|
|
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
|
|
}
|
|
|
|
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
|
|
{
|
|
return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_unmap_hva);
|
|
|
|
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
|
|
{
|
|
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
|
|
}
|
|
|
|
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
|
|
{
|
|
return kvm->arch.kvm_ops->age_hva(kvm, start, end);
|
|
}
|
|
|
|
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
|
{
|
|
return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
|
|
}
|
|
|
|
/*
 * Forward host virtual address @hva and @pte to the VM's
 * kvm_ops->set_spte_hva callback.
 */
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
}
|
|
|
|
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
|
|
{
|
|
vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
|
|
}
|
|
|
|
int kvmppc_core_init_vm(struct kvm *kvm)
|
|
{
|
|
|
|
#ifdef CONFIG_PPC64
|
|
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
|
|
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
|
|
#endif
|
|
|
|
return kvm->arch.kvm_ops->init_vm(kvm);
|
|
}
|
|
|
|
void kvmppc_core_destroy_vm(struct kvm *kvm)
|
|
{
|
|
kvm->arch.kvm_ops->destroy_vm(kvm);
|
|
|
|
#ifdef CONFIG_PPC64
|
|
kvmppc_rtas_tokens_free(kvm);
|
|
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
|
|
#endif
|
|
}
|
|
|
|
/*
 * Processor compatibility check for book3s: always succeeds (returns 0).
 * The actual compatibility check is done when the HV or PR module is
 * loaded.
 */
int kvmppc_core_check_processor_compat(void)
{
	return 0;
}
|
|
|
|
int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
|
|
{
|
|
return kvm->arch.kvm_ops->hcall_implemented(hcall);
|
|
}
|
|
|
|
static int kvmppc_book3s_init(void)
|
|
{
|
|
int r;
|
|
|
|
r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
|
|
if (r)
|
|
return r;
|
|
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
|
|
r = kvmppc_book3s_init_pr();
|
|
#endif
|
|
return r;
|
|
|
|
}
|
|
|
|
/*
 * Module exit point: when CONFIG_KVM_BOOK3S_32_HANDLER is set, run
 * kvmppc_book3s_exit_pr() first; then unregister from the generic KVM
 * core with kvm_exit().
 */
static void kvmppc_book3s_exit(void)
{
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
	kvmppc_book3s_exit_pr();
#endif
	/* Reverse order of kvmppc_book3s_init(): the core goes last. */
	kvm_exit();
}
|
|
|
|
/* Module entry and exit points. */
module_init(kvmppc_book3s_init);
module_exit(kvmppc_book3s_exit);

/* On 32bit this is our one and only kernel module */
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
#endif
|