2009-10-30 12:47:06 +07:00
|
|
|
/*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License, version 2, as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*
|
|
|
|
* Copyright SUSE Linux Products GmbH 2009
|
|
|
|
*
|
|
|
|
* Authors: Alexander Graf <agraf@suse.de>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __ASM_KVM_BOOK3S_ASM_H__
|
|
|
|
#define __ASM_KVM_BOOK3S_ASM_H__
|
|
|
|
|
2013-04-18 03:30:50 +07:00
|
|
|
/* XICS ICP register offsets */
|
|
|
|
#define XICS_XIRR 4
|
|
|
|
#define XICS_MFRR 0xc
|
|
|
|
#define XICS_IPI 2 /* interrupt source # for IPIs */
|
|
|
|
|
2018-10-08 12:31:03 +07:00
|
|
|
/* LPIDs we support with this build -- runtime limit may be lower */
|
|
|
|
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
|
|
|
|
|
KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
This builds on the ability to run more than one vcore on a physical
core by using the micro-threading (split-core) modes of the POWER8
chip. Previously, only vcores from the same VM could be run together,
and (on POWER8) only if they had just one thread per core. With the
ability to split the core on guest entry and unsplit it on guest exit,
we can run up to 8 vcpu threads from up to 4 different VMs, and we can
run multiple vcores with 2 or 4 vcpus per vcore.
Dynamic micro-threading is only available if the static configuration
of the cores is whole-core mode (unsplit), and only on POWER8.
To manage this, we introduce a new kvm_split_mode struct which is
shared across all of the subcores in the core, with a pointer in the
paca on each thread. In addition we extend the core_info struct to
have information on each subcore. When deciding whether to add a
vcore to the set already on the core, we now have two possibilities:
(a) piggyback the vcore onto an existing subcore, or (b) start a new
subcore.
Currently, when any vcpu needs to exit the guest and switch to host
virtual mode, we interrupt all the threads in all subcores and switch
the core back to whole-core mode. It may be possible in future to
allow some of the subcores to keep executing in the guest while
subcore 0 switches to the host, but that is not implemented in this
patch.
This adds a module parameter called dynamic_mt_modes which controls
which micro-threading (split-core) modes the code will consider, as a
bitmap. In other words, if it is 0, no micro-threading mode is
considered; if it is 2, only 2-way micro-threading is considered; if
it is 4, only 4-way, and if it is 6, both 2-way and 4-way
micro-threading mode will be considered. The default is 6.
With this, we now have secondary threads which are the primary thread
for their subcore and therefore need to do the MMU switch. These
threads will need to be started even if they have no vcpu to run, so
we use the vcore pointer in the PACA rather than the vcpu pointer to
trigger them.
It is now possible for thread 0 to find that an exit has been
requested before it gets to switch the subcore state to the guest. In
that case we haven't added the guest's timebase offset to the
timebase, so we need to be careful not to subtract the offset in the
guest exit path. In fact we just skip the whole path that switches
back to host context, since we haven't switched to the guest context.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-07-02 17:38:16 +07:00
|
|
|
/* Maximum number of threads per physical core */
|
|
|
|
#define MAX_SMT_THREADS 8
|
|
|
|
|
|
|
|
/* Maximum number of subcores per physical core */
|
|
|
|
#define MAX_SUBCORES 4
|
|
|
|
|
2010-01-08 08:58:03 +07:00
|
|
|
#ifdef __ASSEMBLY__
|
|
|
|
|
2010-04-16 05:11:41 +07:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_HANDLER
|
2009-10-30 12:47:06 +07:00
|
|
|
|
|
|
|
#include <asm/kvm_asm.h>
|
|
|
|
|
|
|
|
.macro DO_KVM intno
|
|
|
|
.if (\intno == BOOK3S_INTERRUPT_SYSTEM_RESET) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_MACHINE_CHECK) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_DATA_STORAGE) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_INST_STORAGE) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
|
2011-04-05 11:20:31 +07:00
|
|
|
(\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \
|
2009-10-30 12:47:06 +07:00
|
|
|
(\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_PROGRAM) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_DECREMENTER) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_SYSCALL) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_TRACE) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_PERFMON) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_ALTIVEC) || \
|
|
|
|
(\intno == BOOK3S_INTERRUPT_VSX)
|
|
|
|
|
|
|
|
b kvmppc_trampoline_\intno
|
|
|
|
kvmppc_resume_\intno:
|
|
|
|
|
|
|
|
.endif
|
|
|
|
.endm
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
.macro DO_KVM intno
|
|
|
|
.endm
|
|
|
|
|
2010-04-16 05:11:41 +07:00
|
|
|
#endif /* CONFIG_KVM_BOOK3S_HANDLER */
|
2009-10-30 12:47:06 +07:00
|
|
|
|
2010-01-08 08:58:03 +07:00
|
|
|
#else /*__ASSEMBLY__ */
|
|
|
|
|
KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
This builds on the ability to run more than one vcore on a physical
core by using the micro-threading (split-core) modes of the POWER8
chip. Previously, only vcores from the same VM could be run together,
and (on POWER8) only if they had just one thread per core. With the
ability to split the core on guest entry and unsplit it on guest exit,
we can run up to 8 vcpu threads from up to 4 different VMs, and we can
run multiple vcores with 2 or 4 vcpus per vcore.
Dynamic micro-threading is only available if the static configuration
of the cores is whole-core mode (unsplit), and only on POWER8.
To manage this, we introduce a new kvm_split_mode struct which is
shared across all of the subcores in the core, with a pointer in the
paca on each thread. In addition we extend the core_info struct to
have information on each subcore. When deciding whether to add a
vcore to the set already on the core, we now have two possibilities:
(a) piggyback the vcore onto an existing subcore, or (b) start a new
subcore.
Currently, when any vcpu needs to exit the guest and switch to host
virtual mode, we interrupt all the threads in all subcores and switch
the core back to whole-core mode. It may be possible in future to
allow some of the subcores to keep executing in the guest while
subcore 0 switches to the host, but that is not implemented in this
patch.
This adds a module parameter called dynamic_mt_modes which controls
which micro-threading (split-core) modes the code will consider, as a
bitmap. In other words, if it is 0, no micro-threading mode is
considered; if it is 2, only 2-way micro-threading is considered; if
it is 4, only 4-way, and if it is 6, both 2-way and 4-way
micro-threading mode will be considered. The default is 6.
With this, we now have secondary threads which are the primary thread
for their subcore and therefore need to do the MMU switch. These
threads will need to be started even if they have no vcpu to run, so
we use the vcore pointer in the PACA rather than the vcpu pointer to
trigger them.
It is now possible for thread 0 to find that an exit has been
requested before it gets to switch the subcore state to the guest. In
that case we haven't added the guest's timebase offset to the
timebase, so we need to be careful not to subtract the offset in the
guest exit path. In fact we just skip the whole path that switches
back to host context, since we haven't switched to the guest context.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-07-02 17:38:16 +07:00
|
|
|
struct kvmppc_vcore;
|
|
|
|
|
|
|
|
/* Struct used for coordinating micro-threading (split-core) mode changes */
|
|
|
|
struct kvm_split_mode {
|
|
|
|
unsigned long rpr;
|
|
|
|
unsigned long pmmar;
|
|
|
|
unsigned long ldbar;
|
|
|
|
u8 subcore_size;
|
|
|
|
u8 do_nap;
|
|
|
|
u8 napped[MAX_SMT_THREADS];
|
2017-06-22 12:08:42 +07:00
|
|
|
struct kvmppc_vcore *vc[MAX_SUBCORES];
|
KVM: PPC: Book3S HV: Run HPT guests on POWER9 radix hosts
This patch removes the restriction that a radix host can only run
radix guests, allowing us to run HPT (hashed page table) guests as
well. This is useful because it provides a way to run old guest
kernels that know about POWER8 but not POWER9.
Unfortunately, POWER9 currently has a restriction that all threads
in a given code must either all be in HPT mode, or all in radix mode.
This means that when entering a HPT guest, we have to obtain control
of all 4 threads in the core and get them to switch their LPIDR and
LPCR registers, even if they are not going to run a guest. On guest
exit we also have to get all threads to switch LPIDR and LPCR back
to host values.
To make this feasible, we require that KVM not be in the "independent
threads" mode, and that the CPU cores be in single-threaded mode from
the host kernel's perspective (only thread 0 online; threads 1, 2 and
3 offline). That allows us to use the same code as on POWER8 for
obtaining control of the secondary threads.
To manage the LPCR/LPIDR changes required, we extend the kvm_split_info
struct to contain the information needed by the secondary threads.
All threads perform a barrier synchronization (where all threads wait
for every other thread to reach the synchronization point) on guest
entry, both before and after loading LPCR and LPIDR. On guest exit,
they all once again perform a barrier synchronization both before
and after loading host values into LPCR and LPIDR.
Finally, it is also currently necessary to flush the entire TLB every
time we enter a HPT guest on a radix host. We do this on thread 0
with a loop of tlbiel instructions.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2017-10-19 10:11:23 +07:00
|
|
|
/* Bits for changing lpcr on P9 */
|
|
|
|
unsigned long lpcr_req;
|
|
|
|
unsigned long lpidr_req;
|
|
|
|
unsigned long host_lpcr;
|
|
|
|
u32 do_set;
|
|
|
|
u32 do_restore;
|
|
|
|
union {
|
|
|
|
u32 allphases;
|
|
|
|
u8 phase[4];
|
|
|
|
} lpcr_sync;
|
KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
This builds on the ability to run more than one vcore on a physical
core by using the micro-threading (split-core) modes of the POWER8
chip. Previously, only vcores from the same VM could be run together,
and (on POWER8) only if they had just one thread per core. With the
ability to split the core on guest entry and unsplit it on guest exit,
we can run up to 8 vcpu threads from up to 4 different VMs, and we can
run multiple vcores with 2 or 4 vcpus per vcore.
Dynamic micro-threading is only available if the static configuration
of the cores is whole-core mode (unsplit), and only on POWER8.
To manage this, we introduce a new kvm_split_mode struct which is
shared across all of the subcores in the core, with a pointer in the
paca on each thread. In addition we extend the core_info struct to
have information on each subcore. When deciding whether to add a
vcore to the set already on the core, we now have two possibilities:
(a) piggyback the vcore onto an existing subcore, or (b) start a new
subcore.
Currently, when any vcpu needs to exit the guest and switch to host
virtual mode, we interrupt all the threads in all subcores and switch
the core back to whole-core mode. It may be possible in future to
allow some of the subcores to keep executing in the guest while
subcore 0 switches to the host, but that is not implemented in this
patch.
This adds a module parameter called dynamic_mt_modes which controls
which micro-threading (split-core) modes the code will consider, as a
bitmap. In other words, if it is 0, no micro-threading mode is
considered; if it is 2, only 2-way micro-threading is considered; if
it is 4, only 4-way, and if it is 6, both 2-way and 4-way
micro-threading mode will be considered. The default is 6.
With this, we now have secondary threads which are the primary thread
for their subcore and therefore need to do the MMU switch. These
threads will need to be started even if they have no vcpu to run, so
we use the vcore pointer in the PACA rather than the vcpu pointer to
trigger them.
It is now possible for thread 0 to find that an exit has been
requested before it gets to switch the subcore state to the guest. In
that case we haven't added the guest's timebase offset to the
timebase, so we need to be careful not to subtract the offset in the
guest exit path. In fact we just skip the whole path that switches
back to host context, since we haven't switched to the guest context.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-07-02 17:38:16 +07:00
|
|
|
};
|
|
|
|
|
2011-06-29 07:20:58 +07:00
|
|
|
/*
|
|
|
|
* This struct goes in the PACA on 64-bit processors. It is used
|
|
|
|
* to store host state that needs to be saved when we enter a guest
|
|
|
|
* and restored when we exit, but isn't specific to any particular
|
|
|
|
* guest or vcpu. It also has some scratch fields used by the guest
|
|
|
|
* exit code.
|
|
|
|
*/
|
|
|
|
struct kvmppc_host_state {
|
|
|
|
ulong host_r1;
|
|
|
|
ulong host_r2;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:21:34 +07:00
|
|
|
ulong host_msr;
|
2011-06-29 07:20:58 +07:00
|
|
|
ulong vmhandler;
|
|
|
|
ulong scratch0;
|
|
|
|
ulong scratch1;
|
2013-11-11 20:59:47 +07:00
|
|
|
ulong scratch2;
|
2011-06-29 07:20:58 +07:00
|
|
|
u8 in_guest;
|
2011-07-23 14:41:44 +07:00
|
|
|
u8 restore_hid5;
|
KVM: PPC: Implement H_CEDE hcall for book3s_hv in real-mode code
With a KVM guest operating in SMT4 mode (i.e. 4 hardware threads per
core), whenever a CPU goes idle, we have to pull all the other
hardware threads in the core out of the guest, because the H_CEDE
hcall is handled in the kernel. This is inefficient.
This adds code to book3s_hv_rmhandlers.S to handle the H_CEDE hcall
in real mode. When a guest vcpu does an H_CEDE hcall, we now only
exit to the kernel if all the other vcpus in the same core are also
idle. Otherwise we mark this vcpu as napping, save state that could
be lost in nap mode (mainly GPRs and FPRs), and execute the nap
instruction. When the thread wakes up, because of a decrementer or
external interrupt, we come back in at kvm_start_guest (from the
system reset interrupt vector), find the `napping' flag set in the
paca, and go to the resume path.
This has some other ramifications. First, when starting a core, we
now start all the threads, both those that are immediately runnable and
those that are idle. This is so that we don't have to pull all the
threads out of the guest when an idle thread gets a decrementer interrupt
and wants to start running. In fact the idle threads will all start
with the H_CEDE hcall returning; being idle they will just do another
H_CEDE immediately and go to nap mode.
This required some changes to kvmppc_run_core() and kvmppc_run_vcpu().
These functions have been restructured to make them simpler and clearer.
We introduce a level of indirection in the wait queue that gets woken
when external and decrementer interrupts get generated for a vcpu, so
that we can have the 4 vcpus in a vcore using the same wait queue.
We need this because the 4 vcpus are being handled by one thread.
Secondly, when we need to exit from the guest to the kernel, we now
have to generate an IPI for any napping threads, because an HDEC
interrupt doesn't wake up a napping thread.
Thirdly, we now need to be able to handle virtual external interrupts
and decrementer interrupts becoming pending while a thread is napping,
and deliver those interrupts to the guest when the thread wakes.
This is done in kvmppc_cede_reentry, just before fast_guest_return.
Finally, since we are not using the generic kvm_vcpu_block for book3s_hv,
and hence not calling kvm_arch_vcpu_runnable, we can remove the #ifdef
from kvm_arch_vcpu_runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-07-23 14:42:46 +07:00
|
|
|
u8 napping;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:21:34 +07:00
|
|
|
|
2013-10-07 23:47:52 +07:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
2012-02-03 07:54:17 +07:00
|
|
|
u8 hwthread_req;
|
|
|
|
u8 hwthread_state;
|
2013-04-18 03:30:50 +07:00
|
|
|
u8 host_ipi;
|
KVM: PPC: Book3S HV: Run HPT guests on POWER9 radix hosts
This patch removes the restriction that a radix host can only run
radix guests, allowing us to run HPT (hashed page table) guests as
well. This is useful because it provides a way to run old guest
kernels that know about POWER8 but not POWER9.
Unfortunately, POWER9 currently has a restriction that all threads
in a given code must either all be in HPT mode, or all in radix mode.
This means that when entering a HPT guest, we have to obtain control
of all 4 threads in the core and get them to switch their LPIDR and
LPCR registers, even if they are not going to run a guest. On guest
exit we also have to get all threads to switch LPIDR and LPCR back
to host values.
To make this feasible, we require that KVM not be in the "independent
threads" mode, and that the CPU cores be in single-threaded mode from
the host kernel's perspective (only thread 0 online; threads 1, 2 and
3 offline). That allows us to use the same code as on POWER8 for
obtaining control of the secondary threads.
To manage the LPCR/LPIDR changes required, we extend the kvm_split_info
struct to contain the information needed by the secondary threads.
All threads perform a barrier synchronization (where all threads wait
for every other thread to reach the synchronization point) on guest
entry, both before and after loading LPCR and LPIDR. On guest exit,
they all once again perform a barrier synchronization both before
and after loading host values into LPCR and LPIDR.
Finally, it is also currently necessary to flush the entire TLB every
time we enter a HPT guest on a radix host. We do this on thread 0
with a loop of tlbiel instructions.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2017-10-19 10:11:23 +07:00
|
|
|
u8 ptid; /* thread number within subcore when split */
|
|
|
|
u8 tid; /* thread number within whole core */
|
KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9
POWER9 has hardware bugs relating to transactional memory and thread
reconfiguration (changes to hardware SMT mode). Specifically, the core
does not have enough storage to store a complete checkpoint of all the
architected state for all four threads. The DD2.2 version of POWER9
includes hardware modifications designed to allow hypervisor software
to implement workarounds for these problems. This patch implements
those workarounds in KVM code so that KVM guests see a full, working
transactional memory implementation.
The problems center around the use of TM suspended state, where the
CPU has a checkpointed state but execution is not transactional. The
workaround is to implement a "fake suspend" state, which looks to the
guest like suspended state but the CPU does not store a checkpoint.
In this state, any instruction that would cause a transition to
transactional state (rfid, rfebb, mtmsrd, tresume) or would use the
checkpointed state (treclaim) causes a "soft patch" interrupt (vector
0x1500) to the hypervisor so that it can be emulated. The trechkpt
instruction also causes a soft patch interrupt.
On POWER9 DD2.2, we avoid returning to the guest in any state which
would require a checkpoint to be present. The trechkpt in the guest
entry path which would normally create that checkpoint is replaced by
either a transition to fake suspend state, if the guest is in suspend
state, or a rollback to the pre-transactional state if the guest is in
transactional state. Fake suspend state is indicated by a flag in the
PACA plus a new bit in the PSSCR. The new PSSCR bit is write-only and
reads back as 0.
On exit from the guest, if the guest is in fake suspend state, we still
do the treclaim instruction as we would in real suspend state, in order
to get into non-transactional state, but we do not save the resulting
register state since there was no checkpoint.
Emulation of the instructions that cause a softpatch interrupt is
handled in two paths. If the guest is in real suspend mode, we call
kvmhv_p9_tm_emulation_early() to handle the cases where the guest is
transitioning to transactional state. This is called before we do the
treclaim in the guest exit path; because we haven't done treclaim, we
can get back to the guest with the transaction still active. If the
instruction is a case that kvmhv_p9_tm_emulation_early() doesn't
handle, or if the guest is in fake suspend state, then we proceed to
do the complete guest exit path and subsequently call
kvmhv_p9_tm_emulation() in host context with the MMU on. This handles
all the cases including the cases that generate program interrupts
(illegal instruction or TM Bad Thing) and facility unavailable
interrupts.
The emulation is reasonably straightforward and is mostly concerned
with checking for exception conditions and updating the state of
registers such as MSR and CR0. The treclaim emulation takes care to
ensure that the TEXASR register gets updated as if it were the guest
treclaim instruction that had done failure recording, not the treclaim
done in hypervisor state in the guest exit path.
With this, the KVM_CAP_PPC_HTM capability returns true (1) even if
transactional memory is not available to host userspace.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-03-21 17:32:01 +07:00
|
|
|
u8 fake_suspend;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:21:34 +07:00
|
|
|
struct kvm_vcpu *kvm_vcpu;
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
the none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:23:08 +07:00
|
|
|
struct kvmppc_vcore *kvm_vcore;
|
2017-04-05 14:54:54 +07:00
|
|
|
void __iomem *xics_phys;
|
2017-04-05 14:54:56 +07:00
|
|
|
void __iomem *xive_tima_phys;
|
|
|
|
void __iomem *xive_tima_virt;
|
2013-04-18 03:30:50 +07:00
|
|
|
u32 saved_xirr;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:21:34 +07:00
|
|
|
u64 dabr;
|
2014-03-25 06:47:08 +07:00
|
|
|
u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
|
KVM: PPC: book3s_hv: Add support for PPC970-family processors
This adds support for running KVM guests in supervisor mode on those
PPC970 processors that have a usable hypervisor mode. Unfortunately,
Apple G5 machines have supervisor mode disabled (MSR[HV] is forced to
1), but the YDL PowerStation does have a usable hypervisor mode.
There are several differences between the PPC970 and POWER7 in how
guests are managed. These differences are accommodated using the
CPU_FTR_ARCH_201 (PPC970) and CPU_FTR_ARCH_206 (POWER7) CPU feature
bits. Notably, on PPC970:
* The LPCR, LPID or RMOR registers don't exist, and the functions of
those registers are provided by bits in HID4 and one bit in HID0.
* External interrupts can be directed to the hypervisor, but unlike
POWER7 they are masked by MSR[EE] in non-hypervisor modes and use
SRR0/1 not HSRR0/1.
* There is no virtual RMA (VRMA) mode; the guest must use an RMO
(real mode offset) area.
* The TLB entries are not tagged with the LPID, so it is necessary to
flush the whole TLB on partition switch. Furthermore, when switching
partitions we have to ensure that no other CPU is executing the tlbie
or tlbsync instructions in either the old or the new partition,
otherwise undefined behaviour can occur.
* The PMU has 8 counters (PMC registers) rather than 6.
* The DSCR, PURR, SPURR, AMR, AMOR, UAMOR registers don't exist.
* The SLB has 64 entries rather than 32.
* There is no mediated external interrupt facility, so if we switch to
a guest that has a virtual external interrupt pending but the guest
has MSR[EE] = 0, we have to arrange to have an interrupt pending for
it so that we can get control back once it re-enables interrupts. We
do that by sending ourselves an IPI with smp_send_reschedule after
hard-disabling interrupts.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:40:08 +07:00
|
|
|
u32 host_pmc[8];
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:21:34 +07:00
|
|
|
u64 host_purr;
|
|
|
|
u64 host_spurr;
|
|
|
|
u64 host_dscr;
|
|
|
|
u64 dec_expires;
|
KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
This builds on the ability to run more than one vcore on a physical
core by using the micro-threading (split-core) modes of the POWER8
chip. Previously, only vcores from the same VM could be run together,
and (on POWER8) only if they had just one thread per core. With the
ability to split the core on guest entry and unsplit it on guest exit,
we can run up to 8 vcpu threads from up to 4 different VMs, and we can
run multiple vcores with 2 or 4 vcpus per vcore.
Dynamic micro-threading is only available if the static configuration
of the cores is whole-core mode (unsplit), and only on POWER8.
To manage this, we introduce a new kvm_split_mode struct which is
shared across all of the subcores in the core, with a pointer in the
paca on each thread. In addition we extend the core_info struct to
have information on each subcore. When deciding whether to add a
vcore to the set already on the core, we now have two possibilities:
(a) piggyback the vcore onto an existing subcore, or (b) start a new
subcore.
Currently, when any vcpu needs to exit the guest and switch to host
virtual mode, we interrupt all the threads in all subcores and switch
the core back to whole-core mode. It may be possible in future to
allow some of the subcores to keep executing in the guest while
subcore 0 switches to the host, but that is not implemented in this
patch.
This adds a module parameter called dynamic_mt_modes which controls
which micro-threading (split-core) modes the code will consider, as a
bitmap. In other words, if it is 0, no micro-threading mode is
considered; if it is 2, only 2-way micro-threading is considered; if
it is 4, only 4-way, and if it is 6, both 2-way and 4-way
micro-threading mode will be considered. The default is 6.
With this, we now have secondary threads which are the primary thread
for their subcore and therefore need to do the MMU switch. These
threads will need to be started even if they have no vcpu to run, so
we use the vcore pointer in the PACA rather than the vcpu pointer to
trigger them.
It is now possible for thread 0 to find that an exit has been
requested before it gets to switch the subcore state to the guest. In
that case we haven't added the guest's timebase offset to the
timebase, so we need to be careful not to subtract the offset in the
guest exit path. In fact we just skip the whole path that switches
back to host context, since we haven't switched to the guest context.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-07-02 17:38:16 +07:00
|
|
|
struct kvm_split_mode *kvm_split_mode;
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 07:21:34 +07:00
|
|
|
#endif
|
2013-02-05 01:10:51 +07:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
|
|
u64 cfar;
|
2013-09-20 11:52:39 +07:00
|
|
|
u64 ppr;
|
2014-04-29 21:48:44 +07:00
|
|
|
u64 host_fscr;
|
2013-02-05 01:10:51 +07:00
|
|
|
#endif
|
2011-06-29 07:20:58 +07:00
|
|
|
};
|
|
|
|
|
2010-01-08 08:58:03 +07:00
|
|
|
struct kvmppc_book3s_shadow_vcpu {
|
2013-11-29 08:29:00 +07:00
|
|
|
bool in_use;
|
2010-01-08 08:58:03 +07:00
|
|
|
ulong gpr[14];
|
|
|
|
u32 cr;
|
2015-05-27 06:56:57 +07:00
|
|
|
ulong xer;
|
2010-04-16 05:11:39 +07:00
|
|
|
ulong ctr;
|
|
|
|
ulong lr;
|
|
|
|
ulong pc;
|
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu
Currently PR-style KVM keeps the volatile guest register values
(R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than
the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two
places, a kmalloc'd struct and in the PACA, and it gets copied back
and forth in kvmppc_core_vcpu_load/put(), because the real-mode code
can't rely on being able to access the kmalloc'd struct.
This changes the code to copy the volatile values into the shadow_vcpu
as one of the last things done before entering the guest. Similarly
the values are copied back out of the shadow_vcpu to the kvm_vcpu
immediately after exiting the guest. We arrange for interrupts to be
still disabled at this point so that we can't get preempted on 64-bit
and end up copying values from the wrong PACA.
This means that the accessor functions in kvm_book3s.h for these
registers are greatly simplified, and are same between PR and HV KVM.
In places where accesses to shadow_vcpu fields are now replaced by
accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs.
Finally, on 64-bit, we don't need the kmalloc'd struct at all any more.
With this, the time to read the PVR one million times in a loop went
from 567.7ms to 575.5ms (averages of 6 values), an increase of about
1.4% for this worse-case test for guest entries and exits. The
standard deviation of the measurements is about 11ms, so the
difference is only marginally significant statistically.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 11:52:43 +07:00
|
|
|
|
2010-04-16 05:11:39 +07:00
|
|
|
ulong shadow_srr1;
|
|
|
|
ulong fault_dar;
|
KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu
Currently PR-style KVM keeps the volatile guest register values
(R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than
the main kvm_vcpu struct. For 64-bit, the shadow_vcpu exists in two
places, a kmalloc'd struct and in the PACA, and it gets copied back
and forth in kvmppc_core_vcpu_load/put(), because the real-mode code
can't rely on being able to access the kmalloc'd struct.
This changes the code to copy the volatile values into the shadow_vcpu
as one of the last things done before entering the guest. Similarly
the values are copied back out of the shadow_vcpu to the kvm_vcpu
immediately after exiting the guest. We arrange for interrupts to be
still disabled at this point so that we can't get preempted on 64-bit
and end up copying values from the wrong PACA.
This means that the accessor functions in kvm_book3s.h for these
registers are greatly simplified, and are same between PR and HV KVM.
In places where accesses to shadow_vcpu fields are now replaced by
accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs.
Finally, on 64-bit, we don't need the kmalloc'd struct at all any more.
With this, the time to read the PVR one million times in a loop went
from 567.7ms to 575.5ms (averages of 6 values), an increase of about
1.4% for this worse-case test for guest entries and exits. The
standard deviation of the measurements is about 11ms, so the
difference is only marginally significant statistically.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2013-09-20 11:52:43 +07:00
|
|
|
u32 fault_dsisr;
|
|
|
|
u32 last_inst;
|
2010-04-16 05:11:39 +07:00
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_BOOK3S_32
|
|
|
|
u32 sr[16]; /* Guest SRs */
|
2011-06-29 07:20:58 +07:00
|
|
|
|
|
|
|
struct kvmppc_host_state hstate;
|
2010-04-16 05:11:39 +07:00
|
|
|
#endif
|
2011-06-29 07:20:58 +07:00
|
|
|
|
2010-04-16 05:11:39 +07:00
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
|
|
u8 slb_max; /* highest used guest slb entry */
|
|
|
|
struct {
|
|
|
|
u64 esid;
|
|
|
|
u64 vsid;
|
|
|
|
} slb[64]; /* guest SLB */
|
2014-04-29 21:48:44 +07:00
|
|
|
u64 shadow_fscr;
|
2010-04-16 05:11:39 +07:00
|
|
|
#endif
|
2010-01-08 08:58:03 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif /*__ASSEMBLY__ */
|
|
|
|
|
2012-02-03 07:54:17 +07:00
|
|
|
/* Values for kvm_state */
|
|
|
|
#define KVM_HWTHREAD_IN_KERNEL 0
|
2016-07-08 13:20:49 +07:00
|
|
|
#define KVM_HWTHREAD_IN_IDLE 1
|
2012-02-03 07:54:17 +07:00
|
|
|
#define KVM_HWTHREAD_IN_KVM 2
|
|
|
|
|
2009-10-30 12:47:06 +07:00
|
|
|
#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
|