Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "One of the largest releases for KVM...  Hardly any generic
  changes, but lots of architecture-specific updates.

  ARM:
   - VHE support so that we can run the kernel at EL2 on ARMv8.1 systems
   - PMU support for guests
   - 32bit world switch rewritten in C
   - various optimizations to the vgic save/restore code.

  PPC:
   - enabled KVM-VFIO integration ("VFIO device")
   - optimizations to speed up IPIs between vcpus
   - in-kernel handling of IOMMU hypercalls
   - support for dynamic DMA windows (DDW).

  s390:
   - provide the floating point registers via sync regs;
   - separated instruction vs.  data accesses
   - dirty log improvements for huge guests
   - bugfixes and documentation improvements.

  x86:
   - Hyper-V VMBus hypercall userspace exit
   - alternative implementation of lowest-priority interrupts using
     vector hashing (for better VT-d posted interrupt support)
   - fixed guest debugging with nested virtualizations
   - improved interrupt tracking in the in-kernel IOAPIC
   - generic infrastructure for tracking writes to guest
     memory - currently its only use is to speedup the legacy shadow
     paging (pre-EPT) case, but in the future it will be used for
     virtual GPUs as well
   - much cleanup (LAPIC, kvmclock, MMU, PIT), including ubsan fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (217 commits)
  KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch
  KVM: x86: disable MPX if host did not enable MPX XSAVE features
  arm64: KVM: vgic-v3: Only wipe LRs on vcpu exit
  arm64: KVM: vgic-v3: Reset LRs at boot time
  arm64: KVM: vgic-v3: Do not save an LR known to be empty
  arm64: KVM: vgic-v3: Save maintenance interrupt state only if required
  arm64: KVM: vgic-v3: Avoid accessing ICH registers
  KVM: arm/arm64: vgic-v2: Make GICD_SGIR quicker to hit
  KVM: arm/arm64: vgic-v2: Only wipe LRs on vcpu exit
  KVM: arm/arm64: vgic-v2: Reset LRs at boot time
  KVM: arm/arm64: vgic-v2: Do not save an LR known to be empty
  KVM: arm/arm64: vgic-v2: Move GICH_ELRSR saving to its own function
  KVM: arm/arm64: vgic-v2: Save maintenance interrupt state only if required
  KVM: arm/arm64: vgic-v2: Avoid accessing GICH registers
  KVM: s390: allocate only one DMA page per VM
  KVM: s390: enable STFLE interpretation only if enabled for the guest
  KVM: s390: wake up when the VCPU cpu timer expires
  KVM: s390: step the VCPU timer while in enabled wait
  KVM: s390: protect VCPU cpu timer with a seqcount
  KVM: s390: step VCPU cpu timer during kvm_run ioctl
  ...
Linus Torvalds 2016-03-16 09:55:35 -07:00
commit 10dc374766
142 changed files with 6754 additions and 2936 deletions


@ -2507,8 +2507,9 @@ struct kvm_create_device {
4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
Type: device ioctl, vm ioctl
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
KVM_CAP_VCPU_ATTRIBUTES for vcpu device
Type: device ioctl, vm ioctl, vcpu ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
@ -2533,8 +2534,9 @@ struct kvm_device_attr {
4.81 KVM_HAS_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
Type: device ioctl, vm ioctl
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
KVM_CAP_VCPU_ATTRIBUTES for vcpu device
Type: device ioctl, vm ioctl, vcpu ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
@ -2577,6 +2579,8 @@ Possible features:
Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
- KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
Depends on KVM_CAP_ARM_PSCI_0_2.
- KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
Depends on KVM_CAP_ARM_PMU_V3.
4.83 KVM_ARM_PREFERRED_TARGET
@ -3035,6 +3039,87 @@ Returns: 0 on success, -1 on error
Queues an SMI on the thread's vcpu.
4.97 KVM_CAP_PPC_MULTITCE
Capability: KVM_CAP_PPC_MULTITCE
Architectures: ppc
Type: vm
This capability means the kernel is capable of handling hypercalls
H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
space. This significantly accelerates DMA operations for PPC KVM guests.
User space should expect that its handlers for these hypercalls
are not going to be called if user space previously registered LIOBN
in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
user space might have to advertise it for the guest. For example,
IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
present in the "ibm,hypertas-functions" device-tree property.
The hypercalls mentioned above may or may not be processed successfully
in the kernel based fast path. If they can not be handled by the kernel,
they will get passed on to user space. So user space still has to have
an implementation for these despite the in kernel acceleration.
This capability is always enabled.
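As a hedged illustration (not part of this patch), a VMM can still probe the
capability before relying on the in-kernel acceleration; kvm_fd below is
assumed to be an open /dev/kvm descriptor:

	/* returns > 0 when in-kernel H_PUT_TCE_INDIRECT/H_STUFF_TCE is available */
	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_MULTITCE) > 0)
		use_multitce = true;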
4.98 KVM_CREATE_SPAPR_TCE_64
Capability: KVM_CAP_SPAPR_TCE_64
Architectures: powerpc
Type: vm ioctl
Parameters: struct kvm_create_spapr_tce_64 (in)
Returns: file descriptor for manipulating the created TCE table
This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
windows, described in 4.62 KVM_CREATE_SPAPR_TCE
This capability uses extended struct in ioctl interface:
/* for KVM_CAP_SPAPR_TCE_64 */
struct kvm_create_spapr_tce_64 {
__u64 liobn;
__u32 page_shift;
__u32 flags;
__u64 offset; /* in pages */
__u64 size; /* in pages */
};
The aim of extension is to support an additional bigger DMA window with
a variable page size.
KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and
a bus offset of the corresponding DMA window, @size and @offset are numbers
of IOMMU pages.
@flags are not used at the moment.
The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
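For illustration only (not part of this patch), creating such a window from
userspace might look as follows; the LIOBN, page shift, offset and size are
made-up placeholder values and vm_fd is assumed to be an existing VM file
descriptor:

	struct kvm_create_spapr_tce_64 args = {
		.liobn      = 0x80000001,	/* placeholder LIOBN */
		.page_shift = 16,		/* 64K IOMMU pages */
		.offset     = 1ULL << 43,	/* bus offset, in pages */
		.size       = 1ULL << 15,	/* window size, in pages */
		.flags      = 0,		/* unused at the moment */
	};
	int tce64_fd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &args);
	/* tce64_fd < 0 means the 64bit window could not be created */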
4.98 KVM_REINJECT_CONTROL
Capability: KVM_CAP_REINJECT_CONTROL
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_reinject_control (in)
Returns: 0 on success,
-EFAULT if struct kvm_reinject_control cannot be read,
-ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier.
i8254 (PIT) has two modes, reinject and !reinject. The default is reinject,
where KVM queues elapsed i8254 ticks and monitors completion of interrupt from
vector(s) that i8254 injects. Reinject mode dequeues a tick and injects its
interrupt whenever there isn't a pending interrupt from i8254.
!reinject mode injects an interrupt as soon as a tick arrives.
struct kvm_reinject_control {
__u8 pit_reinject;
__u8 reserved[31];
};
pit_reinject = 0 (!reinject mode) is recommended, unless running an old
operating system that uses the PIT for timing (e.g. Linux 2.4.x).
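For illustration only (not part of this patch), switching the in-kernel PIT to
!reinject mode could look like this; vm_fd is assumed to be a VM on which
KVM_CREATE_PIT2 already succeeded:

	struct kvm_reinject_control control = {
		.pit_reinject = 0,	/* !reinject mode, as recommended */
	};
	if (ioctl(vm_fd, KVM_REINJECT_CONTROL, &control) < 0)
		perror("KVM_REINJECT_CONTROL");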
5. The kvm_run structure
------------------------
@ -3339,6 +3424,7 @@ EOI was received.
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
__u32 type;
union {
struct {
@ -3347,6 +3433,11 @@ EOI was received.
__u64 evt_page;
__u64 msg_page;
} synic;
struct {
__u64 input;
__u64 result;
__u64 params[2];
} hcall;
} u;
};
/* KVM_EXIT_HYPERV */
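As a hedged sketch (not part of this patch), a VMM's run loop could complete
such a forwarded hypercall as shown below; handle_vmbus_hcall() is a
hypothetical helper and run points at the vcpu's kvm_run area:

	case KVM_EXIT_HYPERV:
		if (run->hyperv.type == KVM_EXIT_HYPERV_HCALL) {
			/* input and params[] describe the guest's hypercall */
			run->hyperv.u.hcall.result =
				handle_vmbus_hcall(run->hyperv.u.hcall.input,
						   run->hyperv.u.hcall.params);
		}
		break;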


@ -88,6 +88,8 @@ struct kvm_s390_io_adapter_req {
perform a gmap translation for the guest address provided in addr,
pin a userspace page for the translated address and add it to the
list of mappings
Note: A new mapping will be created unconditionally; therefore,
the calling code should avoid making duplicate mappings.
KVM_S390_IO_ADAPTER_UNMAP
release a userspace page for the translated address specified in addr


@ -0,0 +1,33 @@
Generic vcpu interface
====================================
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
The groups and attributes per virtual cpu, if any, are architecture specific.
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
Architectures: ARM64
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
pointer to an int
Returns: -EBUSY: The PMU overflow interrupt is already set
-ENXIO: The overflow interrupt not set when attempting to get it
-ENODEV: PMUv3 not supported
-EINVAL: Invalid PMU overflow interrupt number supplied
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
type must be same for each vcpu. As a PPI, the interrupt number is the same for
all vcpus, while as an SPI it must be a separate number per vcpu.
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
Parameters: no additional parameter in kvm_device_attr.addr
Returns: -ENODEV: PMUv3 not supported
-ENXIO: PMUv3 not properly configured as required prior to calling this
attribute
-EBUSY: PMUv3 already initialized
Request the initialization of the PMUv3.
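For illustration only (not part of this patch), the two attributes are
typically used together; irq below holds a made-up PPI number and vcpu_fd is
assumed to be an existing vcpu file descriptor:

	int irq = 23;				/* placeholder PPI number */
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
		.attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
		.addr  = (__u64)(unsigned long)&irq,
	};
	if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr) == 0) {
		attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
		attr.addr = 0;			/* no additional parameter */
		ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
	}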


@ -84,3 +84,55 @@ Returns: -EBUSY in case 1 or more vcpus are already activated (only in write
-EFAULT if the given address is not accessible from kernel space
-ENOMEM if not enough memory is available to process the ioctl
0 in case of success
3. GROUP: KVM_S390_VM_TOD
Architectures: s390
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
Allows user space to set/get the TOD clock extension (u8).
Parameters: address of a buffer in user space to store the data (u8) to
Returns: -EFAULT if the given address is not accessible from kernel space
-EINVAL if setting the TOD clock extension to != 0 is not supported
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
the POP (u64).
Parameters: address of a buffer in user space to store the data (u64) to
Returns: -EFAULT if the given address is not accessible from kernel space
4. GROUP: KVM_S390_VM_CRYPTO
Architectures: s390
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
Allows user space to enable aes key wrapping, including generating a new
wrapping key.
Parameters: none
Returns: 0
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
Allows user space to enable dea key wrapping, including generating a new
wrapping key.
Parameters: none
Returns: 0
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
Allows user space to disable aes key wrapping, clearing the wrapping key.
Parameters: none
Returns: 0
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
Allows user space to disable dea key wrapping, clearing the wrapping key.
Parameters: none
Returns: 0
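As a hedged illustration (not part of this patch), enabling AES key wrapping
through these write-only attributes could look like this, with vm_fd an
existing s390 VM file descriptor:

	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CRYPTO,
		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
	};
	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
		ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	/* documented to return 0 */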


@ -392,11 +392,11 @@ To instantiate a large spte, four constraints must be satisfied:
write-protected pages
- the guest page must be wholly contained by a single memory slot
To check the last two conditions, the mmu maintains a ->write_count set of
To check the last two conditions, the mmu maintains a ->disallow_lpage set of
arrays for each memory slot and large page size. Every write protected page
causes its write_count to be incremented, thus preventing instantiation of
causes its disallow_lpage to be incremented, thus preventing instantiation of
a large spte. The frames at the end of an unaligned memory slot have
artificially inflated ->write_counts so they can never be instantiated.
artificially inflated ->disallow_lpages so they can never be instantiated.
Zapping all pages (page generation count)
=========================================


@ -19,38 +19,7 @@
#ifndef __ARM_KVM_ASM_H__
#define __ARM_KVM_ASM_H__
/* 0 is reserved as an invalid value. */
#define c0_MPIDR 1 /* MultiProcessor ID Register */
#define c0_CSSELR 2 /* Cache Size Selection Register */
#define c1_SCTLR 3 /* System Control Register */
#define c1_ACTLR 4 /* Auxiliary Control Register */
#define c1_CPACR 5 /* Coprocessor Access Control */
#define c2_TTBR0 6 /* Translation Table Base Register 0 */
#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */
#define c2_TTBR1 8 /* Translation Table Base Register 1 */
#define c2_TTBR1_high 9 /* TTBR1 top 32 bits */
#define c2_TTBCR 10 /* Translation Table Base Control R. */
#define c3_DACR 11 /* Domain Access Control Register */
#define c5_DFSR 12 /* Data Fault Status Register */
#define c5_IFSR 13 /* Instruction Fault Status Register */
#define c5_ADFSR 14 /* Auxilary Data Fault Status R */
#define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */
#define c6_DFAR 16 /* Data Fault Address Register */
#define c6_IFAR 17 /* Instruction Fault Address Register */
#define c7_PAR 18 /* Physical Address Register */
#define c7_PAR_high 19 /* PAR top 32 bits */
#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
#define c10_PRRR 21 /* Primary Region Remap Register */
#define c10_NMRR 22 /* Normal Memory Remap Register */
#define c12_VBAR 23 /* Vector Base Address Register */
#define c13_CID 24 /* Context ID Register */
#define c13_TID_URW 25 /* Thread ID, User R/W */
#define c13_TID_URO 26 /* Thread ID, User R/O */
#define c13_TID_PRIV 27 /* Thread ID, Privileged */
#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */
#define c10_AMAIR0 29 /* Auxilary Memory Attribute Indirection Reg0 */
#define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */
#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */
#include <asm/virt.h>
#define ARM_EXCEPTION_RESET 0
#define ARM_EXCEPTION_UNDEFINED 1
@ -86,19 +55,15 @@ struct kvm_vcpu;
extern char __kvm_hyp_init[];
extern char __kvm_hyp_init_end[];
extern char __kvm_hyp_exit[];
extern char __kvm_hyp_exit_end[];
extern char __kvm_hyp_vector[];
extern char __kvm_hyp_code_start[];
extern char __kvm_hyp_code_end[];
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern void __init_stage2_translation(void);
#endif
#endif /* __ARM_KVM_ASM_H__ */


@ -68,12 +68,12 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
{
return &vcpu->arch.regs.usr_regs.ARM_pc;
return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_pc;
}
static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu)
{
return &vcpu->arch.regs.usr_regs.ARM_cpsr;
return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr;
}
static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
@ -83,13 +83,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
{
unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK;
unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE);
}
static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
{
unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK;
unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
return cpsr_mode > USR_MODE;;
}
@ -108,11 +108,6 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu)
return ((phys_addr_t)vcpu->arch.fault.hpfar & HPFAR_MASK) << 8;
}
static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.hyp_pc;
}
static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
@ -143,6 +138,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
}
static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & HSR_DABT_CM);
}
/* Get Access Size from a data abort */
static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu)
{
@ -192,7 +192,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)


@ -85,20 +85,61 @@ struct kvm_vcpu_fault_info {
u32 hsr; /* Hyp Syndrome Register */
u32 hxfar; /* Hyp Data/Inst. Fault Address Register */
u32 hpfar; /* Hyp IPA Fault Address Register */
u32 hyp_pc; /* PC when exception was taken from Hyp mode */
};
typedef struct vfp_hard_struct kvm_cpu_context_t;
/*
* 0 is reserved as an invalid value.
* Order should be kept in sync with the save/restore code.
*/
enum vcpu_sysreg {
__INVALID_SYSREG__,
c0_MPIDR, /* MultiProcessor ID Register */
c0_CSSELR, /* Cache Size Selection Register */
c1_SCTLR, /* System Control Register */
c1_ACTLR, /* Auxiliary Control Register */
c1_CPACR, /* Coprocessor Access Control */
c2_TTBR0, /* Translation Table Base Register 0 */
c2_TTBR0_high, /* TTBR0 top 32 bits */
c2_TTBR1, /* Translation Table Base Register 1 */
c2_TTBR1_high, /* TTBR1 top 32 bits */
c2_TTBCR, /* Translation Table Base Control R. */
c3_DACR, /* Domain Access Control Register */
c5_DFSR, /* Data Fault Status Register */
c5_IFSR, /* Instruction Fault Status Register */
c5_ADFSR, /* Auxilary Data Fault Status R */
c5_AIFSR, /* Auxilary Instrunction Fault Status R */
c6_DFAR, /* Data Fault Address Register */
c6_IFAR, /* Instruction Fault Address Register */
c7_PAR, /* Physical Address Register */
c7_PAR_high, /* PAR top 32 bits */
c9_L2CTLR, /* Cortex A15/A7 L2 Control Register */
c10_PRRR, /* Primary Region Remap Register */
c10_NMRR, /* Normal Memory Remap Register */
c12_VBAR, /* Vector Base Address Register */
c13_CID, /* Context ID Register */
c13_TID_URW, /* Thread ID, User R/W */
c13_TID_URO, /* Thread ID, User R/O */
c13_TID_PRIV, /* Thread ID, Privileged */
c14_CNTKCTL, /* Timer Control Register (PL1) */
c10_AMAIR0, /* Auxilary Memory Attribute Indirection Reg0 */
c10_AMAIR1, /* Auxilary Memory Attribute Indirection Reg1 */
NR_CP15_REGS /* Number of regs (incl. invalid) */
};
struct kvm_cpu_context {
struct kvm_regs gp_regs;
struct vfp_hard_struct vfp;
u32 cp15[NR_CP15_REGS];
};
typedef struct kvm_cpu_context kvm_cpu_context_t;
struct kvm_vcpu_arch {
struct kvm_regs regs;
struct kvm_cpu_context ctxt;
int target; /* Processor target */
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
/* System control coprocessor (cp15) */
u32 cp15[NR_CP15_REGS];
/* The CPU type we expose to the VM */
u32 midr;
@ -111,9 +152,6 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
/* Floating point registers (VFP and Advanced SIMD/NEON) */
struct vfp_hard_struct vfp_guest;
/* Host FP context */
kvm_cpu_context_t *host_cpu_context;
@ -158,12 +196,14 @@ struct kvm_vcpu_stat {
u64 exits;
};
#define vcpu_cp15(v,r) (v)->arch.ctxt.cp15[r]
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
u64 kvm_call_hyp(void *hypfn, ...);
unsigned long kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
#define KVM_ARCH_WANT_MMU_NOTIFIER
@ -220,6 +260,11 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
}
static inline void __cpu_init_stage2(void)
{
kvm_call_hyp(__init_stage2_translation);
}
static inline int kvm_arch_dev_ioctl_check_extension(long ext)
{
return 0;
@ -242,5 +287,20 @@ static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
return -ENXIO;
}
static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
return -ENXIO;
}
static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
return -ENXIO;
}
#endif /* __ARM_KVM_HOST_H__ */


@ -0,0 +1,139 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM_KVM_HYP_H__
#define __ARM_KVM_HYP_H__
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/vfp.h>
#define __hyp_text __section(.hyp.text) notrace
#define kern_hyp_va(v) (v)
#define hyp_kern_va(v) (v)
#define __ACCESS_CP15(CRn, Op1, CRm, Op2) \
"mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32
#define __ACCESS_CP15_64(Op1, CRm) \
"mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64
#define __ACCESS_VFP(CRn) \
"mrc", "mcr", __stringify(p10, 7, %0, CRn, cr0, 0), u32
#define __write_sysreg(v, r, w, c, t) asm volatile(w " " c : : "r" ((t)(v)))
#define write_sysreg(v, ...) __write_sysreg(v, __VA_ARGS__)
#define __read_sysreg(r, w, c, t) ({ \
t __val; \
asm volatile(r " " c : "=r" (__val)); \
__val; \
})
#define read_sysreg(...) __read_sysreg(__VA_ARGS__)
#define write_special(v, r) \
asm volatile("msr " __stringify(r) ", %0" : : "r" (v))
#define read_special(r) ({ \
u32 __val; \
asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
__val; \
})
#define TTBR0 __ACCESS_CP15_64(0, c2)
#define TTBR1 __ACCESS_CP15_64(1, c2)
#define VTTBR __ACCESS_CP15_64(6, c2)
#define PAR __ACCESS_CP15_64(0, c7)
#define CNTV_CVAL __ACCESS_CP15_64(3, c14)
#define CNTVOFF __ACCESS_CP15_64(4, c14)
#define MIDR __ACCESS_CP15(c0, 0, c0, 0)
#define CSSELR __ACCESS_CP15(c0, 2, c0, 0)
#define VPIDR __ACCESS_CP15(c0, 4, c0, 0)
#define VMPIDR __ACCESS_CP15(c0, 4, c0, 5)
#define SCTLR __ACCESS_CP15(c1, 0, c0, 0)
#define CPACR __ACCESS_CP15(c1, 0, c0, 2)
#define HCR __ACCESS_CP15(c1, 4, c1, 0)
#define HDCR __ACCESS_CP15(c1, 4, c1, 1)
#define HCPTR __ACCESS_CP15(c1, 4, c1, 2)
#define HSTR __ACCESS_CP15(c1, 4, c1, 3)
#define TTBCR __ACCESS_CP15(c2, 0, c0, 2)
#define HTCR __ACCESS_CP15(c2, 4, c0, 2)
#define VTCR __ACCESS_CP15(c2, 4, c1, 2)
#define DACR __ACCESS_CP15(c3, 0, c0, 0)
#define DFSR __ACCESS_CP15(c5, 0, c0, 0)
#define IFSR __ACCESS_CP15(c5, 0, c0, 1)
#define ADFSR __ACCESS_CP15(c5, 0, c1, 0)
#define AIFSR __ACCESS_CP15(c5, 0, c1, 1)
#define HSR __ACCESS_CP15(c5, 4, c2, 0)
#define DFAR __ACCESS_CP15(c6, 0, c0, 0)
#define IFAR __ACCESS_CP15(c6, 0, c0, 2)
#define HDFAR __ACCESS_CP15(c6, 4, c0, 0)
#define HIFAR __ACCESS_CP15(c6, 4, c0, 2)
#define HPFAR __ACCESS_CP15(c6, 4, c0, 4)
#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
#define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4)
#define PRRR __ACCESS_CP15(c10, 0, c2, 0)
#define NMRR __ACCESS_CP15(c10, 0, c2, 1)
#define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0)
#define AMAIR1 __ACCESS_CP15(c10, 0, c3, 1)
#define VBAR __ACCESS_CP15(c12, 0, c0, 0)
#define CID __ACCESS_CP15(c13, 0, c0, 1)
#define TID_URW __ACCESS_CP15(c13, 0, c0, 2)
#define TID_URO __ACCESS_CP15(c13, 0, c0, 3)
#define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4)
#define HTPIDR __ACCESS_CP15(c13, 4, c0, 2)
#define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0)
#define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1)
#define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0)
#define VFP_FPEXC __ACCESS_VFP(FPEXC)
/* AArch64 compatibility macros, only for the timer so far */
#define read_sysreg_el0(r) read_sysreg(r##_el0)
#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0)
#define cntv_ctl_el0 CNTV_CTL
#define cntv_cval_el0 CNTV_CVAL
#define cntvoff_el2 CNTVOFF
#define cnthctl_el2 CNTHCTL
void __timer_save_state(struct kvm_vcpu *vcpu);
void __timer_restore_state(struct kvm_vcpu *vcpu);
void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
void __sysreg_save_state(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
static inline bool __vfp_enabled(void)
{
return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
}
void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
struct kvm_cpu_context *host);
int asmlinkage __hyp_do_panic(const char *, int, u32);
#endif /* __ARM_KVM_HYP_H__ */


@ -179,7 +179,7 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
}
static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,


@ -74,6 +74,15 @@ static inline bool is_hyp_mode_mismatched(void)
{
return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
}
static inline bool is_kernel_in_hyp_mode(void)
{
return false;
}
/* The section containing the hypervisor text */
extern char __hyp_text_start[];
extern char __hyp_text_end[];
#endif
#endif /* __ASSEMBLY__ */


@ -170,41 +170,11 @@ int main(void)
DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE);
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr));
DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15));
DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest));
DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context));
DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs));
DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs));
DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs));
DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr));
DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar));
DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar));
DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
DEFINE(VCPU_GUEST_CTXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_HOST_CTXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
DEFINE(CPU_CTXT_VFP, offsetof(struct kvm_cpu_context, vfp));
DEFINE(CPU_CTXT_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(GP_REGS_USR, offsetof(struct kvm_regs, usr_regs));
#endif
BLANK();
#ifdef CONFIG_VDSO


@ -18,6 +18,11 @@
*(.proc.info.init) \
VMLINUX_SYMBOL(__proc_info_end) = .;
#define HYPERVISOR_TEXT \
VMLINUX_SYMBOL(__hyp_text_start) = .; \
*(.hyp.text) \
VMLINUX_SYMBOL(__hyp_text_end) = .;
#define IDMAP_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__idmap_text_start) = .; \
@ -108,6 +113,7 @@ SECTIONS
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
HYPERVISOR_TEXT
KPROBES_TEXT
*(.gnu.warning)
*(.glue_7)


@ -17,6 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
KVM := ../../../virt/kvm
kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp/
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o


@ -28,6 +28,7 @@
#include <linux/sched.h>
#include <linux/kvm.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@ -265,6 +266,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
kvm_vgic_vcpu_destroy(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@ -320,6 +322,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
kvm_arm_set_running_vcpu(NULL);
kvm_timer_vcpu_put(vcpu);
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@ -577,6 +580,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* non-preemptible context.
*/
preempt_disable();
kvm_pmu_flush_hwstate(vcpu);
kvm_timer_flush_hwstate(vcpu);
kvm_vgic_flush_hwstate(vcpu);
@ -593,6 +597,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
vcpu->arch.power_off || vcpu->arch.pause) {
local_irq_enable();
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
preempt_enable();
@ -642,10 +647,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/*
* We must sync the timer state before the vgic state so that
* the vgic can properly sample the updated state of the
* We must sync the PMU and timer state before the vgic state so
* that the vgic can properly sample the updated state of the
* interrupt line.
*/
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
@ -823,11 +829,54 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
return 0;
}
static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret = -ENXIO;
switch (attr->group) {
default:
ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
break;
}
return ret;
}
static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret = -ENXIO;
switch (attr->group) {
default:
ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
break;
}
return ret;
}
static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret = -ENXIO;
switch (attr->group) {
default:
ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
break;
}
return ret;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
@ -870,6 +919,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
return -E2BIG;
return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
}
case KVM_SET_DEVICE_ATTR: {
if (copy_from_user(&attr, argp, sizeof(attr)))
return -EFAULT;
return kvm_arm_vcpu_set_attr(vcpu, &attr);
}
case KVM_GET_DEVICE_ATTR: {
if (copy_from_user(&attr, argp, sizeof(attr)))
return -EFAULT;
return kvm_arm_vcpu_get_attr(vcpu, &attr);
}
case KVM_HAS_DEVICE_ATTR: {
if (copy_from_user(&attr, argp, sizeof(attr)))
return -EFAULT;
return kvm_arm_vcpu_has_attr(vcpu, &attr);
}
default:
return -EINVAL;
}
@ -967,6 +1031,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
static void cpu_init_stage2(void *dummy)
{
__cpu_init_stage2();
}
static void cpu_init_hyp_mode(void *dummy)
{
phys_addr_t boot_pgd_ptr;
@ -985,6 +1054,7 @@ static void cpu_init_hyp_mode(void *dummy)
vector_ptr = (unsigned long)__kvm_hyp_vector;
__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
kvm_arm_init_debug();
}
@ -1035,6 +1105,82 @@ static inline void hyp_cpu_pm_init(void)
}
#endif
static void teardown_common_resources(void)
{
free_percpu(kvm_host_cpu_state);
}
static int init_common_resources(void)
{
kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
if (!kvm_host_cpu_state) {
kvm_err("Cannot allocate host CPU state\n");
return -ENOMEM;
}
return 0;
}
static int init_subsystems(void)
{
int err;
/*
* Init HYP view of VGIC
*/
err = kvm_vgic_hyp_init();
switch (err) {
case 0:
vgic_present = true;
break;
case -ENODEV:
case -ENXIO:
vgic_present = false;
break;
default:
return err;
}
/*
* Init HYP architected timer support
*/
err = kvm_timer_hyp_init();
if (err)
return err;
kvm_perf_init();
kvm_coproc_table_init();
return 0;
}
static void teardown_hyp_mode(void)
{
int cpu;
if (is_kernel_in_hyp_mode())
return;
free_hyp_pgds();
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
}
static int init_vhe_mode(void)
{
/*
* Execute the init code on each CPU.
*/
on_each_cpu(cpu_init_stage2, NULL, 1);
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
kvm_info("VHE mode initialized successfully\n");
return 0;
}
/**
* Inits Hyp-mode on all online CPUs
*/
@ -1065,7 +1211,7 @@ static int init_hyp_mode(void)
stack_page = __get_free_page(GFP_KERNEL);
if (!stack_page) {
err = -ENOMEM;
goto out_free_stack_pages;
goto out_err;
}
per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
@ -1074,16 +1220,16 @@ static int init_hyp_mode(void)
/*
* Map the Hyp-code called directly from the host
*/
err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
err = create_hyp_mappings(__hyp_text_start, __hyp_text_end);
if (err) {
kvm_err("Cannot map world-switch code\n");
goto out_free_mappings;
goto out_err;
}
err = create_hyp_mappings(__start_rodata, __end_rodata);
if (err) {
kvm_err("Cannot map rodata section\n");
goto out_free_mappings;
goto out_err;
}
/*
@ -1095,20 +1241,10 @@ static int init_hyp_mode(void)
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_free_mappings;
goto out_err;
}
}
/*
* Map the host CPU structures
*/
kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
if (!kvm_host_cpu_state) {
err = -ENOMEM;
kvm_err("Cannot allocate host CPU state\n");
goto out_free_mappings;
}
for_each_possible_cpu(cpu) {
kvm_cpu_context_t *cpu_ctxt;
@ -1117,7 +1253,7 @@ static int init_hyp_mode(void)
if (err) {
kvm_err("Cannot map host CPU state: %d\n", err);
goto out_free_context;
goto out_err;
}
}
@ -1126,34 +1262,22 @@ static int init_hyp_mode(void)
*/
on_each_cpu(cpu_init_hyp_mode, NULL, 1);
/*
* Init HYP view of VGIC
*/
err = kvm_vgic_hyp_init();
switch (err) {
case 0:
vgic_present = true;
break;
case -ENODEV:
case -ENXIO:
vgic_present = false;
break;
default:
goto out_free_context;
}
/*
* Init HYP architected timer support
*/
err = kvm_timer_hyp_init();
if (err)
goto out_free_context;
#ifndef CONFIG_HOTPLUG_CPU
free_boot_hyp_pgd();
#endif
kvm_perf_init();
cpu_notifier_register_begin();
err = __register_cpu_notifier(&hyp_init_cpu_nb);
cpu_notifier_register_done();
if (err) {
kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
goto out_err;
}
hyp_cpu_pm_init();
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
@ -1162,14 +1286,9 @@ static int init_hyp_mode(void)
kvm_info("Hyp mode initialized successfully\n");
return 0;
out_free_context:
free_percpu(kvm_host_cpu_state);
out_free_mappings:
free_hyp_pgds();
out_free_stack_pages:
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
out_err:
teardown_hyp_mode();
kvm_err("error initializing Hyp mode: %d\n", err);
return err;
}
@ -1213,26 +1332,27 @@ int kvm_arch_init(void *opaque)
}
}
cpu_notifier_register_begin();
err = init_common_resources();
if (err)
return err;
err = init_hyp_mode();
if (is_kernel_in_hyp_mode())
err = init_vhe_mode();
else
err = init_hyp_mode();
if (err)
goto out_err;
err = __register_cpu_notifier(&hyp_init_cpu_nb);
if (err) {
kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
goto out_err;
}
err = init_subsystems();
if (err)
goto out_hyp;
cpu_notifier_register_done();
hyp_cpu_pm_init();
kvm_coproc_table_init();
return 0;
out_hyp:
teardown_hyp_mode();
out_err:
cpu_notifier_register_done();
teardown_common_resources();
return err;
}


@ -16,6 +16,8 @@
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/bsearch.h>
#include <linux/mm.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
@ -54,8 +56,8 @@ static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
const struct coproc_reg *r,
u64 val)
{
vcpu->arch.cp15[r->reg] = val & 0xffffffff;
vcpu->arch.cp15[r->reg + 1] = val >> 32;
vcpu_cp15(vcpu, r->reg) = val & 0xffffffff;
vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
}
static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
@ -63,9 +65,9 @@ static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
{
u64 val;
val = vcpu->arch.cp15[r->reg + 1];
val = vcpu_cp15(vcpu, r->reg + 1);
val = val << 32;
val = val | vcpu->arch.cp15[r->reg];
val = val | vcpu_cp15(vcpu, r->reg);
return val;
}
@ -104,7 +106,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
* vcpu_id, but we read the 'U' bit from the underlying
* hardware directly.
*/
vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
vcpu_cp15(vcpu, c0_MPIDR) = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
(vcpu->vcpu_id & 3));
}
@ -117,7 +119,7 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
*vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c1_ACTLR);
return true;
}
@ -139,7 +141,7 @@ static bool access_l2ctlr(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
*vcpu_reg(vcpu, p->Rt1) = vcpu_cp15(vcpu, c9_L2CTLR);
return true;
}
@ -156,7 +158,7 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
ncores = min(ncores, 3U);
l2ctlr |= (ncores & 3) << 24;
vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
vcpu_cp15(vcpu, c9_L2CTLR) = l2ctlr;
}
static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
@ -171,7 +173,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
else
actlr &= ~(1U << 6);
vcpu->arch.cp15[c1_ACTLR] = actlr;
vcpu_cp15(vcpu, c1_ACTLR) = actlr;
}
/*
@ -218,9 +220,9 @@ bool access_vm_reg(struct kvm_vcpu *vcpu,
BUG_ON(!p->is_write);
vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
vcpu_cp15(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt1);
if (p->is_64bit)
vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
vcpu_cp15(vcpu, r->reg + 1) = *vcpu_reg(vcpu, p->Rt2);
kvm_toggle_cache(vcpu, was_enabled);
return true;
@ -381,17 +383,26 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
static int check_reg_table(const struct coproc_reg *table, unsigned int n)
{
unsigned int i;
for (i = 1; i < n; i++) {
if (cmp_reg(&table[i-1], &table[i]) >= 0) {
kvm_err("reg table %p out of order (%d)\n", table, i - 1);
return 1;
}
}
return 0;
}
/* Target specific emulation tables */
static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
{
unsigned int i;
for (i = 1; i < table->num; i++)
BUG_ON(cmp_reg(&table->table[i-1],
&table->table[i]) >= 0);
BUG_ON(check_reg_table(table->table, table->num));
target_tables[table->target] = table;
}
@ -405,29 +416,32 @@ static const struct coproc_reg *get_target_table(unsigned target, size_t *num)
return table->table;
}
#define reg_to_match_value(x) \
({ \
unsigned long val; \
val = (x)->CRn << 11; \
val |= (x)->CRm << 7; \
val |= (x)->Op1 << 4; \
val |= (x)->Op2 << 1; \
val |= !(x)->is_64bit; \
val; \
})
static int match_reg(const void *key, const void *elt)
{
const unsigned long pval = (unsigned long)key;
const struct coproc_reg *r = elt;
return pval - reg_to_match_value(r);
}
static const struct coproc_reg *find_reg(const struct coproc_params *params,
const struct coproc_reg table[],
unsigned int num)
{
unsigned int i;
unsigned long pval = reg_to_match_value(params);
for (i = 0; i < num; i++) {
const struct coproc_reg *r = &table[i];
if (params->is_64bit != r->is_64)
continue;
if (params->CRn != r->CRn)
continue;
if (params->CRm != r->CRm)
continue;
if (params->Op1 != r->Op1)
continue;
if (params->Op2 != r->Op2)
continue;
return r;
}
return NULL;
return bsearch((void *)pval, table, num, sizeof(table[0]), match_reg);
}
static int emulate_cp15(struct kvm_vcpu *vcpu,
@ -645,6 +659,9 @@ static struct coproc_reg invariant_cp15[] = {
{ CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR },
{ CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR },
{ CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
{ CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 },
{ CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 },
{ CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 },
@ -660,9 +677,6 @@ static struct coproc_reg invariant_cp15[] = {
{ CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 },
{ CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 },
{ CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 },
{ CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR },
{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
};
/*
@ -901,7 +915,7 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
if (vfpid < num_fp_regs()) {
if (KVM_REG_SIZE(id) != 8)
return -ENOENT;
return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpregs[vfpid],
return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpregs[vfpid],
id);
}
@ -911,13 +925,13 @@ static int vfp_get_reg(const struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
switch (vfpid) {
case KVM_REG_ARM_VFP_FPEXC:
return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpexc, id);
return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpexc, id);
case KVM_REG_ARM_VFP_FPSCR:
return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpscr, id);
return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpscr, id);
case KVM_REG_ARM_VFP_FPINST:
return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst, id);
return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst, id);
case KVM_REG_ARM_VFP_FPINST2:
return reg_to_user(uaddr, &vcpu->arch.vfp_guest.fpinst2, id);
return reg_to_user(uaddr, &vcpu->arch.ctxt.vfp.fpinst2, id);
case KVM_REG_ARM_VFP_MVFR0:
val = fmrx(MVFR0);
return reg_to_user(uaddr, &val, id);
@ -945,7 +959,7 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
if (vfpid < num_fp_regs()) {
if (KVM_REG_SIZE(id) != 8)
return -ENOENT;
return reg_from_user(&vcpu->arch.vfp_guest.fpregs[vfpid],
return reg_from_user(&vcpu->arch.ctxt.vfp.fpregs[vfpid],
uaddr, id);
}
@ -955,13 +969,13 @@ static int vfp_set_reg(struct kvm_vcpu *vcpu, u64 id, const void __user *uaddr)
switch (vfpid) {
case KVM_REG_ARM_VFP_FPEXC:
return reg_from_user(&vcpu->arch.vfp_guest.fpexc, uaddr, id);
return reg_from_user(&vcpu->arch.ctxt.vfp.fpexc, uaddr, id);
case KVM_REG_ARM_VFP_FPSCR:
return reg_from_user(&vcpu->arch.vfp_guest.fpscr, uaddr, id);
return reg_from_user(&vcpu->arch.ctxt.vfp.fpscr, uaddr, id);
case KVM_REG_ARM_VFP_FPINST:
return reg_from_user(&vcpu->arch.vfp_guest.fpinst, uaddr, id);
return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst, uaddr, id);
case KVM_REG_ARM_VFP_FPINST2:
return reg_from_user(&vcpu->arch.vfp_guest.fpinst2, uaddr, id);
return reg_from_user(&vcpu->arch.ctxt.vfp.fpinst2, uaddr, id);
/* These are invariant. */
case KVM_REG_ARM_VFP_MVFR0:
if (reg_from_user(&val, uaddr, id))
@ -1030,7 +1044,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
val = vcpu_cp15_reg64_get(vcpu, r);
ret = reg_to_user(uaddr, &val, reg->id);
} else if (KVM_REG_SIZE(reg->id) == 4) {
ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
ret = reg_to_user(uaddr, &vcpu_cp15(vcpu, r->reg), reg->id);
}
return ret;
@ -1060,7 +1074,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!ret)
vcpu_cp15_reg64_set(vcpu, r, val);
} else if (KVM_REG_SIZE(reg->id) == 4) {
ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
ret = reg_from_user(&vcpu_cp15(vcpu, r->reg), uaddr, reg->id);
}
return ret;
@ -1096,7 +1110,7 @@ static int write_demux_regids(u64 __user *uindices)
static u64 cp15_to_index(const struct coproc_reg *reg)
{
u64 val = KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT);
if (reg->is_64) {
if (reg->is_64bit) {
val |= KVM_REG_SIZE_U64;
val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
/*
@ -1210,8 +1224,8 @@ void kvm_coproc_table_init(void)
unsigned int i;
/* Make sure tables are unique and in order. */
for (i = 1; i < ARRAY_SIZE(cp15_regs); i++)
BUG_ON(cmp_reg(&cp15_regs[i-1], &cp15_regs[i]) >= 0);
BUG_ON(check_reg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
BUG_ON(check_reg_table(invariant_cp15, ARRAY_SIZE(invariant_cp15)));
/* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_cp15); i++)
@ -1248,7 +1262,7 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
const struct coproc_reg *table;
/* Catch someone adding a register without putting in reset entry. */
memset(vcpu->arch.cp15, 0x42, sizeof(vcpu->arch.cp15));
memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15));
/* Generic chip reset first (so target could override). */
reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
@ -1257,6 +1271,6 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
reset_coproc_regs(vcpu, table, num);
for (num = 1; num < NR_CP15_REGS; num++)
if (vcpu->arch.cp15[num] == 0x42424242)
panic("Didn't reset vcpu->arch.cp15[%zi]", num);
if (vcpu_cp15(vcpu, num) == 0x42424242)
panic("Didn't reset vcpu_cp15(vcpu, %zi)", num);
}


@ -37,7 +37,7 @@ struct coproc_reg {
unsigned long Op1;
unsigned long Op2;
bool is_64;
bool is_64bit;
/* Trapped access from guest, if non-NULL. */
bool (*access)(struct kvm_vcpu *,
@ -47,7 +47,7 @@ struct coproc_reg {
/* Initialization for vcpu. */
void (*reset)(struct kvm_vcpu *, const struct coproc_reg *);
/* Index into vcpu->arch.cp15[], or 0 if we don't need to save it. */
/* Index into vcpu_cp15(vcpu, ...), or 0 if we don't need to save it. */
unsigned long reg;
/* Value (usually reset value) */
@ -104,25 +104,25 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
const struct coproc_reg *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
vcpu->arch.cp15[r->reg] = 0xdecafbad;
BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
}
static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
vcpu->arch.cp15[r->reg] = r->val;
BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
vcpu_cp15(vcpu, r->reg) = r->val;
}
static inline void reset_unknown64(struct kvm_vcpu *vcpu,
const struct coproc_reg *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.cp15));
BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.ctxt.cp15));
vcpu->arch.cp15[r->reg] = 0xdecafbad;
vcpu->arch.cp15[r->reg+1] = 0xd0c0ffee;
vcpu_cp15(vcpu, r->reg) = 0xdecafbad;
vcpu_cp15(vcpu, r->reg+1) = 0xd0c0ffee;
}
static inline int cmp_reg(const struct coproc_reg *i1,
@ -141,7 +141,7 @@ static inline int cmp_reg(const struct coproc_reg *i1,
return i1->Op1 - i2->Op1;
if (i1->Op2 != i2->Op2)
return i1->Op2 - i2->Op2;
return i2->is_64 - i1->is_64;
return i2->is_64bit - i1->is_64bit;
}
@ -150,8 +150,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
#define CRm64(_x) .CRn = _x, .CRm = 0
#define Op1(_x) .Op1 = _x
#define Op2(_x) .Op2 = _x
#define is64 .is_64 = true
#define is32 .is_64 = false
#define is64 .is_64bit = true
#define is32 .is_64bit = false
bool access_vm_reg(struct kvm_vcpu *vcpu,
const struct coproc_params *p,


@ -112,7 +112,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = {
*/
unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
{
unsigned long *reg_array = (unsigned long *)&vcpu->arch.regs;
unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs;
unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
@ -147,15 +147,15 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
case SVC_MODE:
return &vcpu->arch.regs.KVM_ARM_SVC_spsr;
return &vcpu->arch.ctxt.gp_regs.KVM_ARM_SVC_spsr;
case ABT_MODE:
return &vcpu->arch.regs.KVM_ARM_ABT_spsr;
return &vcpu->arch.ctxt.gp_regs.KVM_ARM_ABT_spsr;
case UND_MODE:
return &vcpu->arch.regs.KVM_ARM_UND_spsr;
return &vcpu->arch.ctxt.gp_regs.KVM_ARM_UND_spsr;
case IRQ_MODE:
return &vcpu->arch.regs.KVM_ARM_IRQ_spsr;
return &vcpu->arch.ctxt.gp_regs.KVM_ARM_IRQ_spsr;
case FIQ_MODE:
return &vcpu->arch.regs.KVM_ARM_FIQ_spsr;
return &vcpu->arch.ctxt.gp_regs.KVM_ARM_FIQ_spsr;
default:
BUG();
}
@ -266,8 +266,8 @@ void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
static u32 exc_vector_base(struct kvm_vcpu *vcpu)
{
u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
u32 vbar = vcpu->arch.cp15[c12_VBAR];
u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
u32 vbar = vcpu_cp15(vcpu, c12_VBAR);
if (sctlr & SCTLR_V)
return 0xffff0000;
@ -282,7 +282,7 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu)
static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode;
@ -357,22 +357,22 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
if (is_pabt) {
/* Set IFAR and IFSR */
vcpu->arch.cp15[c6_IFAR] = addr;
is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
vcpu_cp15(vcpu, c6_IFAR) = addr;
is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
/* Always give debug fault for now - should give guest a clue */
if (is_lpae)
vcpu->arch.cp15[c5_IFSR] = 1 << 9 | 0x22;
vcpu_cp15(vcpu, c5_IFSR) = 1 << 9 | 0x22;
else
vcpu->arch.cp15[c5_IFSR] = 2;
vcpu_cp15(vcpu, c5_IFSR) = 2;
} else { /* !iabt */
/* Set DFAR and DFSR */
vcpu->arch.cp15[c6_DFAR] = addr;
is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
vcpu_cp15(vcpu, c6_DFAR) = addr;
is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
/* Always give debug fault for now - should give guest a clue */
if (is_lpae)
vcpu->arch.cp15[c5_DFSR] = 1 << 9 | 0x22;
vcpu_cp15(vcpu, c5_DFSR) = 1 << 9 | 0x22;
else
vcpu->arch.cp15[c5_DFSR] = 2;
vcpu_cp15(vcpu, c5_DFSR) = 2;
}
}


@ -25,7 +25,6 @@
#include <asm/cputype.h>
#include <asm/uaccess.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
@ -55,7 +54,7 @@ static u64 core_reg_offset_from_id(u64 id)
static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
u32 __user *uaddr = (u32 __user *)(long)reg->addr;
struct kvm_regs *regs = &vcpu->arch.regs;
struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
u64 off;
if (KVM_REG_SIZE(reg->id) != 4)
@ -72,7 +71,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
u32 __user *uaddr = (u32 __user *)(long)reg->addr;
struct kvm_regs *regs = &vcpu->arch.regs;
struct kvm_regs *regs = &vcpu->arch.ctxt.gp_regs;
u64 off, val;
if (KVM_REG_SIZE(reg->id) != 4)


@ -147,13 +147,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
switch (exception_index) {
case ARM_EXCEPTION_IRQ:
return 1;
case ARM_EXCEPTION_UNDEFINED:
kvm_err("Undefined exception in Hyp mode at: %#08lx\n",
kvm_vcpu_get_hyp_pc(vcpu));
BUG();
panic("KVM: Hypervisor undefined exception!\n");
case ARM_EXCEPTION_DATA_ABORT:
case ARM_EXCEPTION_PREF_ABORT:
case ARM_EXCEPTION_HVC:
/*
* See ARM ARM B1.14.1: "Hyp traps on instructions

arch/arm/kvm/hyp/Makefile (new file)

@ -0,0 +1,17 @@
#
# Makefile for Kernel-based Virtual Machine module, HYP part
#
KVM=../../../../virt/kvm
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += vfp.o
obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o


@ -0,0 +1,77 @@
/*
* Original code:
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*
* Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <asm/kvm_hyp.h>
__asm__(".arch_extension virt");
void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt)
{
ctxt->gp_regs.usr_regs.ARM_sp = read_special(SP_usr);
ctxt->gp_regs.usr_regs.ARM_pc = read_special(ELR_hyp);
ctxt->gp_regs.usr_regs.ARM_cpsr = read_special(SPSR);
ctxt->gp_regs.KVM_ARM_SVC_sp = read_special(SP_svc);
ctxt->gp_regs.KVM_ARM_SVC_lr = read_special(LR_svc);
ctxt->gp_regs.KVM_ARM_SVC_spsr = read_special(SPSR_svc);
ctxt->gp_regs.KVM_ARM_ABT_sp = read_special(SP_abt);
ctxt->gp_regs.KVM_ARM_ABT_lr = read_special(LR_abt);
ctxt->gp_regs.KVM_ARM_ABT_spsr = read_special(SPSR_abt);
ctxt->gp_regs.KVM_ARM_UND_sp = read_special(SP_und);
ctxt->gp_regs.KVM_ARM_UND_lr = read_special(LR_und);
ctxt->gp_regs.KVM_ARM_UND_spsr = read_special(SPSR_und);
ctxt->gp_regs.KVM_ARM_IRQ_sp = read_special(SP_irq);
ctxt->gp_regs.KVM_ARM_IRQ_lr = read_special(LR_irq);
ctxt->gp_regs.KVM_ARM_IRQ_spsr = read_special(SPSR_irq);
ctxt->gp_regs.KVM_ARM_FIQ_r8 = read_special(R8_fiq);
ctxt->gp_regs.KVM_ARM_FIQ_r9 = read_special(R9_fiq);
ctxt->gp_regs.KVM_ARM_FIQ_r10 = read_special(R10_fiq);
ctxt->gp_regs.KVM_ARM_FIQ_fp = read_special(R11_fiq);
ctxt->gp_regs.KVM_ARM_FIQ_ip = read_special(R12_fiq);
ctxt->gp_regs.KVM_ARM_FIQ_sp = read_special(SP_fiq);
ctxt->gp_regs.KVM_ARM_FIQ_lr = read_special(LR_fiq);
ctxt->gp_regs.KVM_ARM_FIQ_spsr = read_special(SPSR_fiq);
}
void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt)
{
write_special(ctxt->gp_regs.usr_regs.ARM_sp, SP_usr);
write_special(ctxt->gp_regs.usr_regs.ARM_pc, ELR_hyp);
write_special(ctxt->gp_regs.usr_regs.ARM_cpsr, SPSR_cxsf);
write_special(ctxt->gp_regs.KVM_ARM_SVC_sp, SP_svc);
write_special(ctxt->gp_regs.KVM_ARM_SVC_lr, LR_svc);
write_special(ctxt->gp_regs.KVM_ARM_SVC_spsr, SPSR_svc);
write_special(ctxt->gp_regs.KVM_ARM_ABT_sp, SP_abt);
write_special(ctxt->gp_regs.KVM_ARM_ABT_lr, LR_abt);
write_special(ctxt->gp_regs.KVM_ARM_ABT_spsr, SPSR_abt);
write_special(ctxt->gp_regs.KVM_ARM_UND_sp, SP_und);
write_special(ctxt->gp_regs.KVM_ARM_UND_lr, LR_und);
write_special(ctxt->gp_regs.KVM_ARM_UND_spsr, SPSR_und);
write_special(ctxt->gp_regs.KVM_ARM_IRQ_sp, SP_irq);
write_special(ctxt->gp_regs.KVM_ARM_IRQ_lr, LR_irq);
write_special(ctxt->gp_regs.KVM_ARM_IRQ_spsr, SPSR_irq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_r8, R8_fiq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_r9, R9_fiq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_r10, R10_fiq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_fp, R11_fiq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_ip, R12_fiq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_sp, SP_fiq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_lr, LR_fiq);
write_special(ctxt->gp_regs.KVM_ARM_FIQ_spsr, SPSR_fiq);
}


@ -0,0 +1,84 @@
/*
* Original code:
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*
* Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <asm/kvm_hyp.h>
static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx)
{
return (u64 *)(ctxt->cp15 + idx);
}
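/*
 * Note: ctxt->cp15 is assumed to be an array of 32bit registers, so a
 * 64bit cp15 register such as TTBR0, TTBR1 or PAR is accessed through
 * this u64 cast, spanning two consecutive slots starting at @idx.
 */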
void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
{
ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR);
ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR);
ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR);
ctxt->cp15[c1_CPACR] = read_sysreg(CPACR);
*cp15_64(ctxt, c2_TTBR0) = read_sysreg(TTBR0);
*cp15_64(ctxt, c2_TTBR1) = read_sysreg(TTBR1);
ctxt->cp15[c2_TTBCR] = read_sysreg(TTBCR);
ctxt->cp15[c3_DACR] = read_sysreg(DACR);
ctxt->cp15[c5_DFSR] = read_sysreg(DFSR);
ctxt->cp15[c5_IFSR] = read_sysreg(IFSR);
ctxt->cp15[c5_ADFSR] = read_sysreg(ADFSR);
ctxt->cp15[c5_AIFSR] = read_sysreg(AIFSR);
ctxt->cp15[c6_DFAR] = read_sysreg(DFAR);
ctxt->cp15[c6_IFAR] = read_sysreg(IFAR);
*cp15_64(ctxt, c7_PAR) = read_sysreg(PAR);
ctxt->cp15[c10_PRRR] = read_sysreg(PRRR);
ctxt->cp15[c10_NMRR] = read_sysreg(NMRR);
ctxt->cp15[c10_AMAIR0] = read_sysreg(AMAIR0);
ctxt->cp15[c10_AMAIR1] = read_sysreg(AMAIR1);
ctxt->cp15[c12_VBAR] = read_sysreg(VBAR);
ctxt->cp15[c13_CID] = read_sysreg(CID);
ctxt->cp15[c13_TID_URW] = read_sysreg(TID_URW);
ctxt->cp15[c13_TID_URO] = read_sysreg(TID_URO);
ctxt->cp15[c13_TID_PRIV] = read_sysreg(TID_PRIV);
ctxt->cp15[c14_CNTKCTL] = read_sysreg(CNTKCTL);
}
void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->cp15[c0_MPIDR], VMPIDR);
write_sysreg(ctxt->cp15[c0_CSSELR], CSSELR);
write_sysreg(ctxt->cp15[c1_SCTLR], SCTLR);
write_sysreg(ctxt->cp15[c1_CPACR], CPACR);
write_sysreg(*cp15_64(ctxt, c2_TTBR0), TTBR0);
write_sysreg(*cp15_64(ctxt, c2_TTBR1), TTBR1);
write_sysreg(ctxt->cp15[c2_TTBCR], TTBCR);
write_sysreg(ctxt->cp15[c3_DACR], DACR);
write_sysreg(ctxt->cp15[c5_DFSR], DFSR);
write_sysreg(ctxt->cp15[c5_IFSR], IFSR);
write_sysreg(ctxt->cp15[c5_ADFSR], ADFSR);
write_sysreg(ctxt->cp15[c5_AIFSR], AIFSR);
write_sysreg(ctxt->cp15[c6_DFAR], DFAR);
write_sysreg(ctxt->cp15[c6_IFAR], IFAR);
write_sysreg(*cp15_64(ctxt, c7_PAR), PAR);
write_sysreg(ctxt->cp15[c10_PRRR], PRRR);
write_sysreg(ctxt->cp15[c10_NMRR], NMRR);
write_sysreg(ctxt->cp15[c10_AMAIR0], AMAIR0);
write_sysreg(ctxt->cp15[c10_AMAIR1], AMAIR1);
write_sysreg(ctxt->cp15[c12_VBAR], VBAR);
write_sysreg(ctxt->cp15[c13_CID], CID);
write_sysreg(ctxt->cp15[c13_TID_URW], TID_URW);
write_sysreg(ctxt->cp15[c13_TID_URO], TID_URO);
write_sysreg(ctxt->cp15[c13_TID_PRIV], TID_PRIV);
write_sysreg(ctxt->cp15[c14_CNTKCTL], CNTKCTL);
}

arch/arm/kvm/hyp/entry.S (new file, 101 lines)

@ -0,0 +1,101 @@
/*
* Copyright (C) 2016 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_arm.h>
.arch_extension virt
.text
.pushsection .hyp.text, "ax"
#define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR)
/* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */
ENTRY(__guest_enter)
@ Save host registers
add r1, r1, #(USR_REGS_OFFSET + S_R4)
stm r1!, {r4-r12}
str lr, [r1, #4] @ Skip SP_usr (already saved)
@ Restore guest registers
add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
ldr lr, [r0, #S_LR]
ldm r0, {r0-r12}
clrex
eret
ENDPROC(__guest_enter)
ENTRY(__guest_exit)
/*
* return convention:
* guest r0, r1, r2 saved on the stack
* r0: vcpu pointer
* r1: exception code
*/
add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3)
stm r2!, {r3-r12}
str lr, [r2, #4]
add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0)
pop {r3, r4, r5} @ r0, r1, r2
stm r2, {r3-r5}
ldr r0, [r0, #VCPU_HOST_CTXT]
add r0, r0, #(USR_REGS_OFFSET + S_R4)
ldm r0!, {r4-r12}
ldr lr, [r0, #4]
mov r0, r1
bx lr
ENDPROC(__guest_exit)
/*
* If VFPv3 support is not available, then we will not switch the VFP
* registers; however cp10 and cp11 accesses will still trap and fall back
* to the regular coprocessor emulation code, which currently will
* inject an undefined exception to the guest.
*/
#ifdef CONFIG_VFPv3
ENTRY(__vfp_guest_restore)
push {r3, r4, lr}
@ NEON/VFP used. Turn on VFP access.
mrc p15, 4, r1, c1, c1, 2 @ HCPTR
bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11))
mcr p15, 4, r1, c1, c1, 2 @ HCPTR
isb
@ Switch VFP/NEON hardware state to the guest's
mov r4, r0
ldr r0, [r0, #VCPU_HOST_CTXT]
add r0, r0, #CPU_CTXT_VFP
bl __vfp_save_state
add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP)
bl __vfp_restore_state
pop {r3, r4, lr}
pop {r0, r1, r2}
clrex
eret
ENDPROC(__vfp_guest_restore)
#endif
.popsection


@ -0,0 +1,169 @@
/*
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/linkage.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
.arch_extension virt
.text
.pushsection .hyp.text, "ax"
.macro load_vcpu reg
mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR
.endm
/********************************************************************
* Hypervisor exception vector and handlers
*
*
* The KVM/ARM Hypervisor ABI is defined as follows:
*
* Entry to Hyp mode from the host kernel will happen _only_ when an HVC
* instruction is issued since all traps are disabled when running the host
* kernel as per the Hyp-mode initialization at boot time.
*
* HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
* below) when the HVC instruction is called from SVC mode (i.e. a guest or the
* host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
* instructions are called from within Hyp-mode.
*
* Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
* Switching to Hyp mode is done through a simple HVC #0 instruction. The
* exception vector code will check that the HVC comes from VMID==0.
* - r0 contains a pointer to a HYP function
* - r1, r2, and r3 contain arguments to the above function.
* - The HYP function will be called with its arguments in r0, r1 and r2.
* On HYP function return, we return directly to SVC.
*
* Note that the above is used to execute code in Hyp-mode from a host-kernel
* point of view, and is a different concept from performing a world-switch and
* executing guest code SVC mode (with a VMID != 0).
*/
.align 5
__kvm_hyp_vector:
.global __kvm_hyp_vector
@ Hyp-mode exception vector
W(b) hyp_reset
W(b) hyp_undef
W(b) hyp_svc
W(b) hyp_pabt
W(b) hyp_dabt
W(b) hyp_hvc
W(b) hyp_irq
W(b) hyp_fiq
.macro invalid_vector label, cause
.align
\label: mov r0, #\cause
b __hyp_panic
.endm
invalid_vector hyp_reset ARM_EXCEPTION_RESET
invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED
invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE
invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT
invalid_vector hyp_dabt ARM_EXCEPTION_DATA_ABORT
invalid_vector hyp_fiq ARM_EXCEPTION_FIQ
ENTRY(__hyp_do_panic)
mrs lr, cpsr
bic lr, lr, #MODE_MASK
orr lr, lr, #SVC_MODE
THUMB( orr lr, lr, #PSR_T_BIT )
msr spsr_cxsf, lr
ldr lr, =panic
msr ELR_hyp, lr
ldr lr, =kvm_call_hyp
clrex
eret
ENDPROC(__hyp_do_panic)
hyp_hvc:
/*
* Getting here is either because of a trap from a guest,
* or from executing HVC from the host kernel, which means
* "do something in Hyp mode".
*/
push {r0, r1, r2}
@ Check syndrome register
mrc p15, 4, r1, c5, c2, 0 @ HSR
lsr r0, r1, #HSR_EC_SHIFT
cmp r0, #HSR_EC_HVC
bne guest_trap @ Not HVC instr.
/*
* Let's check if the HVC came from VMID 0 and allow simple
* switch to Hyp mode
*/
mrrc p15, 6, r0, r2, c2
lsr r2, r2, #16
and r2, r2, #0xff
cmp r2, #0
bne guest_trap @ Guest called HVC
/*
* Getting here means host called HVC, we shift parameters and branch
* to Hyp function.
*/
pop {r0, r1, r2}
/* Check for __hyp_get_vectors */
cmp r0, #-1
mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
beq 1f
push {lr}
mov lr, r0
mov r0, r1
mov r1, r2
mov r2, r3
THUMB( orr lr, #1)
blx lr @ Call the HYP function
pop {lr}
1: eret
guest_trap:
load_vcpu r0 @ Load VCPU pointer to r0
#ifdef CONFIG_VFPv3
@ Check for a VFP access
lsr r1, r1, #HSR_EC_SHIFT
cmp r1, #HSR_EC_CP_0_13
beq __vfp_guest_restore
#endif
mov r1, #ARM_EXCEPTION_HVC
b __guest_exit
hyp_irq:
push {r0, r1, r2}
mov r1, #ARM_EXCEPTION_IRQ
load_vcpu r0 @ Load VCPU pointer to r0
b __guest_exit
.ltorg
.popsection


@ -0,0 +1,33 @@
/*
* Copyright (C) 2016 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/types.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
void __hyp_text __init_stage2_translation(void)
{
u64 val;
val = read_sysreg(VTCR) & ~VTCR_MASK;
val |= read_sysreg(HTCR) & VTCR_HTCR_SH;
val |= KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S;
write_sysreg(val, VTCR);
}

arch/arm/kvm/hyp/switch.c (new file, 232 lines)

@ -0,0 +1,232 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
__asm__(".arch_extension virt");
/*
* Activate the traps, saving the host's fpexc register before
* overwriting it. We'll restore it on VM exit.
*/
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
{
u32 val;
/*
* We are about to set HCPTR.TCP10/11 to trap all floating point
* register accesses to HYP, however, the ARM ARM clearly states that
* traps are only taken to HYP if the operation would not otherwise
* trap to SVC. Therefore, always make sure that for 32-bit guests,
* we set FPEXC.EN to prevent traps to SVC, when setting the TCP bits.
*/
val = read_sysreg(VFP_FPEXC);
*fpexc_host = val;
if (!(val & FPEXC_EN)) {
write_sysreg(val | FPEXC_EN, VFP_FPEXC);
isb();
}
write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(HSTR_T(15), HSTR);
write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
val = read_sysreg(HDCR);
write_sysreg(val | HDCR_TPM | HDCR_TPMCR, HDCR);
}
static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
{
u32 val;
write_sysreg(0, HCR);
write_sysreg(0, HSTR);
val = read_sysreg(HDCR);
write_sysreg(val & ~(HDCR_TPM | HDCR_TPMCR), HDCR);
write_sysreg(0, HCPTR);
}
static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
write_sysreg(kvm->arch.vttbr, VTTBR);
write_sysreg(vcpu->arch.midr, VPIDR);
}
static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
{
write_sysreg(0, VTTBR);
write_sysreg(read_sysreg(MIDR), VPIDR);
}
static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
{
__vgic_v2_save_state(vcpu);
}
static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
{
__vgic_v2_restore_state(vcpu);
}
static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
{
u32 hsr = read_sysreg(HSR);
u8 ec = hsr >> HSR_EC_SHIFT;
u32 hpfar, far;
vcpu->arch.fault.hsr = hsr;
if (ec == HSR_EC_IABT)
far = read_sysreg(HIFAR);
else if (ec == HSR_EC_DABT)
far = read_sysreg(HDFAR);
else
return true;
/*
* B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
*
* Abort on the stage 2 translation for a memory access from a
* Non-secure PL1 or PL0 mode:
*
* For any Access flag fault or Translation fault, and also for any
* Permission fault on the stage 2 translation of a memory access
* made as part of a translation table walk for a stage 1 translation,
* the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
* is UNKNOWN.
*/
if (!(hsr & HSR_DABT_S1PTW) && (hsr & HSR_FSC_TYPE) == FSC_PERM) {
u64 par, tmp;
par = read_sysreg(PAR);
write_sysreg(far, ATS1CPR);
isb();
tmp = read_sysreg(PAR);
write_sysreg(par, PAR);
if (unlikely(tmp & 1))
return false; /* Translation failed, back to guest */
hpfar = ((tmp >> 12) & ((1UL << 28) - 1)) << 4;
} else {
hpfar = read_sysreg(HPFAR);
}
vcpu->arch.fault.hxfar = far;
vcpu->arch.fault.hpfar = hpfar;
return true;
}
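/*
 * Worked example of the PAR-to-HPFAR repacking above (illustrative values
 * only): for a translated output address of 0x1234567000, tmp >> 12 is
 * 0x1234567, the 28bit mask leaves it unchanged, and the shift left by 4
 * yields hpfar = 0x12345670, i.e. IPA[39:12] reported in HPFAR[31:4].
 */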
static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
bool fp_enabled;
u64 exit_code;
u32 fpexc;
vcpu = kern_hyp_va(vcpu);
write_sysreg(vcpu, HTPIDR);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
guest_ctxt = &vcpu->arch.ctxt;
__sysreg_save_state(host_ctxt);
__banked_save_state(host_ctxt);
__activate_traps(vcpu, &fpexc);
__activate_vm(vcpu);
__vgic_restore_state(vcpu);
__timer_restore_state(vcpu);
__sysreg_restore_state(guest_ctxt);
__banked_restore_state(guest_ctxt);
/* Jump in the fire! */
again:
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
if (exit_code == ARM_EXCEPTION_HVC && !__populate_fault_info(vcpu))
goto again;
fp_enabled = __vfp_enabled();
__banked_save_state(guest_ctxt);
__sysreg_save_state(guest_ctxt);
__timer_save_state(vcpu);
__vgic_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__banked_restore_state(host_ctxt);
__sysreg_restore_state(host_ctxt);
if (fp_enabled) {
__vfp_save_state(&guest_ctxt->vfp);
__vfp_restore_state(&host_ctxt->vfp);
}
write_sysreg(fpexc, VFP_FPEXC);
return exit_code;
}
__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
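/*
 * The __alias attribute publishes the __hyp_text function above under the
 * __kvm_vcpu_run name declared in kvm_asm.h, so the host side can invoke
 * it through kvm_call_hyp().
 */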
static const char * const __hyp_panic_string[] = {
[ARM_EXCEPTION_RESET] = "\nHYP panic: RST PC:%08x CPSR:%08x",
[ARM_EXCEPTION_UNDEFINED] = "\nHYP panic: UNDEF PC:%08x CPSR:%08x",
[ARM_EXCEPTION_SOFTWARE] = "\nHYP panic: SVC PC:%08x CPSR:%08x",
[ARM_EXCEPTION_PREF_ABORT] = "\nHYP panic: PABRT PC:%08x CPSR:%08x",
[ARM_EXCEPTION_DATA_ABORT] = "\nHYP panic: DABRT PC:%08x ADDR:%08x",
[ARM_EXCEPTION_IRQ] = "\nHYP panic: IRQ PC:%08x CPSR:%08x",
[ARM_EXCEPTION_FIQ] = "\nHYP panic: FIQ PC:%08x CPSR:%08x",
[ARM_EXCEPTION_HVC] = "\nHYP panic: HVC PC:%08x CPSR:%08x",
};
void __hyp_text __noreturn __hyp_panic(int cause)
{
u32 elr = read_special(ELR_hyp);
u32 val;
if (cause == ARM_EXCEPTION_DATA_ABORT)
val = read_sysreg(HDFAR);
else
val = read_special(SPSR);
if (read_sysreg(VTTBR)) {
struct kvm_vcpu *vcpu;
struct kvm_cpu_context *host_ctxt;
vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_state(host_ctxt);
}
/* Call panic for real */
__hyp_do_panic(__hyp_panic_string[cause], elr, val);
unreachable();
}

arch/arm/kvm/hyp/tlb.c (new file, 70 lines)

@ -0,0 +1,70 @@
/*
* Original code:
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*
* Mostly rewritten in C by Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <asm/kvm_hyp.h>
/**
* Flush per-VMID TLBs
*
* __kvm_tlb_flush_vmid(struct kvm *kvm);
*
* We rely on the hardware to broadcast the TLB invalidation to all CPUs
* inside the inner-shareable domain (which is the case for all v7
* implementations). If we come across a non-IS SMP implementation, we'll
* have to use an IPI based mechanism. Until then, we stick to the simple
* hardware assisted version.
*
* As v7 does not support flushing per IPA, just nuke the whole TLB
* instead, ignoring the ipa value.
*/
static void __hyp_text __tlb_flush_vmid(struct kvm *kvm)
{
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
write_sysreg(kvm->arch.vttbr, VTTBR);
isb();
write_sysreg(0, TLBIALLIS);
dsb(ish);
isb();
write_sysreg(0, VTTBR);
}
__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm);
static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
__tlb_flush_vmid(kvm);
}
__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm,
phys_addr_t ipa);
static void __hyp_text __tlb_flush_vm_context(void)
{
write_sysreg(0, TLBIALLNSNHIS);
write_sysreg(0, ICIALLUIS);
dsb(ish);
}
__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void);

arch/arm/kvm/hyp/vfp.S (new file, 68 lines)

@ -0,0 +1,68 @@
/*
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/linkage.h>
#include <asm/vfpmacros.h>
.text
.pushsection .hyp.text, "ax"
/* void __vfp_save_state(struct vfp_hard_struct *vfp); */
ENTRY(__vfp_save_state)
push {r4, r5}
VFPFMRX r1, FPEXC
@ Make sure VFP is really enabled so we can touch the registers.
orr r5, r1, #FPEXC_EN
tst r5, #FPEXC_EX @ Check for VFP Subarchitecture
bic r5, r5, #FPEXC_EX @ FPEXC_EX disable
VFPFMXR FPEXC, r5
isb
VFPFMRX r2, FPSCR
beq 1f
@ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so
@ we only need to save them if FPEXC_EX is set.
VFPFMRX r3, FPINST
tst r5, #FPEXC_FP2V
VFPFMRX r4, FPINST2, ne @ vmrsne
1:
VFPFSTMIA r0, r5 @ Save VFP registers
stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2
pop {r4, r5}
bx lr
ENDPROC(__vfp_save_state)
/* void __vfp_restore_state(struct vfp_hard_struct *vfp);
* Assume FPEXC_EN is on and FPEXC_EX is off */
ENTRY(__vfp_restore_state)
VFPFLDMIA r0, r1 @ Load VFP registers
ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2
VFPFMXR FPSCR, r1
tst r0, #FPEXC_EX @ Check for VFP Subarchitecture
beq 1f
VFPFMXR FPINST, r2
tst r0, #FPEXC_FP2V
VFPFMXR FPINST2, r3, ne
1:
VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN)
bx lr
ENDPROC(__vfp_restore_state)
.popsection


@ -84,14 +84,6 @@ __do_hyp_init:
orr r0, r0, r1
mcr p15, 4, r0, c2, c0, 2 @ HTCR
- mrc p15, 4, r1, c2, c1, 2 @ VTCR
- ldr r2, =VTCR_MASK
- bic r1, r1, r2
- bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits
- orr r1, r0, r1
- orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
- mcr p15, 4, r1, c2, c1, 2 @ VTCR
@ Use the same memory attributes for hyp. accesses as the kernel
@ (copy MAIRx to HMAIRx).
mrc p15, 0, r0, c10, c2, 0


@ -17,211 +17,14 @@
*/
#include <linux/linkage.h>
#include <linux/const.h>
#include <asm/unified.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
#include <asm/vfpmacros.h>
#include "interrupts_head.S"
.text
__kvm_hyp_code_start:
.globl __kvm_hyp_code_start
/********************************************************************
* Flush per-VMID TLBs
*
* void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
*
* We rely on the hardware to broadcast the TLB invalidation to all CPUs
* inside the inner-shareable domain (which is the case for all v7
* implementations). If we come across a non-IS SMP implementation, we'll
* have to use an IPI based mechanism. Until then, we stick to the simple
* hardware assisted version.
*
* As v7 does not support flushing per IPA, just nuke the whole TLB
* instead, ignoring the ipa value.
*/
ENTRY(__kvm_tlb_flush_vmid_ipa)
push {r2, r3}
dsb ishst
add r0, r0, #KVM_VTTBR
ldrd r2, r3, [r0]
mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
isb
mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
dsb ish
isb
mov r2, #0
mov r3, #0
mcrr p15, 6, r2, r3, c2 @ Back to VMID #0
isb @ Not necessary if followed by eret
pop {r2, r3}
bx lr
ENDPROC(__kvm_tlb_flush_vmid_ipa)
/**
* void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
*
* Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
* parameter
*/
ENTRY(__kvm_tlb_flush_vmid)
b __kvm_tlb_flush_vmid_ipa
ENDPROC(__kvm_tlb_flush_vmid)
/********************************************************************
* Flush TLBs and instruction caches of all CPUs inside the inner-shareable
* domain, for all VMIDs
*
* void __kvm_flush_vm_context(void);
*/
ENTRY(__kvm_flush_vm_context)
mov r0, #0 @ rn parameter for c15 flushes is SBZ
/* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */
mcr p15, 4, r0, c8, c3, 4
/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
mcr p15, 0, r0, c7, c1, 0
dsb ish
isb @ Not necessary if followed by eret
bx lr
ENDPROC(__kvm_flush_vm_context)
/********************************************************************
* Hypervisor world-switch code
*
*
* int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
*/
ENTRY(__kvm_vcpu_run)
@ Save the vcpu pointer
mcr p15, 4, vcpu, c13, c0, 2 @ HTPIDR
save_host_regs
restore_vgic_state
restore_timer_state
@ Store hardware CP15 state and load guest state
read_cp15_state store_to_vcpu = 0
write_cp15_state read_from_vcpu = 1
@ If the host kernel has not been configured with VFPv3 support,
@ then it is safer if we deny guests from using it as well.
#ifdef CONFIG_VFPv3
@ Set FPEXC_EN so the guest doesn't trap floating point instructions
VFPFMRX r2, FPEXC @ VMRS
push {r2}
orr r2, r2, #FPEXC_EN
VFPFMXR FPEXC, r2 @ VMSR
#endif
@ Configure Hyp-role
configure_hyp_role vmentry
@ Trap coprocessor CRx accesses
set_hstr vmentry
set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
set_hdcr vmentry
@ Write configured ID register into MIDR alias
ldr r1, [vcpu, #VCPU_MIDR]
mcr p15, 4, r1, c0, c0, 0
@ Write guest view of MPIDR into VMPIDR
ldr r1, [vcpu, #CP15_OFFSET(c0_MPIDR)]
mcr p15, 4, r1, c0, c0, 5
@ Set up guest memory translation
ldr r1, [vcpu, #VCPU_KVM]
add r1, r1, #KVM_VTTBR
ldrd r2, r3, [r1]
mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
@ We're all done, just restore the GPRs and go to the guest
restore_guest_regs
clrex @ Clear exclusive monitor
eret
__kvm_vcpu_return:
/*
* return convention:
* guest r0, r1, r2 saved on the stack
* r0: vcpu pointer
* r1: exception code
*/
save_guest_regs
@ Set VMID == 0
mov r2, #0
mov r3, #0
mcrr p15, 6, r2, r3, c2 @ Write VTTBR
@ Don't trap coprocessor accesses for host kernel
set_hstr vmexit
set_hdcr vmexit
set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore
#ifdef CONFIG_VFPv3
@ Switch VFP/NEON hardware state to the host's
add r7, vcpu, #VCPU_VFP_GUEST
store_vfp_state r7
add r7, vcpu, #VCPU_VFP_HOST
ldr r7, [r7]
restore_vfp_state r7
after_vfp_restore:
@ Restore FPEXC_EN which we clobbered on entry
pop {r2}
VFPFMXR FPEXC, r2
#else
after_vfp_restore:
#endif
@ Reset Hyp-role
configure_hyp_role vmexit
@ Let host read hardware MIDR
mrc p15, 0, r2, c0, c0, 0
mcr p15, 4, r2, c0, c0, 0
@ Back to hardware MPIDR
mrc p15, 0, r2, c0, c0, 5
mcr p15, 4, r2, c0, c0, 5
@ Store guest CP15 state and restore host state
read_cp15_state store_to_vcpu = 1
write_cp15_state read_from_vcpu = 0
save_timer_state
save_vgic_state
restore_host_regs
clrex @ Clear exclusive monitor
#ifndef CONFIG_CPU_ENDIAN_BE8
mov r0, r1 @ Return the return code
mov r1, #0 @ Clear upper bits in return value
#else
@ r1 already has return code
mov r0, #0 @ Clear upper bits in return value
#endif /* CONFIG_CPU_ENDIAN_BE8 */
bx lr @ return to IOCTL
/********************************************************************
* Call function in Hyp mode
*
*
- * u64 kvm_call_hyp(void *hypfn, ...);
+ * unsigned long kvm_call_hyp(void *hypfn, ...);
*
* This is not really a variadic function in the classic C-way and care must
* be taken when calling this to ensure parameters are passed in registers
@ -232,7 +35,7 @@ after_vfp_restore:
* passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the
* function pointer can be passed). The function being called must be mapped
* in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
- * passed in r0 and r1.
+ * passed in r0 (strictly 32bit).
*
* A function pointer with a value of 0xffffffff has a special meaning,
* and is used to implement __hyp_get_vectors in the same way as in
@ -246,281 +49,4 @@ after_vfp_restore:
ENTRY(kvm_call_hyp)
hvc #0
bx lr
/********************************************************************
* Hypervisor exception vector and handlers
*
*
* The KVM/ARM Hypervisor ABI is defined as follows:
*
* Entry to Hyp mode from the host kernel will happen _only_ when an HVC
* instruction is issued since all traps are disabled when running the host
* kernel as per the Hyp-mode initialization at boot time.
*
* HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc
* below) when the HVC instruction is called from SVC mode (i.e. a guest or the
* host kernel) and they cause a trap to the vector page + offset 0x8 when HVC
* instructions are called from within Hyp-mode.
*
* Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
* Switching to Hyp mode is done through a simple HVC #0 instruction. The
* exception vector code will check that the HVC comes from VMID==0 and if
* so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
* - r0 contains a pointer to a HYP function
* - r1, r2, and r3 contain arguments to the above function.
* - The HYP function will be called with its arguments in r0, r1 and r2.
* On HYP function return, we return directly to SVC.
*
* Note that the above is used to execute code in Hyp-mode from a host-kernel
* point of view, and is a different concept from performing a world-switch and
* executing guest code SVC mode (with a VMID != 0).
*/
/* Handle undef, svc, pabt, or dabt by crashing with a user notice */
.macro bad_exception exception_code, panic_str
push {r0-r2}
mrrc p15, 6, r0, r1, c2 @ Read VTTBR
lsr r1, r1, #16
ands r1, r1, #0xff
beq 99f
load_vcpu @ Load VCPU pointer
.if \exception_code == ARM_EXCEPTION_DATA_ABORT
mrc p15, 4, r2, c5, c2, 0 @ HSR
mrc p15, 4, r1, c6, c0, 0 @ HDFAR
str r2, [vcpu, #VCPU_HSR]
str r1, [vcpu, #VCPU_HxFAR]
.endif
.if \exception_code == ARM_EXCEPTION_PREF_ABORT
mrc p15, 4, r2, c5, c2, 0 @ HSR
mrc p15, 4, r1, c6, c0, 2 @ HIFAR
str r2, [vcpu, #VCPU_HSR]
str r1, [vcpu, #VCPU_HxFAR]
.endif
mov r1, #\exception_code
b __kvm_vcpu_return
@ We were in the host already. Let's craft a panic-ing return to SVC.
99: mrs r2, cpsr
bic r2, r2, #MODE_MASK
orr r2, r2, #SVC_MODE
THUMB( orr r2, r2, #PSR_T_BIT )
msr spsr_cxsf, r2
mrs r1, ELR_hyp
ldr r2, =panic
msr ELR_hyp, r2
ldr r0, =\panic_str
clrex @ Clear exclusive monitor
eret
.endm
.text
.align 5
__kvm_hyp_vector:
.globl __kvm_hyp_vector
@ Hyp-mode exception vector
W(b) hyp_reset
W(b) hyp_undef
W(b) hyp_svc
W(b) hyp_pabt
W(b) hyp_dabt
W(b) hyp_hvc
W(b) hyp_irq
W(b) hyp_fiq
.align
hyp_reset:
b hyp_reset
.align
hyp_undef:
bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str
.align
hyp_svc:
bad_exception ARM_EXCEPTION_HVC, svc_die_str
.align
hyp_pabt:
bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str
.align
hyp_dabt:
bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str
.align
hyp_hvc:
/*
* Getting here is either becuase of a trap from a guest or from calling
* HVC from the host kernel, which means "switch to Hyp mode".
*/
push {r0, r1, r2}
@ Check syndrome register
mrc p15, 4, r1, c5, c2, 0 @ HSR
lsr r0, r1, #HSR_EC_SHIFT
cmp r0, #HSR_EC_HVC
bne guest_trap @ Not HVC instr.
/*
* Let's check if the HVC came from VMID 0 and allow simple
* switch to Hyp mode
*/
mrrc p15, 6, r0, r2, c2
lsr r2, r2, #16
and r2, r2, #0xff
cmp r2, #0
bne guest_trap @ Guest called HVC
/*
* Getting here means host called HVC, we shift parameters and branch
* to Hyp function.
*/
pop {r0, r1, r2}
/* Check for __hyp_get_vectors */
cmp r0, #-1
mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
beq 1f
push {lr}
mrs lr, SPSR
push {lr}
mov lr, r0
mov r0, r1
mov r1, r2
mov r2, r3
THUMB( orr lr, #1)
blx lr @ Call the HYP function
pop {lr}
msr SPSR_csxf, lr
pop {lr}
1: eret
guest_trap:
load_vcpu @ Load VCPU pointer to r0
str r1, [vcpu, #VCPU_HSR]
@ Check if we need the fault information
lsr r1, r1, #HSR_EC_SHIFT
#ifdef CONFIG_VFPv3
cmp r1, #HSR_EC_CP_0_13
beq switch_to_guest_vfp
#endif
cmp r1, #HSR_EC_IABT
mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
beq 2f
cmp r1, #HSR_EC_DABT
bne 1f
mrc p15, 4, r2, c6, c0, 0 @ HDFAR
2: str r2, [vcpu, #VCPU_HxFAR]
/*
* B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
*
* Abort on the stage 2 translation for a memory access from a
* Non-secure PL1 or PL0 mode:
*
* For any Access flag fault or Translation fault, and also for any
* Permission fault on the stage 2 translation of a memory access
* made as part of a translation table walk for a stage 1 translation,
* the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
* is UNKNOWN.
*/
/* Check for permission fault, and S1PTW */
mrc p15, 4, r1, c5, c2, 0 @ HSR
and r0, r1, #HSR_FSC_TYPE
cmp r0, #FSC_PERM
tsteq r1, #(1 << 7) @ S1PTW
mrcne p15, 4, r2, c6, c0, 4 @ HPFAR
bne 3f
/* Preserve PAR */
mrrc p15, 0, r0, r1, c7 @ PAR
push {r0, r1}
/* Resolve IPA using the xFAR */
mcr p15, 0, r2, c7, c8, 0 @ ATS1CPR
isb
mrrc p15, 0, r0, r1, c7 @ PAR
tst r0, #1
bne 4f @ Failed translation
ubfx r2, r0, #12, #20
lsl r2, r2, #4
orr r2, r2, r1, lsl #24
/* Restore PAR */
pop {r0, r1}
mcrr p15, 0, r0, r1, c7 @ PAR
3: load_vcpu @ Load VCPU pointer to r0
str r2, [r0, #VCPU_HPFAR]
1: mov r1, #ARM_EXCEPTION_HVC
b __kvm_vcpu_return
4: pop {r0, r1} @ Failed translation, return to guest
mcrr p15, 0, r0, r1, c7 @ PAR
clrex
pop {r0, r1, r2}
eret
/*
* If VFPv3 support is not available, then we will not switch the VFP
* registers; however cp10 and cp11 accesses will still trap and fallback
* to the regular coprocessor emulation code, which currently will
* inject an undefined exception to the guest.
*/
#ifdef CONFIG_VFPv3
switch_to_guest_vfp:
push {r3-r7}
@ NEON/VFP used. Turn on VFP access.
set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11))
@ Switch VFP/NEON hardware state to the guest's
add r7, r0, #VCPU_VFP_HOST
ldr r7, [r7]
store_vfp_state r7
add r7, r0, #VCPU_VFP_GUEST
restore_vfp_state r7
pop {r3-r7}
pop {r0-r2}
clrex
eret
#endif
.align
hyp_irq:
push {r0, r1, r2}
mov r1, #ARM_EXCEPTION_IRQ
load_vcpu @ Load VCPU pointer to r0
b __kvm_vcpu_return
.align
hyp_fiq:
b hyp_fiq
.ltorg
__kvm_hyp_code_end:
.globl __kvm_hyp_code_end
.section ".rodata"
und_die_str:
.ascii "unexpected undefined exception in Hyp mode at: %#08x\n"
pabt_die_str:
.ascii "unexpected prefetch abort in Hyp mode at: %#08x\n"
dabt_die_str:
.ascii "unexpected data abort in Hyp mode at: %#08x\n"
svc_die_str:
.ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n"
ENDPROC(kvm_call_hyp)
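A rough host-side sketch of the kvm_call_hyp() convention described in the comment above; the wrapper below is made up for illustration, only kvm_call_hyp() and __kvm_tlb_flush_vmid() come from this series:

/* Hypothetical caller: the HYP function pointer goes first, up to three
 * arguments follow (they arrive in r0-r2 on the HYP side), and the 32bit
 * return value, if any, comes back in r0 (ignored here). */
static void example_flush_guest_tlb(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}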


@ -1,648 +0,0 @@
#include <linux/irqchip/arm-gic.h>
#include <asm/assembler.h>
#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
#define VCPU_USR_SP (VCPU_USR_REG(13))
#define VCPU_USR_LR (VCPU_USR_REG(14))
#define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4))
/*
* Many of these macros need to access the VCPU structure, which is always
* held in r0. These macros should never clobber r1, as it is used to hold the
* exception code on the return path (except of course the macro that switches
* all the registers before the final jump to the VM).
*/
vcpu .req r0 @ vcpu pointer always in r0
/* Clobbers {r2-r6} */
.macro store_vfp_state vfp_base
@ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions
VFPFMRX r2, FPEXC
@ Make sure VFP is enabled so we can touch the registers.
orr r6, r2, #FPEXC_EN
VFPFMXR FPEXC, r6
VFPFMRX r3, FPSCR
tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
beq 1f
@ If FPEXC_EX is 0, then FPINST/FPINST2 reads are upredictable, so
@ we only need to save them if FPEXC_EX is set.
VFPFMRX r4, FPINST
tst r2, #FPEXC_FP2V
VFPFMRX r5, FPINST2, ne @ vmrsne
bic r6, r2, #FPEXC_EX @ FPEXC_EX disable
VFPFMXR FPEXC, r6
1:
VFPFSTMIA \vfp_base, r6 @ Save VFP registers
stm \vfp_base, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2
.endm
/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */
.macro restore_vfp_state vfp_base
VFPFLDMIA \vfp_base, r6 @ Load VFP registers
ldm \vfp_base, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2
VFPFMXR FPSCR, r3
tst r2, #FPEXC_EX @ Check for VFP Subarchitecture
beq 1f
VFPFMXR FPINST, r4
tst r2, #FPEXC_FP2V
VFPFMXR FPINST2, r5, ne
1:
VFPFMXR FPEXC, r2 @ FPEXC (last, in case !EN)
.endm
/* These are simply for the macros to work - value don't have meaning */
.equ usr, 0
.equ svc, 1
.equ abt, 2
.equ und, 3
.equ irq, 4
.equ fiq, 5
.macro push_host_regs_mode mode
mrs r2, SP_\mode
mrs r3, LR_\mode
mrs r4, SPSR_\mode
push {r2, r3, r4}
.endm
/*
* Store all host persistent registers on the stack.
* Clobbers all registers, in all modes, except r0 and r1.
*/
.macro save_host_regs
/* Hyp regs. Only ELR_hyp (SPSR_hyp already saved) */
mrs r2, ELR_hyp
push {r2}
/* usr regs */
push {r4-r12} @ r0-r3 are always clobbered
mrs r2, SP_usr
mov r3, lr
push {r2, r3}
push_host_regs_mode svc
push_host_regs_mode abt
push_host_regs_mode und
push_host_regs_mode irq
/* fiq regs */
mrs r2, r8_fiq
mrs r3, r9_fiq
mrs r4, r10_fiq
mrs r5, r11_fiq
mrs r6, r12_fiq
mrs r7, SP_fiq
mrs r8, LR_fiq
mrs r9, SPSR_fiq
push {r2-r9}
.endm
.macro pop_host_regs_mode mode
pop {r2, r3, r4}
msr SP_\mode, r2
msr LR_\mode, r3
msr SPSR_\mode, r4
.endm
/*
* Restore all host registers from the stack.
* Clobbers all registers, in all modes, except r0 and r1.
*/
.macro restore_host_regs
pop {r2-r9}
msr r8_fiq, r2
msr r9_fiq, r3
msr r10_fiq, r4
msr r11_fiq, r5
msr r12_fiq, r6
msr SP_fiq, r7
msr LR_fiq, r8
msr SPSR_fiq, r9
pop_host_regs_mode irq
pop_host_regs_mode und
pop_host_regs_mode abt
pop_host_regs_mode svc
pop {r2, r3}
msr SP_usr, r2
mov lr, r3
pop {r4-r12}
pop {r2}
msr ELR_hyp, r2
.endm
/*
* Restore SP, LR and SPSR for a given mode. offset is the offset of
* this mode's registers from the VCPU base.
*
* Assumes vcpu pointer in vcpu reg
*
* Clobbers r1, r2, r3, r4.
*/
.macro restore_guest_regs_mode mode, offset
add r1, vcpu, \offset
ldm r1, {r2, r3, r4}
msr SP_\mode, r2
msr LR_\mode, r3
msr SPSR_\mode, r4
.endm
/*
* Restore all guest registers from the vcpu struct.
*
* Assumes vcpu pointer in vcpu reg
*
* Clobbers *all* registers.
*/
.macro restore_guest_regs
restore_guest_regs_mode svc, #VCPU_SVC_REGS
restore_guest_regs_mode abt, #VCPU_ABT_REGS
restore_guest_regs_mode und, #VCPU_UND_REGS
restore_guest_regs_mode irq, #VCPU_IRQ_REGS
add r1, vcpu, #VCPU_FIQ_REGS
ldm r1, {r2-r9}
msr r8_fiq, r2
msr r9_fiq, r3
msr r10_fiq, r4
msr r11_fiq, r5
msr r12_fiq, r6
msr SP_fiq, r7
msr LR_fiq, r8
msr SPSR_fiq, r9
@ Load return state
ldr r2, [vcpu, #VCPU_PC]
ldr r3, [vcpu, #VCPU_CPSR]
msr ELR_hyp, r2
msr SPSR_cxsf, r3
@ Load user registers
ldr r2, [vcpu, #VCPU_USR_SP]
ldr r3, [vcpu, #VCPU_USR_LR]
msr SP_usr, r2
mov lr, r3
add vcpu, vcpu, #(VCPU_USR_REGS)
ldm vcpu, {r0-r12}
.endm
/*
* Save SP, LR and SPSR for a given mode. offset is the offset of
* this mode's registers from the VCPU base.
*
* Assumes vcpu pointer in vcpu reg
*
* Clobbers r2, r3, r4, r5.
*/
.macro save_guest_regs_mode mode, offset
add r2, vcpu, \offset
mrs r3, SP_\mode
mrs r4, LR_\mode
mrs r5, SPSR_\mode
stm r2, {r3, r4, r5}
.endm
/*
* Save all guest registers to the vcpu struct
* Expects guest's r0, r1, r2 on the stack.
*
* Assumes vcpu pointer in vcpu reg
*
* Clobbers r2, r3, r4, r5.
*/
.macro save_guest_regs
@ Store usr registers
add r2, vcpu, #VCPU_USR_REG(3)
stm r2, {r3-r12}
add r2, vcpu, #VCPU_USR_REG(0)
pop {r3, r4, r5} @ r0, r1, r2
stm r2, {r3, r4, r5}
mrs r2, SP_usr
mov r3, lr
str r2, [vcpu, #VCPU_USR_SP]
str r3, [vcpu, #VCPU_USR_LR]
@ Store return state
mrs r2, ELR_hyp
mrs r3, spsr
str r2, [vcpu, #VCPU_PC]
str r3, [vcpu, #VCPU_CPSR]
@ Store other guest registers
save_guest_regs_mode svc, #VCPU_SVC_REGS
save_guest_regs_mode abt, #VCPU_ABT_REGS
save_guest_regs_mode und, #VCPU_UND_REGS
save_guest_regs_mode irq, #VCPU_IRQ_REGS
.endm
/* Reads cp15 registers from hardware and stores them in memory
* @store_to_vcpu: If 0, registers are written in-order to the stack,
* otherwise to the VCPU struct pointed to by vcpup
*
* Assumes vcpu pointer in vcpu reg
*
* Clobbers r2 - r12
*/
.macro read_cp15_state store_to_vcpu
mrc p15, 0, r2, c1, c0, 0 @ SCTLR
mrc p15, 0, r3, c1, c0, 2 @ CPACR
mrc p15, 0, r4, c2, c0, 2 @ TTBCR
mrc p15, 0, r5, c3, c0, 0 @ DACR
mrrc p15, 0, r6, r7, c2 @ TTBR 0
mrrc p15, 1, r8, r9, c2 @ TTBR 1
mrc p15, 0, r10, c10, c2, 0 @ PRRR
mrc p15, 0, r11, c10, c2, 1 @ NMRR
mrc p15, 2, r12, c0, c0, 0 @ CSSELR
.if \store_to_vcpu == 0
push {r2-r12} @ Push CP15 registers
.else
str r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
str r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
str r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
str r5, [vcpu, #CP15_OFFSET(c3_DACR)]
add r2, vcpu, #CP15_OFFSET(c2_TTBR0)
strd r6, r7, [r2]
add r2, vcpu, #CP15_OFFSET(c2_TTBR1)
strd r8, r9, [r2]
str r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
str r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
str r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
.endif
mrc p15, 0, r2, c13, c0, 1 @ CID
mrc p15, 0, r3, c13, c0, 2 @ TID_URW
mrc p15, 0, r4, c13, c0, 3 @ TID_URO
mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV
mrc p15, 0, r6, c5, c0, 0 @ DFSR
mrc p15, 0, r7, c5, c0, 1 @ IFSR
mrc p15, 0, r8, c5, c1, 0 @ ADFSR
mrc p15, 0, r9, c5, c1, 1 @ AIFSR
mrc p15, 0, r10, c6, c0, 0 @ DFAR
mrc p15, 0, r11, c6, c0, 2 @ IFAR
mrc p15, 0, r12, c12, c0, 0 @ VBAR
.if \store_to_vcpu == 0
push {r2-r12} @ Push CP15 registers
.else
str r2, [vcpu, #CP15_OFFSET(c13_CID)]
str r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
str r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
str r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
str r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
str r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
str r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
str r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
str r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
str r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
str r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
.endif
mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL
mrrc p15, 0, r4, r5, c7 @ PAR
mrc p15, 0, r6, c10, c3, 0 @ AMAIR0
mrc p15, 0, r7, c10, c3, 1 @ AMAIR1
.if \store_to_vcpu == 0
push {r2,r4-r7}
.else
str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
add r12, vcpu, #CP15_OFFSET(c7_PAR)
strd r4, r5, [r12]
str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
.endif
.endm
/*
* Reads cp15 registers from memory and writes them to hardware
* @read_from_vcpu: If 0, registers are read in-order from the stack,
* otherwise from the VCPU struct pointed to by vcpup
*
* Assumes vcpu pointer in vcpu reg
*/
.macro write_cp15_state read_from_vcpu
.if \read_from_vcpu == 0
pop {r2,r4-r7}
.else
ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
add r12, vcpu, #CP15_OFFSET(c7_PAR)
ldrd r4, r5, [r12]
ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
.endif
mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL
mcrr p15, 0, r4, r5, c7 @ PAR
mcr p15, 0, r6, c10, c3, 0 @ AMAIR0
mcr p15, 0, r7, c10, c3, 1 @ AMAIR1
.if \read_from_vcpu == 0
pop {r2-r12}
.else
ldr r2, [vcpu, #CP15_OFFSET(c13_CID)]
ldr r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
ldr r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
ldr r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
ldr r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
ldr r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
ldr r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
ldr r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
ldr r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
ldr r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
ldr r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
.endif
mcr p15, 0, r2, c13, c0, 1 @ CID
mcr p15, 0, r3, c13, c0, 2 @ TID_URW
mcr p15, 0, r4, c13, c0, 3 @ TID_URO
mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV
mcr p15, 0, r6, c5, c0, 0 @ DFSR
mcr p15, 0, r7, c5, c0, 1 @ IFSR
mcr p15, 0, r8, c5, c1, 0 @ ADFSR
mcr p15, 0, r9, c5, c1, 1 @ AIFSR
mcr p15, 0, r10, c6, c0, 0 @ DFAR
mcr p15, 0, r11, c6, c0, 2 @ IFAR
mcr p15, 0, r12, c12, c0, 0 @ VBAR
.if \read_from_vcpu == 0
pop {r2-r12}
.else
ldr r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
ldr r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
ldr r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
ldr r5, [vcpu, #CP15_OFFSET(c3_DACR)]
add r12, vcpu, #CP15_OFFSET(c2_TTBR0)
ldrd r6, r7, [r12]
add r12, vcpu, #CP15_OFFSET(c2_TTBR1)
ldrd r8, r9, [r12]
ldr r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
ldr r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
ldr r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
.endif
mcr p15, 0, r2, c1, c0, 0 @ SCTLR
mcr p15, 0, r3, c1, c0, 2 @ CPACR
mcr p15, 0, r4, c2, c0, 2 @ TTBCR
mcr p15, 0, r5, c3, c0, 0 @ DACR
mcrr p15, 0, r6, r7, c2 @ TTBR 0
mcrr p15, 1, r8, r9, c2 @ TTBR 1
mcr p15, 0, r10, c10, c2, 0 @ PRRR
mcr p15, 0, r11, c10, c2, 1 @ NMRR
mcr p15, 2, r12, c0, c0, 0 @ CSSELR
.endm
/*
* Save the VGIC CPU state into memory
*
* Assumes vcpu pointer in vcpu reg
*/
.macro save_vgic_state
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
cmp r2, #0
beq 2f
/* Compute the address of struct vgic_cpu */
add r11, vcpu, #VCPU_VGIC_CPU
/* Save all interesting registers */
ldr r4, [r2, #GICH_VMCR]
ldr r5, [r2, #GICH_MISR]
ldr r6, [r2, #GICH_EISR0]
ldr r7, [r2, #GICH_EISR1]
ldr r8, [r2, #GICH_ELRSR0]
ldr r9, [r2, #GICH_ELRSR1]
ldr r10, [r2, #GICH_APR]
ARM_BE8(rev r4, r4 )
ARM_BE8(rev r5, r5 )
ARM_BE8(rev r6, r6 )
ARM_BE8(rev r7, r7 )
ARM_BE8(rev r8, r8 )
ARM_BE8(rev r9, r9 )
ARM_BE8(rev r10, r10 )
str r4, [r11, #VGIC_V2_CPU_VMCR]
str r5, [r11, #VGIC_V2_CPU_MISR]
#ifdef CONFIG_CPU_ENDIAN_BE8
str r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
str r7, [r11, #VGIC_V2_CPU_EISR]
str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
str r9, [r11, #VGIC_V2_CPU_ELRSR]
#else
str r6, [r11, #VGIC_V2_CPU_EISR]
str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
str r8, [r11, #VGIC_V2_CPU_ELRSR]
str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
#endif
str r10, [r11, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
mov r5, #0
str r5, [r2, #GICH_HCR]
/* Save list registers */
add r2, r2, #GICH_LR0
add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r2], #4
ARM_BE8(rev r6, r6 )
str r6, [r3], #4
subs r4, r4, #1
bne 1b
2:
.endm
/*
* Restore the VGIC CPU state from memory
*
* Assumes vcpu pointer in vcpu reg
*/
.macro restore_vgic_state
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
cmp r2, #0
beq 2f
/* Compute the address of struct vgic_cpu */
add r11, vcpu, #VCPU_VGIC_CPU
/* We only restore a minimal set of registers */
ldr r3, [r11, #VGIC_V2_CPU_HCR]
ldr r4, [r11, #VGIC_V2_CPU_VMCR]
ldr r8, [r11, #VGIC_V2_CPU_APR]
ARM_BE8(rev r3, r3 )
ARM_BE8(rev r4, r4 )
ARM_BE8(rev r8, r8 )
str r3, [r2, #GICH_HCR]
str r4, [r2, #GICH_VMCR]
str r8, [r2, #GICH_APR]
/* Restore list registers */
add r2, r2, #GICH_LR0
add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r3], #4
ARM_BE8(rev r6, r6 )
str r6, [r2], #4
subs r4, r4, #1
bne 1b
2:
.endm
#define CNTHCTL_PL1PCTEN (1 << 0)
#define CNTHCTL_PL1PCEN (1 << 1)
/*
* Save the timer state onto the VCPU and allow physical timer/counter access
* for the host.
*
* Assumes vcpu pointer in vcpu reg
* Clobbers r2-r5
*/
.macro save_timer_state
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
beq 1f
mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL
str r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
isb
mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
strd r2, r3, [r5]
@ Ensure host CNTVCT == CNTPCT
mov r2, #0
mcrr p15, 4, r2, r2, c14 @ CNTVOFF
1:
mov r2, #0 @ Clear ENABLE
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
@ Allow physical timer/counter access for the host
mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
.endm
/*
* Load the timer state from the VCPU and deny physical timer/counter access
* for the host.
*
* Assumes vcpu pointer in vcpu reg
* Clobbers r2-r5
*/
.macro restore_timer_state
@ Disallow physical timer access for the guest
@ Physical counter access is allowed
mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
orr r2, r2, #CNTHCTL_PL1PCTEN
bic r2, r2, #CNTHCTL_PL1PCEN
mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
beq 1f
ldr r2, [r4, #KVM_TIMER_CNTVOFF]
ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
ldrd r2, r3, [r5]
mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
isb
ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
and r2, r2, #3
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
1:
.endm
.equ vmentry, 0
.equ vmexit, 1
/* Configures the HSTR (Hyp System Trap Register) on entry/return
* (hardware reset value is 0) */
.macro set_hstr operation
mrc p15, 4, r2, c1, c1, 3
ldr r3, =HSTR_T(15)
.if \operation == vmentry
orr r2, r2, r3 @ Trap CR{15}
.else
bic r2, r2, r3 @ Don't trap any CRx accesses
.endif
mcr p15, 4, r2, c1, c1, 3
.endm
/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
* (hardware reset value is 0). Keep previous value in r2.
* An ISB is emited on vmexit/vmtrap, but executed on vmexit only if
* VFP wasn't already enabled (always executed on vmtrap).
* If a label is specified with vmexit, it is branched to if VFP wasn't
* enabled.
*/
.macro set_hcptr operation, mask, label = none
mrc p15, 4, r2, c1, c1, 2
ldr r3, =\mask
.if \operation == vmentry
orr r3, r2, r3 @ Trap coproc-accesses defined in mask
.else
bic r3, r2, r3 @ Don't trap defined coproc-accesses
.endif
mcr p15, 4, r3, c1, c1, 2
.if \operation != vmentry
.if \operation == vmexit
tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
beq 1f
.endif
isb
.if \label != none
b \label
.endif
1:
.endif
.endm
/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
* (hardware reset value is 0) */
.macro set_hdcr operation
mrc p15, 4, r2, c1, c1, 1
ldr r3, =(HDCR_TPM|HDCR_TPMCR)
.if \operation == vmentry
orr r2, r2, r3 @ Trap some perfmon accesses
.else
bic r2, r2, r3 @ Don't trap any perfmon accesses
.endif
mcr p15, 4, r2, c1, c1, 1
.endm
/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
.macro configure_hyp_role operation
.if \operation == vmentry
ldr r2, [vcpu, #VCPU_HCR]
ldr r3, [vcpu, #VCPU_IRQ_LINES]
orr r2, r2, r3
.else
mov r2, #0
.endif
mcr p15, 4, r2, c1, c1, 0 @ HCR
.endm
.macro load_vcpu
mrc p15, 4, vcpu, c13, c0, 2 @ HTPIDR
.endm


@ -28,6 +28,7 @@
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>
#include "trace.h"
@ -598,6 +599,9 @@ int create_hyp_mappings(void *from, void *to)
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);
if (is_kernel_in_hyp_mode())
return 0;
start = start & PAGE_MASK;
end = PAGE_ALIGN(end);
@ -630,6 +634,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
unsigned long start = KERN_TO_HYP((unsigned long)from);
unsigned long end = KERN_TO_HYP((unsigned long)to);
if (is_kernel_in_hyp_mode())
return 0;
/* Check for a valid kernel IO mapping */
if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
return -EINVAL;
@ -1430,6 +1437,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
/*
* Check for a cache maintenance operation. Since we
* ended-up here, we know it is outside of any memory
* slot. But we can't find out if that is for a device,
* or if the guest is just being stupid. The only thing
* we know for sure is that this range cannot be cached.
*
* So let's assume that the guest is just being
* cautious, and skip the instruction.
*/
if (kvm_vcpu_dabt_is_cm(vcpu)) {
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
ret = 1;
goto out_unlock;
}
/*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the


@ -71,7 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
/* Reset core registers */
- memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
+ memcpy(&vcpu->arch.ctxt.gp_regs, reset_regs, sizeof(vcpu->arch.ctxt.gp_regs));
/* Reset CP15 registers */
kvm_reset_coprocs(vcpu);


@ -750,6 +750,19 @@ config ARM64_LSE_ATOMICS
not support these instructions and requires the kernel to be
built with binutils >= 2.25.
config ARM64_VHE
bool "Enable support for Virtualization Host Extensions (VHE)"
default y
help
Virtualization Host Extensions (VHE) allow the kernel to run
directly at EL2 (instead of EL1) on processors that support
it. This leads to better performance for KVM, as they reduce
the cost of the world switch.
Selecting this option allows the VHE feature to be detected
at runtime, and does not affect processors that do not
implement this feature.
endmenu
endmenu


@ -30,8 +30,12 @@
#define ARM64_HAS_LSE_ATOMICS 5
#define ARM64_WORKAROUND_CAVIUM_23154 6
#define ARM64_WORKAROUND_834220 7
/* #define ARM64_HAS_NO_HW_PREFETCH 8 */
/* #define ARM64_HAS_UAO 9 */
/* #define ARM64_ALT_PAN_NOT_UAO 10 */
#define ARM64_HAS_VIRT_HOST_EXTN 11
- #define ARM64_NCAPS 8
+ #define ARM64_NCAPS 12
#ifndef __ASSEMBLY__


@ -18,6 +18,7 @@
#include <asm/cputype.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
#ifdef __KERNEL__
@ -35,10 +36,21 @@ struct arch_hw_breakpoint {
struct arch_hw_breakpoint_ctrl ctrl;
};
/* Privilege Levels */
#define AARCH64_BREAKPOINT_EL1 1
#define AARCH64_BREAKPOINT_EL0 2
#define DBG_HMC_HYP (1 << 13)
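/*
 * With VHE the kernel runs at EL2, so a breakpoint encoded for EL1
 * privilege alone would not fire on kernel addresses; the HMC bit extends
 * the match to the higher exception level (see encode_ctrl_reg() below).
 */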
static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
{
- return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
+ u32 val = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
ctrl.enabled;
if (is_kernel_in_hyp_mode() && ctrl.privilege == AARCH64_BREAKPOINT_EL1)
val |= DBG_HMC_HYP;
return val;
}
static inline void decode_ctrl_reg(u32 reg,
@ -61,10 +73,6 @@ static inline void decode_ctrl_reg(u32 reg,
#define ARM_BREAKPOINT_STORE 2
#define AARCH64_ESR_ACCESS_MASK (1 << 6)
- /* Privilege Levels */
- #define AARCH64_BREAKPOINT_EL1 1
- #define AARCH64_BREAKPOINT_EL0 2
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
#define ARM_BREAKPOINT_LEN_2 0x3


@ -23,6 +23,7 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
#define HCR_RW_SHIFT 31
@ -81,7 +82,7 @@
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* Hyp System Control Register (SCTLR_EL2) bits */
#define SCTLR_EL2_EE (1 << 25)
@ -216,4 +217,7 @@
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
#define CPACR_EL1_FPEN (3 << 20)
#define CPACR_EL1_TTA (1 << 28)
#endif /* __ARM64_KVM_ARM_H__ */


@ -35,9 +35,6 @@ extern char __kvm_hyp_init_end[];
extern char __kvm_hyp_vector[];
- #define __kvm_hyp_code_start __hyp_text_start
- #define __kvm_hyp_code_end __hyp_text_end
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
@ -45,9 +42,12 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
extern void __init_stage2_translation(void);
#endif
#endif /* __ARM_KVM_ASM_H__ */


@ -29,6 +29,7 @@
#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
@ -43,6 +44,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
if (is_kernel_in_hyp_mode())
vcpu->arch.hcr_el2 |= HCR_E2H;
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
}
@ -189,6 +192,11 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
}
static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
}
static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);

View File

@ -25,7 +25,9 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_perf_event.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@ -36,10 +38,11 @@
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
#include <kvm/arm_pmu.h>
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
#define KVM_VCPU_MAX_FEATURES 3
#define KVM_VCPU_MAX_FEATURES 4
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@ -114,6 +117,21 @@ enum vcpu_sysreg {
MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
/* Performance Monitors Registers */
PMCR_EL0, /* Control Register */
PMSELR_EL0, /* Event Counter Selection Register */
PMEVCNTR0_EL0, /* Event Counter Register (0-30) */
PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
PMCCNTR_EL0, /* Cycle Counter Register */
PMEVTYPER0_EL0, /* Event Type Register (0-30) */
PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
PMCCFILTR_EL0, /* Cycle Count Filter Register */
PMCNTENSET_EL0, /* Count Enable Set Register */
PMINTENSET_EL1, /* Interrupt Enable Set Register */
PMOVSSET_EL0, /* Overflow Flag Status Set Register */
PMSWINC_EL0, /* Software Increment Register */
PMUSERENR_EL0, /* User Enable Register */
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@ -211,6 +229,7 @@ struct kvm_vcpu_arch {
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
/*
* Anything that is not used directly from assembly code goes
@ -342,5 +361,18 @@ void kvm_arm_init_debug(void);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */
static inline void __cpu_init_stage2(void)
{
kvm_call_hyp(__init_stage2_translation);
}
#endif /* __ARM64_KVM_HOST_H__ */

View File

@ -0,0 +1,181 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_KVM_HYP_H__
#define __ARM64_KVM_HYP_H__
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_perf_event.h>
#include <asm/sysreg.h>
#define __hyp_text __section(.hyp.text) notrace
static inline unsigned long __kern_hyp_va(unsigned long v)
{
asm volatile(ALTERNATIVE("and %0, %0, %1",
"nop",
ARM64_HAS_VIRT_HOST_EXTN)
: "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK));
return v;
}
#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v)))
static inline unsigned long __hyp_kern_va(unsigned long v)
{
u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET;
asm volatile(ALTERNATIVE("add %0, %0, %1",
"nop",
ARM64_HAS_VIRT_HOST_EXTN)
: "+r" (v) : "r" (offset));
return v;
}
#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v)))
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
"mrs_s %0, " __stringify(r##vh),\
ARM64_HAS_VIRT_HOST_EXTN) \
: "=r" (reg)); \
reg; \
})
#define write_sysreg_elx(v,r,nvh,vh) \
do { \
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
"msr_s " __stringify(r##vh) ", %x0",\
ARM64_HAS_VIRT_HOST_EXTN) \
: : "rZ" (__val)); \
} while (0)
/*
* Unified accessors for registers that have a different encoding
* between VHE and non-VHE. They must be specified without their "ELx"
* encoding.
*/
#define read_sysreg_el2(r) \
({ \
u64 reg; \
asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\
"mrs %0, " __stringify(r##_EL1),\
ARM64_HAS_VIRT_HOST_EXTN) \
: "=r" (reg)); \
reg; \
})
#define write_sysreg_el2(v,r) \
do { \
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\
"msr " __stringify(r##_EL1) ", %x0",\
ARM64_HAS_VIRT_HOST_EXTN) \
: : "rZ" (__val)); \
} while (0)
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12)
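As a hedged illustration of how these unified accessors behave (an editorial sketch, not part of the patch; the example_* helpers are invented, but read_sysreg_el2()/read_sysreg_el1() are the macros defined above):

static u64 __hyp_text example_read_exit_syndrome(void)
{
	/*
	 * Always yields ESR_EL2: non-VHE uses the ESR_EL2 encoding
	 * directly, while VHE is patched to the ESR_EL1 encoding, which
	 * HCR_EL2.E2H redirects to ESR_EL2 when executed at EL2.
	 */
	return read_sysreg_el2(esr);
}

static u64 __hyp_text example_read_guest_sctlr(void)
{
	/*
	 * Always yields the guest's SCTLR_EL1: accessed directly on
	 * non-VHE, and via the SCTLR_EL12 encoding on VHE.
	 */
	return read_sysreg_el1(sctlr);
}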
/* The VHE specific system registers and their encoding */
#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
#define far_EL12 sys_reg(3, 5, 6, 0, 0)
#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
/**
* hyp_alternate_select - Generates patchable code sequences that are
* used to switch between two implementations of a function, depending
* on the availability of a feature.
*
* @fname: a symbol name that will be defined as a function returning a
* function pointer whose type will match @orig and @alt
* @orig: A pointer to the default function, as returned by @fname when
* @cond doesn't hold
* @alt: A pointer to the alternate function, as returned by @fname
* when @cond holds
* @cond: a CPU feature (as described in asm/cpufeature.h)
*/
#define hyp_alternate_select(fname, orig, alt, cond) \
typeof(orig) * __hyp_text fname(void) \
{ \
typeof(alt) *val = orig; \
asm volatile(ALTERNATIVE("nop \n", \
"mov %0, %1 \n", \
cond) \
: "+r" (val) : "r" (alt)); \
return val; \
}
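A minimal usage sketch for hyp_alternate_select() (editorial illustration only; the __example_* names are invented, but the pattern matches how switch.c and sysreg-sr.c use it later in this series):

static bool __hyp_text __example_nvhe(void)
{
	return false;		/* variant used on non-VHE hardware */
}

static bool __hyp_text __example_vhe(void)
{
	return true;		/* variant used when the kernel runs at EL2 */
}

static hyp_alternate_select(__example_select,
			    __example_nvhe, __example_vhe,
			    ARM64_HAS_VIRT_HOST_EXTN);

static bool __hyp_text example(void)
{
	/* The generated helper returns a function pointer; call it. */
	return __example_select()();
}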
void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
void __timer_save_state(struct kvm_vcpu *vcpu);
void __timer_restore_state(struct kvm_vcpu *vcpu);
void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
void __sysreg32_save_state(struct kvm_vcpu *vcpu);
void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
void __debug_save_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt);
void __debug_restore_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt);
void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
bool __fpsimd_enabled(void);
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
#endif /* __ARM64_KVM_HYP_H__ */

View File

@ -23,13 +23,16 @@
#include <asm/cpufeature.h>
/*
* As we only have the TTBR0_EL2 register, we cannot express
* As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
* "negative" addresses. This makes it impossible to directly share
* mappings with the kernel.
*
* Instead, give the HYP mode its own VA region at a fixed offset from
* the kernel by just masking the top bits (which are all ones for a
* kernel address).
*
* ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these
* macros (the entire kernel runs at EL2).
*/
#define HYP_PAGE_OFFSET_SHIFT VA_BITS
#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
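A small worked example of the non-VHE masking rule described above (a standalone sketch assuming VA_BITS of 48 and an arbitrary kernel address; it is not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_VA_BITS			48
#define EXAMPLE_HYP_PAGE_OFFSET_MASK	((1UL << EXAMPLE_VA_BITS) - 1)

static uint64_t example_kern_hyp_va(uint64_t kernel_va)
{
	/*
	 * A kernel VA has all of bits [63:VA_BITS] set; clearing them
	 * yields the fixed-offset HYP alias of the same page. On VHE the
	 * "and" is patched to a "nop" and kernel VAs are used directly.
	 */
	return kernel_va & EXAMPLE_HYP_PAGE_OFFSET_MASK;
}

int main(void)
{
	/* Prints 0x800008123000 for the kernel VA 0xffff800008123000. */
	printf("%#llx\n",
	       (unsigned long long)example_kern_hyp_va(0xffff800008123000ULL));
	return 0;
}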
@ -56,12 +59,19 @@
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
#include <asm/cpufeature.h>
/*
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*/
.macro kern_hyp_va reg
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
and \reg, \reg, #HYP_PAGE_OFFSET_MASK
alternative_else
nop
alternative_endif
.endm
#else

View File

@ -0,0 +1,68 @@
/*
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_KVM_PERF_EVENT_H
#define __ASM_KVM_PERF_EVENT_H
#define ARMV8_PMU_MAX_COUNTERS 32
#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
/*
* Per-CPU PMCR: config reg
*/
#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug */
/* Determines which bit of PMCCNTR_EL0 generates an overflow */
#define ARMV8_PMU_PMCR_LC (1 << 6)
#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
#define ARMV8_PMU_PMCR_N_MASK 0x1f
#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
/*
* PMOVSR: counters overflow flag status reg
*/
#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
/*
* PMXEVTYPER: Event selection reg
*/
#define ARMV8_PMU_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
#define ARMV8_PMU_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
/*
* Event filters for PMUv3
*/
#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31)
#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30)
#define ARMV8_PMU_INCLUDE_EL2 (1 << 27)
/*
* PMUSERENR: user enable reg
*/
#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
#endif

View File

@ -23,6 +23,8 @@
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>
/*
* __boot_cpu_mode records what mode CPUs were booted in.
* A correctly-implemented bootloader must start all CPUs in the same mode:
@ -50,6 +52,14 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
static inline bool is_kernel_in_hyp_mode(void)
{
u64 el;
asm("mrs %0, CurrentEL" : "=r" (el));
return el == CurrentEL_EL2;
}
/* The section containing the hypervisor text */
extern char __hyp_text_start[];
extern char __hyp_text_end[];

View File

@ -94,6 +94,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */
struct kvm_vcpu_init {
__u32 target;
@ -204,6 +205,11 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
#define KVM_ARM_IRQ_TYPE_MASK 0xff

View File

@ -110,9 +110,6 @@ int main(void)
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
#endif
#ifdef CONFIG_CPU_PM

View File

@ -26,6 +26,7 @@
#include <asm/cpu_ops.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
@ -621,6 +622,11 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
return has_sre;
}
static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
{
return is_kernel_in_hyp_mode();
}
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@ -651,6 +657,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.min_field_value = 2,
},
#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
{
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.matches = runs_at_el2,
},
{},
};

View File

@ -30,6 +30,7 @@
#include <asm/cache.h>
#include <asm/cputype.h>
#include <asm/kernel-pgtable.h>
#include <asm/kvm_arm.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
@ -464,9 +465,27 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
isb
ret
2:
#ifdef CONFIG_ARM64_VHE
/*
* Check for VHE being present. For the rest of the EL2 setup,
* x2 being non-zero indicates that we do have VHE, and that the
* kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #8, #4
#else
mov x2, xzr
#endif
/* Hyp configuration. */
2: mov x0, #(1 << 31) // 64-bit EL1
mov x0, #HCR_RW // 64-bit EL1
cbz x2, set_hcr
orr x0, x0, #HCR_TGE // Enable Host Extensions
orr x0, x0, #HCR_E2H
set_hcr:
msr hcr_el2, x0
isb
/* Generic timers. */
mrs x0, cnthctl_el2
@ -526,6 +545,13 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
/* Stage-2 translation */
msr vttbr_el2, xzr
cbz x2, install_el2_stub
mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
ret
install_el2_stub:
/* Hypervisor stub */
adrp x0, __hyp_stub_vectors
add x0, x0, #:lo12:__hyp_stub_vectors

View File

@ -20,6 +20,7 @@
*/
#include <asm/irq_regs.h>
#include <asm/virt.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
@ -691,9 +692,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
if (attr->exclude_idle)
return -EPERM;
if (is_kernel_in_hyp_mode() &&
attr->exclude_kernel != attr->exclude_hv)
return -EINVAL;
if (attr->exclude_user)
config_base |= ARMV8_EXCLUDE_EL0;
if (attr->exclude_kernel)
if (!is_kernel_in_hyp_mode() && attr->exclude_kernel)
config_base |= ARMV8_EXCLUDE_EL1;
if (!attr->exclude_hv)
config_base |= ARMV8_INCLUDE_EL2;

View File

@ -36,6 +36,7 @@ config KVM
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
select KVM_ARM_VGIC_V3
select KVM_ARM_PMU if HW_PERF_EVENTS
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
@ -48,6 +49,12 @@ config KVM_ARM_HOST
---help---
Provides host support for ARM processors.
config KVM_ARM_PMU
bool
---help---
Adds support for a virtual Performance Monitoring Unit (PMU) in
virtual machines.
source drivers/vhost/Kconfig
endif # VIRTUALIZATION

View File

@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o

View File

@ -380,3 +380,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
}
return 0;
}
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret;
switch (attr->group) {
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
break;
default:
ret = -ENXIO;
break;
}
return ret;
}
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret;
switch (attr->group) {
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
break;
default:
ret = -ENXIO;
break;
}
return ret;
}
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
int ret;
switch (attr->group) {
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
break;
default:
ret = -ENXIO;
break;
}
return ret;
}
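For context, a hedged sketch of how userspace could drive this vcpu device-attribute path for the PMU group; the fd handling and the PPI number are illustrative assumptions, and it presumes the vcpu was created with the KVM_ARM_VCPU_PMU_V3 feature and that KVM_CAP_VCPU_ATTRIBUTES is advertised (both added in this series):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int example_enable_guest_pmu(int vcpu_fd, int overflow_ppi)
{
	struct kvm_device_attr attr = {
		.group	= KVM_ARM_VCPU_PMU_V3_CTRL,
		.attr	= KVM_ARM_VCPU_PMU_V3_IRQ,
		.addr	= (uint64_t)(unsigned long)&overflow_ppi,
	};
	int ret;

	/* Tell KVM which PPI the virtual PMU should raise on overflow. */
	ret = ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
	if (ret)
		return ret;

	/* Then finalise the virtual PMU for this vcpu. */
	attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
	attr.addr = 0;
	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}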

View File

@ -87,26 +87,13 @@ __do_hyp_init:
#endif
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
* TCR_EL2 and VTCR_EL2.
* TCR_EL2.
*/
mrs x5, ID_AA64MMFR0_EL1
bfi x4, x5, #16, #3
msr tcr_el2, x4
ldr x4, =VTCR_EL2_FLAGS
bfi x4, x5, #16, #3
/*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
* VTCR_EL2.
*/
mrs x5, ID_AA64MMFR1_EL1
ubfx x5, x5, #5, #1
lsl x5, x5, #VTCR_EL2_VS
orr x4, x4, x5
msr vtcr_el2, x4
mrs x4, mair_el1
msr mair_el2, x4
isb

View File

@ -17,7 +17,9 @@
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/cpufeature.h>
/*
* u64 kvm_call_hyp(void *hypfn, ...);
@ -38,6 +40,11 @@
* arch/arm64/kernel/hyp_stub.S.
*/
ENTRY(kvm_call_hyp)
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
hvc #0
ret
alternative_else
b __vhe_hyp_call
nop
alternative_endif
ENDPROC(kvm_call_hyp)

View File

@ -2,9 +2,12 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o
KVM=../../../../virt/kvm
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
@ -12,3 +15,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o

View File

@ -19,9 +19,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
#include <asm/kvm_hyp.h>
#define read_debug(r,n) read_sysreg(r##n##_el1)
#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)

View File

@ -130,9 +130,15 @@ ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
stp x4, lr, [sp, #-16]!
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x2, cptr_el2
bic x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
alternative_else
mrs x2, cpacr_el1
orr x2, x2, #CPACR_EL1_FPEN
msr cpacr_el1, x2
alternative_endif
isb
mrs x3, tpidr_el2

View File

@ -19,7 +19,6 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
@ -38,10 +37,42 @@
ldp x0, x1, [sp], #16
.endm
.macro do_el2_call
/*
* Shuffle the parameters before calling the function
* pointed to in x0. Assumes parameters in x[1,2,3].
*/
sub sp, sp, #16
str lr, [sp]
mov lr, x0
mov x0, x1
mov x1, x2
mov x2, x3
blr lr
ldr lr, [sp]
add sp, sp, #16
.endm
ENTRY(__vhe_hyp_call)
do_el2_call
/*
* We used to rely on having an exception return to get
* an implicit isb. In the E2H case, we don't have it anymore.
* Rather than changing all the leaf functions, just do it here
* before returning to the rest of the kernel.
*/
isb
ret
ENDPROC(__vhe_hyp_call)
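A hedged note on the calling convention do_el2_call implements: C code passes the hyp function in x0 and up to three arguments in x1-x3 (via "hvc #0" on non-VHE, or a plain branch to __vhe_hyp_call on VHE), and the arguments are shifted down into x0-x2 before the branch so the callee sees an ordinary AAPCS call. A kernel-context sketch, reusing a prototype that already appears in this series:

	/* kvm_call_hyp(fn, a, b) ends up as fn(a, b) at EL2. */
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);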
el1_sync: // Guest trapped into EL2
save_x0_to_x3
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x1, esr_el2
alternative_else
mrs x1, esr_el1
alternative_endif
lsr x2, x1, #ESR_ELx_EC_SHIFT
cmp x2, #ESR_ELx_EC_HVC64
@ -58,19 +89,13 @@ el1_sync: // Guest trapped into EL2
mrs x0, vbar_el2
b 2f
1: stp lr, xzr, [sp, #-16]!
1:
/*
* Compute the function address in EL2, and shuffle the parameters.
* Perform the EL2 call
*/
kern_hyp_va x0
mov lr, x0
mov x0, x1
mov x1, x2
mov x2, x3
blr lr
do_el2_call
ldp lr, xzr, [sp], #16
2: eret
el1_trap:
@ -83,72 +108,10 @@ el1_trap:
cmp x2, #ESR_ELx_EC_FP_ASIMD
b.eq __fpsimd_guest_restore
cmp x2, #ESR_ELx_EC_DABT_LOW
mov x0, #ESR_ELx_EC_IABT_LOW
ccmp x2, x0, #4, ne
b.ne 1f // Not an abort we care about
/* This is an abort. Check for permission fault */
alternative_if_not ARM64_WORKAROUND_834220
and x2, x1, #ESR_ELx_FSC_TYPE
cmp x2, #FSC_PERM
b.ne 1f // Not a permission fault
alternative_else
nop // Use the permission fault path to
nop // check for a valid S1 translation,
nop // regardless of the ESR value.
alternative_endif
/*
* Check for Stage-1 page table walk, which is guaranteed
* to give a valid HPFAR_EL2.
*/
tbnz x1, #7, 1f // S1PTW is set
/* Preserve PAR_EL1 */
mrs x3, par_el1
stp x3, xzr, [sp, #-16]!
/*
* Permission fault, HPFAR_EL2 is invalid.
* Resolve the IPA the hard way using the guest VA.
* Stage-1 translation already validated the memory access rights.
* As such, we can use the EL1 translation regime, and don't have
* to distinguish between EL0 and EL1 access.
*/
mrs x2, far_el2
at s1e1r, x2
isb
/* Read result */
mrs x3, par_el1
ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack
msr par_el1, x0
tbnz x3, #0, 3f // Bail out if we failed the translation
ubfx x3, x3, #12, #36 // Extract IPA
lsl x3, x3, #4 // and present it like HPFAR
b 2f
1: mrs x3, hpfar_el2
mrs x2, far_el2
2: mrs x0, tpidr_el2
str w1, [x0, #VCPU_ESR_EL2]
str x2, [x0, #VCPU_FAR_EL2]
str x3, [x0, #VCPU_HPFAR_EL2]
mrs x0, tpidr_el2
mov x1, #ARM_EXCEPTION_TRAP
b __guest_exit
/*
* Translation failed. Just return to the guest and
* let it fault again. Another CPU is probably playing
* behind our back.
*/
3: restore_x0_to_x3
eret
el1_irq:
save_x0_to_x3
mrs x0, tpidr_el2

View File

@ -1,90 +0,0 @@
/*
* Copyright (C) 2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_KVM_HYP_H__
#define __ARM64_KVM_HYP_H__
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
#define __hyp_text __section(.hyp.text) notrace
#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK)
#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \
+ PAGE_OFFSET)
/**
* hyp_alternate_select - Generates patchable code sequences that are
* used to switch between two implementations of a function, depending
* on the availability of a feature.
*
* @fname: a symbol name that will be defined as a function returning a
* function pointer whose type will match @orig and @alt
* @orig: A pointer to the default function, as returned by @fname when
* @cond doesn't hold
* @alt: A pointer to the alternate function, as returned by @fname
* when @cond holds
* @cond: a CPU feature (as described in asm/cpufeature.h)
*/
#define hyp_alternate_select(fname, orig, alt, cond) \
typeof(orig) * __hyp_text fname(void) \
{ \
typeof(alt) *val = orig; \
asm volatile(ALTERNATIVE("nop \n", \
"mov %0, %1 \n", \
cond) \
: "+r" (val) : "r" (alt)); \
return val; \
}
void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
void __timer_save_state(struct kvm_vcpu *vcpu);
void __timer_restore_state(struct kvm_vcpu *vcpu);
void __sysreg_save_state(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
void __sysreg32_save_state(struct kvm_vcpu *vcpu);
void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
void __debug_save_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt);
void __debug_restore_state(struct kvm_vcpu *vcpu,
struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt);
void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
static inline bool __fpsimd_enabled(void)
{
return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
#endif /* __ARM64_KVM_HYP_H__ */

View File

@ -0,0 +1,43 @@
/*
* Copyright (C) 2016 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/types.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
void __hyp_text __init_stage2_translation(void)
{
u64 val = VTCR_EL2_FLAGS;
u64 tmp;
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
* bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
* PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
*/
val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16;
/*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
* bit in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> 4) & 0xf;
val |= (tmp == 2) ? VTCR_EL2_VS : 0;
write_sysreg(val, vtcr_el2);
}

View File

@ -15,7 +15,53 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "hyp.h"
#include <linux/types.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
static bool __hyp_text __fpsimd_enabled_nvhe(void)
{
return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}
static bool __hyp_text __fpsimd_enabled_vhe(void)
{
return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
}
static hyp_alternate_select(__fpsimd_is_enabled,
__fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
bool __hyp_text __fpsimd_enabled(void)
{
return __fpsimd_is_enabled()();
}
static void __hyp_text __activate_traps_vhe(void)
{
u64 val;
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
val &= ~CPACR_EL1_FPEN;
write_sysreg(val, cpacr_el1);
write_sysreg(__kvm_hyp_vector, vbar_el1);
}
static void __hyp_text __activate_traps_nvhe(void)
{
u64 val;
val = CPTR_EL2_DEFAULT;
val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
write_sysreg(val, cptr_el2);
}
static hyp_alternate_select(__activate_traps_arch,
__activate_traps_nvhe, __activate_traps_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
{
@ -36,20 +82,37 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(val, hcr_el2);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
val = CPTR_EL2_DEFAULT;
val |= CPTR_EL2_TTA | CPTR_EL2_TFP;
write_sysreg(val, cptr_el2);
/* Make sure we trap PMU access from EL0 to EL2 */
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
__activate_traps_arch()();
}
static void __hyp_text __deactivate_traps_vhe(void)
{
extern char vectors[]; /* kernel exception vectors */
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
write_sysreg(CPACR_EL1_FPEN, cpacr_el1);
write_sysreg(vectors, vbar_el1);
}
static void __hyp_text __deactivate_traps_nvhe(void)
{
write_sysreg(HCR_RW, hcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
}
static hyp_alternate_select(__deactivate_traps_arch,
__deactivate_traps_nvhe, __deactivate_traps_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
{
write_sysreg(HCR_RW, hcr_el2);
__deactivate_traps_arch()();
write_sysreg(0, hstr_el2);
write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
write_sysreg(0, pmuserenr_el0);
}
static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
@ -89,6 +152,86 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
__vgic_call_restore_state()(vcpu);
}
static bool __hyp_text __true_value(void)
{
return true;
}
static bool __hyp_text __false_value(void)
{
return false;
}
static hyp_alternate_select(__check_arm_834220,
__false_value, __true_value,
ARM64_WORKAROUND_834220);
static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
u64 par, tmp;
/*
* Resolve the IPA the hard way using the guest VA.
*
* Stage-1 translation already validated the memory access
* rights. As such, we can use the EL1 translation regime, and
* don't have to distinguish between EL0 and EL1 access.
*
* We do need to save/restore PAR_EL1 though, as we haven't
* saved the guest context yet, and we may return early...
*/
par = read_sysreg(par_el1);
asm volatile("at s1e1r, %0" : : "r" (far));
isb();
tmp = read_sysreg(par_el1);
write_sysreg(par, par_el1);
if (unlikely(tmp & 1))
return false; /* Translation failed, back to guest */
/* Convert PAR to HPFAR format */
*hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
return true;
}
static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
{
u64 esr = read_sysreg_el2(esr);
u8 ec = esr >> ESR_ELx_EC_SHIFT;
u64 hpfar, far;
vcpu->arch.fault.esr_el2 = esr;
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
return true;
far = read_sysreg_el2(far);
/*
* The HPFAR can be invalid if the stage 2 fault did not
* happen during a stage 1 page table walk (the ESR_EL2.S1PTW
* bit is clear) and one of the two following cases is true:
* 1. The fault was due to a permission fault
* 2. The processor carries erratum 834220
*
* Therefore, for all non-S1PTW faults where we either have a
* permission fault or the erratum workaround is enabled, we
* resolve the IPA using the AT instruction.
*/
if (!(esr & ESR_ELx_S1PTW) &&
(__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
hpfar = read_sysreg(hpfar_el2);
}
vcpu->arch.fault.far_el2 = far;
vcpu->arch.fault.hpfar_el2 = hpfar;
return true;
}
static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
@ -102,7 +245,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
guest_ctxt = &vcpu->arch.ctxt;
__sysreg_save_state(host_ctxt);
__sysreg_save_host_state(host_ctxt);
__debug_cond_save_host_state(vcpu);
__activate_traps(vcpu);
@ -116,16 +259,20 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
* to Cortex-A57 erratum #852523.
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_state(guest_ctxt);
__sysreg_restore_guest_state(guest_ctxt);
__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
/* Jump in the fire! */
again:
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
goto again;
fp_enabled = __fpsimd_enabled();
__sysreg_save_state(guest_ctxt);
__sysreg_save_guest_state(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_save_state(vcpu);
__vgic_save_state(vcpu);
@ -133,7 +280,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_state(host_ctxt);
__sysreg_restore_host_state(host_ctxt);
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
@ -150,11 +297,34 @@ __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
void __hyp_text __noreturn __hyp_panic(void)
static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
{
unsigned long str_va = (unsigned long)__hyp_panic_string;
u64 spsr = read_sysreg(spsr_el2);
u64 elr = read_sysreg(elr_el2);
__hyp_do_panic(hyp_kern_va(str_va),
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par,
(void *)read_sysreg(tpidr_el2));
}
static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
{
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par,
(void *)read_sysreg(tpidr_el2));
}
static hyp_alternate_select(__hyp_call_panic,
__hyp_call_panic_nvhe, __hyp_call_panic_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
void __hyp_text __noreturn __hyp_panic(void)
{
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
@ -165,15 +335,11 @@ void __hyp_text __noreturn __hyp_panic(void)
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_state(host_ctxt);
__sysreg_restore_host_state(host_ctxt);
}
/* Call panic for real */
__hyp_do_panic(hyp_kern_va(str_va),
spsr, elr,
read_sysreg(esr_el2), read_sysreg(far_el2),
read_sysreg(hpfar_el2), par,
(void *)read_sysreg(tpidr_el2));
__hyp_call_panic()(spsr, elr, par);
unreachable();
}

View File

@ -19,75 +19,122 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_hyp.h>
#include "hyp.h"
/* Yes, this does nothing, on purpose */
static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
/* ctxt is already in the HYP VA space */
void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
/*
* Non-VHE: Both host and guest must save everything.
*
* VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and the
* guest must save everything.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1);
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1);
ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1);
ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1);
ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1);
ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1);
ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1);
ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1);
ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1);
ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1);
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr);
ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr);
ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0);
ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1);
ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far);
ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair);
ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar);
ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr);
ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
ctxt->gp_regs.regs.pc = read_sysreg(elr_el2);
ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
}
void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
static hyp_alternate_select(__sysreg_call_save_host_state,
__sysreg_save_state, __sysreg_do_nothing,
ARM64_HAS_VIRT_HOST_EXTN);
void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
{
__sysreg_call_save_host_state()(ctxt);
__sysreg_save_common_state(ctxt);
}
void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
{
__sysreg_save_state(ctxt);
__sysreg_save_common_state(ctxt);
}
static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1);
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1);
write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1);
write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1);
write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1);
write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1);
write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1);
write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1);
write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1);
write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1);
write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1);
write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1);
write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
write_sysreg(ctxt->gp_regs.regs.pc, elr_el2);
write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg(ctxt->gp_regs.elr_el1, elr_el1);
write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1);
static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr);
write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr);
write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0);
write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1);
write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far);
write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair);
write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar);
write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
}
static hyp_alternate_select(__sysreg_call_restore_host_state,
__sysreg_restore_state, __sysreg_do_nothing,
ARM64_HAS_VIRT_HOST_EXTN);
void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
{
__sysreg_call_restore_host_state()(ctxt);
__sysreg_restore_common_state(ctxt);
}
void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
{
__sysreg_restore_state(ctxt);
__sysreg_restore_common_state(ctxt);
}
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)

View File

@ -15,7 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "hyp.h"
#include <asm/kvm_hyp.h>
static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{

View File

@ -1,84 +0,0 @@
/*
* Copyright (C) 2012-2015 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/compiler.h>
#include <linux/irqchip/arm-gic.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
/* vcpu is already in the HYP VA space */
void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &kvm->arch.vgic;
void __iomem *base = kern_hyp_va(vgic->vctrl_base);
u32 eisr0, eisr1, elrsr0, elrsr1;
int i, nr_lr;
if (!base)
return;
nr_lr = vcpu->arch.vgic_cpu.nr_lr;
cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
eisr0 = readl_relaxed(base + GICH_EISR0);
elrsr0 = readl_relaxed(base + GICH_ELRSR0);
if (unlikely(nr_lr > 32)) {
eisr1 = readl_relaxed(base + GICH_EISR1);
elrsr1 = readl_relaxed(base + GICH_ELRSR1);
} else {
eisr1 = elrsr1 = 0;
}
#ifdef CONFIG_CPU_BIG_ENDIAN
cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1;
cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
#else
cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0;
cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
#endif
cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
writel_relaxed(0, base + GICH_HCR);
for (i = 0; i < nr_lr; i++)
cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
}
/* vcpu is already in the HYP VA space */
void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &kvm->arch.vgic;
void __iomem *base = kern_hyp_va(vgic->vctrl_base);
int i, nr_lr;
if (!base)
return;
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
nr_lr = vcpu->arch.vgic_cpu.nr_lr;
for (i = 0; i < nr_lr; i++)
writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
}

View File

@ -19,9 +19,7 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include "hyp.h"
#include <asm/kvm_hyp.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1)
@ -39,12 +37,133 @@
asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
} while (0)
/* vcpu is already in the HYP VA space */
static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
{
switch (lr & 0xf) {
case 0:
return read_gicreg(ICH_LR0_EL2);
case 1:
return read_gicreg(ICH_LR1_EL2);
case 2:
return read_gicreg(ICH_LR2_EL2);
case 3:
return read_gicreg(ICH_LR3_EL2);
case 4:
return read_gicreg(ICH_LR4_EL2);
case 5:
return read_gicreg(ICH_LR5_EL2);
case 6:
return read_gicreg(ICH_LR6_EL2);
case 7:
return read_gicreg(ICH_LR7_EL2);
case 8:
return read_gicreg(ICH_LR8_EL2);
case 9:
return read_gicreg(ICH_LR9_EL2);
case 10:
return read_gicreg(ICH_LR10_EL2);
case 11:
return read_gicreg(ICH_LR11_EL2);
case 12:
return read_gicreg(ICH_LR12_EL2);
case 13:
return read_gicreg(ICH_LR13_EL2);
case 14:
return read_gicreg(ICH_LR14_EL2);
case 15:
return read_gicreg(ICH_LR15_EL2);
}
unreachable();
}
static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
{
switch (lr & 0xf) {
case 0:
write_gicreg(val, ICH_LR0_EL2);
break;
case 1:
write_gicreg(val, ICH_LR1_EL2);
break;
case 2:
write_gicreg(val, ICH_LR2_EL2);
break;
case 3:
write_gicreg(val, ICH_LR3_EL2);
break;
case 4:
write_gicreg(val, ICH_LR4_EL2);
break;
case 5:
write_gicreg(val, ICH_LR5_EL2);
break;
case 6:
write_gicreg(val, ICH_LR6_EL2);
break;
case 7:
write_gicreg(val, ICH_LR7_EL2);
break;
case 8:
write_gicreg(val, ICH_LR8_EL2);
break;
case 9:
write_gicreg(val, ICH_LR9_EL2);
break;
case 10:
write_gicreg(val, ICH_LR10_EL2);
break;
case 11:
write_gicreg(val, ICH_LR11_EL2);
break;
case 12:
write_gicreg(val, ICH_LR12_EL2);
break;
case 13:
write_gicreg(val, ICH_LR13_EL2);
break;
case 14:
write_gicreg(val, ICH_LR14_EL2);
break;
case 15:
write_gicreg(val, ICH_LR15_EL2);
break;
}
}
static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
int i;
bool expect_mi;
expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE);
for (i = 0; i < nr_lr; i++) {
if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
continue;
expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) &&
(cpu_if->vgic_lr[i] & ICH_LR_EOI));
}
if (expect_mi) {
cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
if (cpu_if->vgic_misr & ICH_MISR_EOI)
cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
else
cpu_if->vgic_eisr = 0;
} else {
cpu_if->vgic_misr = 0;
cpu_if->vgic_eisr = 0;
}
}
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
u32 max_lr_idx, nr_pri_bits;
/*
* Make sure stores to the GIC via the memory mapped interface
@ -53,68 +172,66 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
dsb(st);
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
write_gicreg(0, ICH_HCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
if (vcpu->arch.vgic_cpu.live_lrs) {
int i;
u32 max_lr_idx, nr_pri_bits;
switch (max_lr_idx) {
case 15:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
case 14:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
case 13:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
case 12:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
case 11:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
case 10:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
case 9:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
case 8:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
case 7:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
case 6:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
case 5:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
case 4:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
case 3:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
case 2:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
case 1:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
case 0:
cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
}
cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
switch (nr_pri_bits) {
case 7:
cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
case 6:
cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
default:
cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
}
write_gicreg(0, ICH_HCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
switch (nr_pri_bits) {
case 7:
cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
case 6:
cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
default:
cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
save_maint_int_state(vcpu, max_lr_idx + 1);
for (i = 0; i <= max_lr_idx; i++) {
if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i)))
continue;
if (cpu_if->vgic_elrsr & (1 << i)) {
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
continue;
}
cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
__gic_v3_set_lr(0, i);
}
switch (nr_pri_bits) {
case 7:
cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
case 6:
cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
default:
cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
}
switch (nr_pri_bits) {
case 7:
cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
case 6:
cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
default:
cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
}
vcpu->arch.vgic_cpu.live_lrs = 0;
} else {
cpu_if->vgic_misr = 0;
cpu_if->vgic_eisr = 0;
cpu_if->vgic_elrsr = 0xffff;
cpu_if->vgic_ap0r[0] = 0;
cpu_if->vgic_ap0r[1] = 0;
cpu_if->vgic_ap0r[2] = 0;
cpu_if->vgic_ap0r[3] = 0;
cpu_if->vgic_ap1r[0] = 0;
cpu_if->vgic_ap1r[1] = 0;
cpu_if->vgic_ap1r[2] = 0;
cpu_if->vgic_ap1r[3] = 0;
}
val = read_gicreg(ICC_SRE_EL2);
@ -128,6 +245,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val;
u32 max_lr_idx, nr_pri_bits;
u16 live_lrs = 0;
int i;
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@ -140,66 +259,46 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
val = read_gicreg(ICH_VTR_EL2);
max_lr_idx = vtr_to_max_lr_idx(val);
nr_pri_bits = vtr_to_nr_pri_bits(val);
switch (nr_pri_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
case 6:
write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
default:
write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
for (i = 0; i <= max_lr_idx; i++) {
if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
live_lrs |= (1 << i);
}
switch (nr_pri_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
case 6:
write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
default:
write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
}
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
switch (max_lr_idx) {
case 15:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
case 14:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
case 13:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
case 12:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
case 11:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
case 10:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
case 9:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
case 8:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
case 7:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
case 6:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
case 5:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
case 4:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
case 3:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
case 2:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
case 1:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
case 0:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
if (live_lrs) {
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
switch (nr_pri_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
case 6:
write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
default:
write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
}
switch (nr_pri_bits) {
case 7:
write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
case 6:
write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
default:
write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
}
for (i = 0; i <= max_lr_idx; i++) {
if (!(live_lrs & (1 << i)))
continue;
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
}
}
/*
@ -209,6 +308,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
*/
isb();
dsb(sy);
vcpu->arch.vgic_cpu.live_lrs = live_lrs;
/*
* Prevent the guest from touching the GIC system registers if
@ -220,6 +320,15 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
}
}
void __hyp_text __vgic_v3_init_lrs(void)
{
int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2));
int i;
for (i = 0; i <= max_lr_idx; i++)
__gic_v3_set_lr(0, i);
}
static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void)
{
return read_gicreg(ICH_VTR_EL2);

View File

@ -77,7 +77,11 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
case KVM_CAP_GUEST_DEBUG_HW_WPS:
r = get_num_wrps();
break;
case KVM_CAP_ARM_PMU_V3:
r = kvm_arm_support_pmu_v3();
break;
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
r = 1;
break;
default:
@ -120,6 +124,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset system registers */
kvm_reset_sys_regs(vcpu);
/* Reset PMU */
kvm_pmu_vcpu_reset(vcpu);
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}

View File

@ -20,6 +20,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/bsearch.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
@ -34,6 +35,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <asm/perf_event.h>
#include <trace/events/kvm.h>
@ -439,6 +441,344 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
}
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 pmcr, val;
asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
/* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN,
* except PMCR.E, which is reset to zero.
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
vcpu_sys_reg(vcpu, PMCR_EL0) = val;
}
static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu)
{
u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu));
}
static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu)
{
u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN))
|| vcpu_mode_priv(vcpu));
}
static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu)
{
u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN))
|| vcpu_mode_priv(vcpu));
}
static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu)
{
u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN))
|| vcpu_mode_priv(vcpu));
}
static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 val;
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (pmu_access_el0_disabled(vcpu))
return false;
if (p->is_write) {
/* Only update writeable bits of PMCR */
val = vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
} else {
/* PMCR.P & PMCR.C are RAZ */
val = vcpu_sys_reg(vcpu, PMCR_EL0)
& ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
p->regval = val;
}
return true;
}
static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
if (p->is_write)
vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
else
/* return PMSELR.SEL field */
p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
return true;
}
static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 pmceid;
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
BUG_ON(p->is_write);
if (pmu_access_el0_disabled(vcpu))
return false;
if (!(p->Op2 & 1))
asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));
else
asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
p->regval = pmceid;
return true;
}
static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
{
u64 pmcr, val;
pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX)
return false;
return true;
}
static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 idx;
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (r->CRn == 9 && r->CRm == 13) {
if (r->Op2 == 2) {
/* PMXEVCNTR_EL0 */
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
} else if (r->Op2 == 0) {
/* PMCCNTR_EL0 */
if (pmu_access_cycle_counter_el0_disabled(vcpu))
return false;
idx = ARMV8_PMU_CYCLE_IDX;
} else {
BUG();
}
} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
/* PMEVCNTRn_EL0 */
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
} else {
BUG();
}
if (!pmu_counter_idx_valid(vcpu, idx))
return false;
if (p->is_write) {
if (pmu_access_el0_disabled(vcpu))
return false;
kvm_pmu_set_counter_value(vcpu, idx, p->regval);
} else {
p->regval = kvm_pmu_get_counter_value(vcpu, idx);
}
return true;
}
static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 idx, reg;
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (pmu_access_el0_disabled(vcpu))
return false;
if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
/* PMXEVTYPER_EL0 */
idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
reg = PMEVTYPER0_EL0 + idx;
} else if (r->CRn == 14 && (r->CRm & 12) == 12) {
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
if (idx == ARMV8_PMU_CYCLE_IDX)
reg = PMCCFILTR_EL0;
else
/* PMEVTYPERn_EL0 */
reg = PMEVTYPER0_EL0 + idx;
} else {
BUG();
}
if (!pmu_counter_idx_valid(vcpu, idx))
return false;
if (p->is_write) {
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
} else {
p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
}
return true;
}
static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 val, mask;
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (pmu_access_el0_disabled(vcpu))
return false;
mask = kvm_pmu_valid_counter_mask(vcpu);
if (p->is_write) {
val = p->regval & mask;
if (r->Op2 & 0x1) {
/* accessing PMCNTENSET_EL0 */
vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
kvm_pmu_enable_counter(vcpu, val);
} else {
/* accessing PMCNTENCLR_EL0 */
vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
kvm_pmu_disable_counter(vcpu, val);
}
} else {
p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
}
return true;
}
static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 mask = kvm_pmu_valid_counter_mask(vcpu);
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (!vcpu_mode_priv(vcpu))
return false;
if (p->is_write) {
u64 val = p->regval & mask;
if (r->Op2 & 0x1)
/* accessing PMINTENSET_EL1 */
vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
else
/* accessing PMINTENCLR_EL1 */
vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
} else {
p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
}
return true;
}
static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 mask = kvm_pmu_valid_counter_mask(vcpu);
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (pmu_access_el0_disabled(vcpu))
return false;
if (p->is_write) {
if (r->CRm & 0x2)
/* accessing PMOVSSET_EL0 */
kvm_pmu_overflow_set(vcpu, p->regval & mask);
else
/* accessing PMOVSCLR_EL0 */
vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
} else {
p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
}
return true;
}
static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
u64 mask;
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (pmu_write_swinc_el0_disabled(vcpu))
return false;
if (p->is_write) {
mask = kvm_pmu_valid_counter_mask(vcpu);
kvm_pmu_software_increment(vcpu, p->regval & mask);
return true;
}
return false;
}
static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_arm_pmu_v3_ready(vcpu))
return trap_raz_wi(vcpu, p, r);
if (p->is_write) {
if (!vcpu_mode_priv(vcpu))
return false;
vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
& ARMV8_PMU_USERENR_MASK;
} else {
p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
& ARMV8_PMU_USERENR_MASK;
}
return true;
}
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
/* DBGBVRn_EL1 */ \
@ -454,6 +794,20 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr }
/* Macro to expand the PMEVCNTRn_EL0 register */
#define PMU_PMEVCNTR_EL0(n) \
/* PMEVCNTRn_EL0 */ \
{ Op0(0b11), Op1(0b011), CRn(0b1110), \
CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
#define PMU_PMEVTYPER_EL0(n) \
/* PMEVTYPERn_EL0 */ \
{ Op0(0b11), Op1(0b011), CRn(0b1110), \
CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
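A quick decode example (informal, not from the patch) shows how the macro and the trap handler agree on the counter index:

	/* PMU_PMEVCNTR_EL0(10):
	 *   CRm = 0b1000 | (10 >> 3) = 0b1001,  Op2 = 10 & 0x7 = 0b010
	 * access_pmu_evcntr() recovers:
	 *   idx = ((CRm & 3) << 3) | (Op2 & 7) = (1 << 3) | 2 = 10
	 */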
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@ -583,10 +937,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMINTENSET_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
trap_raz_wi },
access_pminten, reset_unknown, PMINTENSET_EL1 },
/* PMINTENCLR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
trap_raz_wi },
access_pminten, NULL, PMINTENSET_EL1 },
/* MAIR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@ -623,43 +977,46 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMCR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
trap_raz_wi },
access_pmcr, reset_pmcr, },
/* PMCNTENSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
trap_raz_wi },
access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
/* PMCNTENCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
trap_raz_wi },
access_pmcnten, NULL, PMCNTENSET_EL0 },
/* PMOVSCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
trap_raz_wi },
access_pmovs, NULL, PMOVSSET_EL0 },
/* PMSWINC_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
trap_raz_wi },
access_pmswinc, reset_unknown, PMSWINC_EL0 },
/* PMSELR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
trap_raz_wi },
access_pmselr, reset_unknown, PMSELR_EL0 },
/* PMCEID0_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
trap_raz_wi },
access_pmceid },
/* PMCEID1_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
trap_raz_wi },
access_pmceid },
/* PMCCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
trap_raz_wi },
access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 },
/* PMXEVTYPER_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
trap_raz_wi },
access_pmu_evtyper },
/* PMXEVCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
trap_raz_wi },
/* PMUSERENR_EL0 */
access_pmu_evcntr },
/* PMUSERENR_EL0
* This register resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
trap_raz_wi },
access_pmuserenr, reset_val, PMUSERENR_EL0, 0 },
/* PMOVSSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
trap_raz_wi },
access_pmovs, reset_unknown, PMOVSSET_EL0 },
/* TPIDR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@ -668,6 +1025,77 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
NULL, reset_unknown, TPIDRRO_EL0 },
/* PMEVCNTRn_EL0 */
PMU_PMEVCNTR_EL0(0),
PMU_PMEVCNTR_EL0(1),
PMU_PMEVCNTR_EL0(2),
PMU_PMEVCNTR_EL0(3),
PMU_PMEVCNTR_EL0(4),
PMU_PMEVCNTR_EL0(5),
PMU_PMEVCNTR_EL0(6),
PMU_PMEVCNTR_EL0(7),
PMU_PMEVCNTR_EL0(8),
PMU_PMEVCNTR_EL0(9),
PMU_PMEVCNTR_EL0(10),
PMU_PMEVCNTR_EL0(11),
PMU_PMEVCNTR_EL0(12),
PMU_PMEVCNTR_EL0(13),
PMU_PMEVCNTR_EL0(14),
PMU_PMEVCNTR_EL0(15),
PMU_PMEVCNTR_EL0(16),
PMU_PMEVCNTR_EL0(17),
PMU_PMEVCNTR_EL0(18),
PMU_PMEVCNTR_EL0(19),
PMU_PMEVCNTR_EL0(20),
PMU_PMEVCNTR_EL0(21),
PMU_PMEVCNTR_EL0(22),
PMU_PMEVCNTR_EL0(23),
PMU_PMEVCNTR_EL0(24),
PMU_PMEVCNTR_EL0(25),
PMU_PMEVCNTR_EL0(26),
PMU_PMEVCNTR_EL0(27),
PMU_PMEVCNTR_EL0(28),
PMU_PMEVCNTR_EL0(29),
PMU_PMEVCNTR_EL0(30),
/* PMEVTYPERn_EL0 */
PMU_PMEVTYPER_EL0(0),
PMU_PMEVTYPER_EL0(1),
PMU_PMEVTYPER_EL0(2),
PMU_PMEVTYPER_EL0(3),
PMU_PMEVTYPER_EL0(4),
PMU_PMEVTYPER_EL0(5),
PMU_PMEVTYPER_EL0(6),
PMU_PMEVTYPER_EL0(7),
PMU_PMEVTYPER_EL0(8),
PMU_PMEVTYPER_EL0(9),
PMU_PMEVTYPER_EL0(10),
PMU_PMEVTYPER_EL0(11),
PMU_PMEVTYPER_EL0(12),
PMU_PMEVTYPER_EL0(13),
PMU_PMEVTYPER_EL0(14),
PMU_PMEVTYPER_EL0(15),
PMU_PMEVTYPER_EL0(16),
PMU_PMEVTYPER_EL0(17),
PMU_PMEVTYPER_EL0(18),
PMU_PMEVTYPER_EL0(19),
PMU_PMEVTYPER_EL0(20),
PMU_PMEVTYPER_EL0(21),
PMU_PMEVTYPER_EL0(22),
PMU_PMEVTYPER_EL0(23),
PMU_PMEVTYPER_EL0(24),
PMU_PMEVTYPER_EL0(25),
PMU_PMEVTYPER_EL0(26),
PMU_PMEVTYPER_EL0(27),
PMU_PMEVTYPER_EL0(28),
PMU_PMEVTYPER_EL0(29),
PMU_PMEVTYPER_EL0(30),
/* PMCCFILTR_EL0
* This register resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
{ Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111),
access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 },
/* DACR32_EL2 */
{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
NULL, reset_unknown, DACR32_EL2 },
@ -857,6 +1285,20 @@ static const struct sys_reg_desc cp14_64_regs[] = {
{ Op1( 0), CRm( 2), .access = trap_raz_wi },
};
/* Macro to expand the PMEVCNTRn register */
#define PMU_PMEVCNTR(n) \
/* PMEVCNTRn */ \
{ Op1(0), CRn(0b1110), \
CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
access_pmu_evcntr }
/* Macro to expand the PMEVTYPERn register */
#define PMU_PMEVTYPER(n) \
/* PMEVTYPERn */ \
{ Op1(0), CRn(0b1110), \
CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \
access_pmu_evtyper }
/*
* Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
* depending on the way they are accessed (as a 32bit or a 64bit
@ -885,19 +1327,21 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
/* PMU */
{ Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
{ Op1( 0), CRn( 9), CRm(12), Op2( 0), access_pmcr },
{ Op1( 0), CRn( 9), CRm(12), Op2( 1), access_pmcnten },
{ Op1( 0), CRn( 9), CRm(12), Op2( 2), access_pmcnten },
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
{ Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
{ Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
{ Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
{ Op1( 0), CRn( 9), CRm(14), Op2( 0), access_pmuserenr },
{ Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
{ Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
@ -908,10 +1352,78 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
/* PMEVCNTRn */
PMU_PMEVCNTR(0),
PMU_PMEVCNTR(1),
PMU_PMEVCNTR(2),
PMU_PMEVCNTR(3),
PMU_PMEVCNTR(4),
PMU_PMEVCNTR(5),
PMU_PMEVCNTR(6),
PMU_PMEVCNTR(7),
PMU_PMEVCNTR(8),
PMU_PMEVCNTR(9),
PMU_PMEVCNTR(10),
PMU_PMEVCNTR(11),
PMU_PMEVCNTR(12),
PMU_PMEVCNTR(13),
PMU_PMEVCNTR(14),
PMU_PMEVCNTR(15),
PMU_PMEVCNTR(16),
PMU_PMEVCNTR(17),
PMU_PMEVCNTR(18),
PMU_PMEVCNTR(19),
PMU_PMEVCNTR(20),
PMU_PMEVCNTR(21),
PMU_PMEVCNTR(22),
PMU_PMEVCNTR(23),
PMU_PMEVCNTR(24),
PMU_PMEVCNTR(25),
PMU_PMEVCNTR(26),
PMU_PMEVCNTR(27),
PMU_PMEVCNTR(28),
PMU_PMEVCNTR(29),
PMU_PMEVCNTR(30),
/* PMEVTYPERn */
PMU_PMEVTYPER(0),
PMU_PMEVTYPER(1),
PMU_PMEVTYPER(2),
PMU_PMEVTYPER(3),
PMU_PMEVTYPER(4),
PMU_PMEVTYPER(5),
PMU_PMEVTYPER(6),
PMU_PMEVTYPER(7),
PMU_PMEVTYPER(8),
PMU_PMEVTYPER(9),
PMU_PMEVTYPER(10),
PMU_PMEVTYPER(11),
PMU_PMEVTYPER(12),
PMU_PMEVTYPER(13),
PMU_PMEVTYPER(14),
PMU_PMEVTYPER(15),
PMU_PMEVTYPER(16),
PMU_PMEVTYPER(17),
PMU_PMEVTYPER(18),
PMU_PMEVTYPER(19),
PMU_PMEVTYPER(20),
PMU_PMEVTYPER(21),
PMU_PMEVTYPER(22),
PMU_PMEVTYPER(23),
PMU_PMEVTYPER(24),
PMU_PMEVTYPER(25),
PMU_PMEVTYPER(26),
PMU_PMEVTYPER(27),
PMU_PMEVTYPER(28),
PMU_PMEVTYPER(29),
PMU_PMEVTYPER(30),
/* PMCCFILTR */
{ Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper },
};
static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
};
@ -942,29 +1454,32 @@ static const struct sys_reg_desc *get_target_table(unsigned target,
}
}
#define reg_to_match_value(x) \
({ \
unsigned long val; \
val = (x)->Op0 << 14; \
val |= (x)->Op1 << 11; \
val |= (x)->CRn << 7; \
val |= (x)->CRm << 3; \
val |= (x)->Op2; \
val; \
})
static int match_sys_reg(const void *key, const void *elt)
{
const unsigned long pval = (unsigned long)key;
const struct sys_reg_desc *r = elt;
return pval - reg_to_match_value(r);
}
static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
const struct sys_reg_desc table[],
unsigned int num)
{
unsigned int i;
unsigned long pval = reg_to_match_value(params);
for (i = 0; i < num; i++) {
const struct sys_reg_desc *r = &table[i];
if (params->Op0 != r->Op0)
continue;
if (params->Op1 != r->Op1)
continue;
if (params->CRn != r->CRn)
continue;
if (params->CRm != r->CRm)
continue;
if (params->Op2 != r->Op2)
continue;
return r;
}
return NULL;
return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
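For reference, a sketch of why the table ordering matters for the new bsearch() (register encodings are taken from the table above; the packed values are worked out here purely for illustration):

	/* reg_to_match_value() packs Op0..Op2 into one ordered key, e.g.
	 *   PMCR_EL0      (Op0=3, Op1=3, CRn=9, CRm=12, Op2=0) -> 0xDCE0
	 *   PMUSERENR_EL0 (Op0=3, Op1=3, CRn=9, CRm=14, Op2=0) -> 0xDCF0
	 * so bsearch() only works because sys_reg_descs[] stays sorted
	 * ascending by Op0, Op1, CRn, CRm, Op2.
	 */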
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)

View File

@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
}
#endif
#define SPAPR_TCE_SHIFT 12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif

View File

@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
u64 liobn;
u32 window_size;
struct rcu_head rcu;
u32 page_shift;
u64 offset; /* in pages */
u64 size; /* window size in pages */
struct page *pages[0];
};

View File

@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
struct kvm_create_spapr_tce_64 *args);
extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
struct kvm_vcpu *vcpu, unsigned long liobn);
extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long ioba, unsigned long npages);
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
unsigned long tce);
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages);
extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
extern void kvmppc_alloc_host_rm_ops(void);
extern void kvmppc_free_host_rm_ops(void);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xics_ipi_action(void);
extern int h_ipi_redirect;
#else
static inline void kvmppc_alloc_host_rm_ops(void) {};
static inline void kvmppc_free_host_rm_ops(void) {};
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
#endif
/*
 * Host-side operations that real-mode KVM code operating on the
 * XICS can request while running in the guest.

* Currently only VCPU wakeup is supported.
*/
union kvmppc_rm_state {
unsigned long raw;
struct {
u32 in_host;
u32 rm_action;
};
};
struct kvmppc_host_rm_core {
union kvmppc_rm_state rm_state;
void *rm_data;
char pad[112];
};
struct kvmppc_host_rm_ops {
struct kvmppc_host_rm_core *rm_core;
void (*vcpu_kick)(struct kvm_vcpu *vcpu);
};
extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV

View File

@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
}
return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
}
unsigned long vmalloc_to_phys(void *vmalloc_addr);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */

View File

@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
#define PPC_MSG_TICK_BROADCAST 2
#define PPC_MSG_DEBUGGER_BREAK 3
/* This is only used by the powernv kernel */
#define PPC_MSG_RM_HOST_ACTION 4
/* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[];
@ -121,6 +124,7 @@ extern const char *smp_ipi_name[];
/* for irq controllers with only a single ipi */
extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
extern void smp_muxed_ipi_set_message(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void);
void smp_init_pSeries(void);

View File

@ -30,6 +30,7 @@
#ifdef CONFIG_PPC_ICP_NATIVE
extern int icp_native_init(void);
extern void icp_native_flush_interrupt(void);
extern void icp_native_cause_ipi_rm(int cpu);
#else
static inline int icp_native_init(void) { return -ENODEV; }
#endif

View File

@ -333,6 +333,15 @@ struct kvm_create_spapr_tce {
__u32 window_size;
};
/* for KVM_CAP_SPAPR_TCE_64 */
struct kvm_create_spapr_tce_64 {
__u64 liobn;
__u32 page_shift;
__u32 flags;
__u64 offset; /* in pages */
__u64 size; /* in pages */
};
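A minimal userspace sketch of how the new ioctl might be driven (hypothetical values; it only assumes the KVM_CREATE_SPAPR_TCE_64 ioctl added later in this series and a pre-existing VM fd):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Returns a TCE-table fd, or -1 on error. All values are illustrative. */
	static int create_ddw_window(int vmfd)
	{
		struct kvm_create_spapr_tce_64 args = {
			.liobn      = 0x80000001ULL,
			.page_shift = 16,			/* 64K IOMMU pages */
			.flags      = 0,			/* must be zero */
			.offset     = (1ULL << 59) >> 16,	/* bus offset, in pages */
			.size       = (1ULL << 30) >> 16,	/* 1GB window, in pages */
		};

		return ioctl(vmfd, KVM_CREATE_SPAPR_TCE_64, &args);
	}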
/* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma {
__u64 rma_size;

View File

@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
int messages; /* current messages */
long messages; /* current messages */
unsigned long data; /* data for cause ipi */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data;
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
*/
smp_mb();
message[msg] = 1;
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
smp_muxed_ipi_set_message(cpu, msg);
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
}
#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A) (1 << (8 * (A)))
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
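A worked example of the widened message word (illustrative, 64-bit kernel): writing message[4] = 1 for the new PPC_MSG_RM_HOST_ACTION corresponds to

	/* big-endian:    byte 4 is bits 31:24 -> IPI_MESSAGE(4) = 1UL << (56 - 32) = 1UL << 24
	 * little-endian: byte 4 is bits 39:32 -> IPI_MESSAGE(4) = 1UL << (8 * 4)   = 1UL << 32
	 * which is why "messages" had to grow from int to long once a fifth
	 * message type was added.
	 */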
irqreturn_t smp_ipi_demux(void)
{
struct cpu_messages *info = this_cpu_ptr(&ipi_message);
unsigned int all;
unsigned long all;
mb(); /* order any irq clear */
do {
all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
/*
* Must check for PPC_MSG_RM_HOST_ACTION messages
* before PPC_MSG_CALL_FUNCTION messages because when
* a VM is destroyed, we call kick_all_cpus_sync()
* to ensure that any pending PPC_MSG_RM_HOST_ACTION
* messages have completed before we free any VCPUs.
*/
if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
kvmppc_xics_ipi_action();
#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))

View File

@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o
$(KVM)/eventfd.o $(KVM)/vfio.o
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.

View File

@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif

View File

@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@ -36,28 +37,69 @@
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
static long kvmppc_stt_npages(unsigned long window_size)
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
return ALIGN((window_size >> SPAPR_TCE_SHIFT)
* sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}
static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{
struct kvm *kvm = stt->kvm;
int i;
unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
(tce_pages * sizeof(struct page *));
mutex_lock(&kvm->lock);
list_del(&stt->list);
for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
}
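A rough sizing example (assumptions: 64K host PAGE_SIZE and a 2GB window of 64K IOMMU pages; the numbers are illustrative only):

	/* 2GB / 64K = 32768 TCEs = 256K of TCE data
	 * kvmppc_tce_pages()  -> 4 backing pages
	 * kvmppc_stt_pages(4) -> 4 + 1 page for the descriptor and page pointers
	 * so kvmppc_account_memlimit() charges 5 pages against RLIMIT_MEMLOCK.
	 */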
static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{
long ret = 0;
if (!current || !current->mm)
return ret; /* process exited */
down_write(&current->mm->mmap_sem);
if (inc) {
unsigned long locked, lock_limit;
locked = current->mm->locked_vm + stt_pages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
current->mm->locked_vm += stt_pages;
} else {
if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
stt_pages = current->mm->locked_vm;
current->mm->locked_vm -= stt_pages;
}
pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
inc ? '+' : '-',
stt_pages << PAGE_SHIFT,
current->mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK),
ret ? " - exceeded" : "");
up_write(&current->mm->mmap_sem);
return ret;
}
static void release_spapr_tce_table(struct rcu_head *head)
{
struct kvmppc_spapr_tce_table *stt = container_of(head,
struct kvmppc_spapr_tce_table, rcu);
unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++)
__free_page(stt->pages[i]);
kfree(stt);
mutex_unlock(&kvm->lock);
kvm_put_kvm(kvm);
kfree(stt);
}
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
page = stt->pages[vmf->pgoff];
@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
release_spapr_tce_table(stt);
list_del_rcu(&stt->list);
kvm_put_kvm(stt->kvm);
kvmppc_account_memlimit(
kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
return 0;
}
@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = {
};
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args)
struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
long npages;
unsigned long npages, size;
int ret = -ENOMEM;
int i;
if (!args->size)
return -EINVAL;
/* Check this LIOBN hasn't been previously allocated */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == args->liobn)
return -EBUSY;
}
npages = kvmppc_stt_npages(args->window_size);
size = args->size;
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret) {
stt = NULL;
goto fail;
}
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
stt->liobn = args->liobn;
stt->window_size = args->window_size;
stt->page_shift = args->page_shift;
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
for (i = 0; i < npages; i++) {
@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kvm_get_kvm(kvm);
mutex_lock(&kvm->lock);
list_add(&stt->list, &kvm->arch.spapr_tce_tables);
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
mutex_unlock(&kvm->lock);
@ -148,3 +208,59 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
}
return ret;
}
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{
struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS, idx;
unsigned long entry, ua = 0;
u64 __user *tces, tce;
stt = kvmppc_find_table(vcpu, liobn);
if (!stt)
return H_TOO_HARD;
entry = ioba >> stt->page_shift;
/*
 * The SPAPR spec says that the maximum size of the list is 512 TCEs,
 * so the whole table (512 * 8 bytes) fits in a single 4K page.
*/
if (npages > 512)
return H_PARAMETER;
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret != H_SUCCESS)
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tces = (u64 __user *) ua;
for (i = 0; i < npages; ++i) {
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tce = be64_to_cpu(tce);
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
kvmppc_tce_put(stt, entry + i, tce);
}
unlock_exit:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
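For context, a hypothetical sketch of the guest side this handler serves (plpar_hcall_norets(), H_PUT_TCE_INDIRECT and __pa() come from the generic pseries hcall interface, not from this patch):

	/* Batch up to 512 big-endian TCEs from one 4K-aligned page. */
	static long put_tce_batch(unsigned long liobn, unsigned long ioba,
				  __be64 *tce_page, unsigned long npages)
	{
		return plpar_hcall_norets(H_PUT_TCE_INDIRECT, liobn, ioba,
					  __pa(tce_page), npages);
	}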

View File

@ -14,6 +14,7 @@
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@ -30,76 +31,321 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
#include <asm/mmu_context.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_host.h>
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/iommu.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
/* WARNING: This will be called in real-mode on HV KVM and virtual
/*
* Finds a TCE table descriptor by LIOBN.
*
* WARNING: This will be called in real or virtual mode on HV KVM and virtual
* mode on PR KVM
*/
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
unsigned long liobn)
{
struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
if (stt->liobn == liobn)
return stt;
return NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_find_table);
/*
* Validates IO address.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long ioba, unsigned long npages)
{
unsigned long mask = (1ULL << stt->page_shift) - 1;
unsigned long idx = ioba >> stt->page_shift;
if ((ioba & mask) || (idx < stt->offset) ||
(idx - stt->offset + npages > stt->size) ||
(idx + npages < idx))
return H_PARAMETER;
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
/*
* Validates TCE address.
 * At the moment, the flags and the page mask are validated.
 * As the host kernel does not access those addresses (it just puts them
 * in the table and user space is supposed to process them), we can skip
 * checking other things (such as whether the TCE is a guest RAM address
 * or whether the page was actually allocated).
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
{
unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
if (tce & mask)
return H_PARAMETER;
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
/* Note on the use of page_address() in real mode.
 *
 * It is safe to use page_address() in real mode on ppc64 because
 * page_address() is always defined as lowmem_page_address(),
 * which returns __va(PFN_PHYS(page_to_pfn(page))); this is a pure
 * arithmetic operation and does not access the page struct.
 *
 * Theoretically page_address() could be defined differently,
 * but then either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL
 * would have to be enabled.
 * WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64, and
 * HASHED_PAGE_VIRTUAL can be enabled for ppc32 only, and only
 * if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP
 * is not expected to be enabled on ppc32, page_address()
 * is safe for ppc32 as well.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
static u64 *kvmppc_page_address(struct page *page)
{
#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
#error TODO: fix to avoid page_address() here
#endif
return (u64 *) page_address(page);
}
/*
* Handles TCE requests for emulated devices.
 * Puts guest TCE values into the table and expects user space to convert them.
 * Called in both real and virtual modes.
 * It cannot fail, so kvmppc_tce_validate must be called before it.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
u64 *tbl;
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_put);
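An indexing example (illustrative, assuming a 4K PAGE_SIZE so TCES_PER_PAGE = 4096 / 8 = 512):

	/* With stt->offset = 0 and idx = 1000, the TCE is stored in
	 * stt->pages[1000 / 512] = pages[1], at slot 1000 % 512 = 488.
	 */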
long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
unsigned long *ua, unsigned long **prmap)
{
unsigned long gfn = gpa >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
memslot = search_memslots(kvm_memslots(kvm), gfn);
if (!memslot)
return -EINVAL;
*ua = __gfn_to_hva_memslot(memslot, gfn) |
(gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (prmap)
*prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
#endif
return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
long ret;
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == liobn) {
unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
struct page *page;
u64 *tbl;
if (!stt)
return H_TOO_HARD;
/* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
/* liobn, stt, stt->window_size); */
if (ioba >= stt->window_size)
return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, 1);
if (ret != H_SUCCESS)
return ret;
page = stt->pages[idx / TCES_PER_PAGE];
tbl = (u64 *)page_address(page);
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
return ret;
/* FIXME: Need to validate the TCE itself */
/* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
tbl[idx % TCES_PER_PAGE] = tce;
return H_SUCCESS;
}
}
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
/* Didn't find the liobn, punt it to userspace */
return H_TOO_HARD;
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
unsigned long ua, unsigned long *phpa)
{
pte_t *ptep, pte;
unsigned shift = 0;
ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
if (!ptep || !pte_present(*ptep))
return -ENXIO;
pte = *ptep;
if (!shift)
shift = PAGE_SHIFT;
/* Avoid handling anything potentially complicated in realmode */
if (shift > PAGE_SHIFT)
return -EAGAIN;
if (!pte_young(pte))
return -EAGAIN;
*phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) |
(ua & ~PAGE_MASK);
return 0;
}
long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{
struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS;
unsigned long tces, entry, ua = 0;
unsigned long *rmap = NULL;
stt = kvmppc_find_table(vcpu, liobn);
if (!stt)
return H_TOO_HARD;
entry = ioba >> stt->page_shift;
/*
 * The spec says that the maximum size of the list is 512 TCEs,
 * so the whole table addressed resides in a single 4K page.
*/
if (npages > 512)
return H_PARAMETER;
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret != H_SUCCESS)
return ret;
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap);
/*
* Synchronize with the MMU notifier callbacks in
* book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
* While we have the rmap lock, code running on other CPUs
* cannot finish unmapping the host real page that backs
* this guest real page, so we are OK to access the host
* real page.
*/
lock_rmap(rmap);
if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
kvmppc_tce_put(stt, entry + i, tce);
}
unlock_exit:
unlock_rmap(rmap);
return ret;
}
long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_value, unsigned long npages)
{
struct kvmppc_spapr_tce_table *stt;
long i, ret;
stt = kvmppc_find_table(vcpu, liobn);
if (!stt)
return H_TOO_HARD;
ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret != H_SUCCESS)
return ret;
/* Check permission bits only, to allow userspace to poison TCEs for debugging */
if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
return H_PARAMETER;
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba)
{
struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
long ret;
unsigned long idx;
struct page *page;
u64 *tbl;
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == liobn) {
unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
struct page *page;
u64 *tbl;
if (!stt)
return H_TOO_HARD;
if (ioba >= stt->window_size)
return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, 1);
if (ret != H_SUCCESS)
return ret;
page = stt->pages[idx / TCES_PER_PAGE];
tbl = (u64 *)page_address(page);
idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
tbl = (u64 *)page_address(page);
vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
return H_SUCCESS;
}
}
vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
/* Didn't find the liobn, punt it to userspace */
return H_TOO_HARD;
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
#endif /* KVM_BOOK3S_HV_POSSIBLE */

View File

@ -81,6 +81,17 @@ static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
#ifdef CONFIG_KVM_XICS
static struct kernel_param_ops module_param_ops = {
.set = param_set_int,
.get = param_get_int,
};
module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req);
break;
} /* fallthrough */
}
return RESUME_HOST;
case H_PUT_TCE:
ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_PUT_TCE_INDIRECT:
ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_STUFF_TCE:
ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
default:
return RESUME_HOST;
}
@ -2278,6 +2313,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
spin_unlock(&vc->lock);
}
/*
 * Clear the core from the list of active host cores as we are about to
 * enter the guest. Only do this on the primary thread of the core (not
 * on a subcore thread) that is entering the guest.
*/
static inline void kvmppc_clear_host_core(int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
return;
/*
 * The memory barrier can be omitted here as we will do a smp_wmb()
 * later in kvmppc_start_thread, and we only need to ensure that the
 * state is visible to other CPUs after we enter the guest.
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
}
/*
 * Advertise this core as an active host core since we exited the guest.
* Only need to do this if it is the primary thread of the core that is
* exiting.
*/
static inline void kvmppc_set_host_core(int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
return;
/*
* Memory barrier can be omitted here because we do a spin_unlock
* immediately after this which provides the memory barrier.
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
}
/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
kvmppc_clear_host_core(pcpu);
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i);
}
kvmppc_set_host_core(pcpu);
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
@ -2983,6 +3062,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu;
}
#ifdef CONFIG_KVM_XICS
static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
void *hcpu)
{
unsigned long cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
kvmppc_set_host_core(cpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
kvmppc_clear_host_core(cpu);
break;
#endif
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block kvmppc_cpu_notifier = {
.notifier_call = kvmppc_cpu_notify,
};
/*
* Allocate a per-core structure for managing state about which cores are
* running in the host versus the guest and for exchanging data between
 * real-mode KVM and CPUs running in the host.
 * This is only done for the first VM.
 * The allocated structure stays even if all VMs have stopped.
 * It is only freed when the kvm-hv module is unloaded.
 * It's OK for this routine to fail; we just don't support host
 * core operations like redirecting H_IPI wakeups.
*/
void kvmppc_alloc_host_rm_ops(void)
{
struct kvmppc_host_rm_ops *ops;
unsigned long l_ops;
int cpu, core;
int size;
/* Not the first time here ? */
if (kvmppc_host_rm_ops_hv != NULL)
return;
ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
if (!ops)
return;
size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
ops->rm_core = kzalloc(size, GFP_KERNEL);
if (!ops->rm_core) {
kfree(ops);
return;
}
get_online_cpus();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
continue;
core = cpu >> threads_shift;
ops->rm_core[core].rm_state.in_host = 1;
}
ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
/*
* Make the contents of the kvmppc_host_rm_ops structure visible
* to other CPUs before we assign it to the global variable.
* Do an atomic assignment (no locks used here), but if someone
* beats us to it, just free our copy and return.
*/
smp_wmb();
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
put_online_cpus();
kfree(ops->rm_core);
kfree(ops);
return;
}
register_cpu_notifier(&kvmppc_cpu_notifier);
put_online_cpus();
}
void kvmppc_free_host_rm_ops(void)
{
if (kvmppc_host_rm_ops_hv) {
unregister_cpu_notifier(&kvmppc_cpu_notifier);
kfree(kvmppc_host_rm_ops_hv->rm_core);
kfree(kvmppc_host_rm_ops_hv);
kvmppc_host_rm_ops_hv = NULL;
}
}
#endif
static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
@ -2995,6 +3182,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
return -ENOMEM;
kvm->arch.lpid = lpid;
kvmppc_alloc_host_rm_ops();
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
@ -3228,6 +3417,7 @@ static int kvmppc_book3s_init_hv(void)
static void kvmppc_book3s_exit_hv(void)
{
kvmppc_free_host_rm_ops();
kvmppc_hv_ops = NULL;
}

View File

@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap)
kvmhv_interrupt_vcore(vc, ee);
}
}
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);

View File

@ -17,12 +17,16 @@
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
int h_ipi_redirect = 1;
EXPORT_SYMBOL(h_ipi_redirect);
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
/* -- ICP routines -- */
#ifdef CONFIG_SMP
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
{
int hcpu;
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
icp_native_cause_ipi_rm(hcpu);
}
#else
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
#endif
/*
 * We start the search from our current CPU ID in the core map
 * and go around in a circle until we get back to our ID, looking for a
 * core that is running in host context and that hasn't already
 * been targeted for another rm_host_ops action.
 *
 * In the future, we could consider using a fairer algorithm (one
 * that distributes the IPIs better).
 *
 * Returns -1 if no CPU could be found in the host.
 * Otherwise, returns a CPU ID which has been reserved for use.
*/
static inline int grab_next_hostcore(int start,
struct kvmppc_host_rm_core *rm_core, int max, int action)
{
bool success;
int core;
union kvmppc_rm_state old, new;
for (core = start + 1; core < max; core++) {
old = new = READ_ONCE(rm_core[core].rm_state);
if (!old.in_host || old.rm_action)
continue;
/* Try to grab this host core if not taken already. */
new.rm_action = action;
success = cmpxchg64(&rm_core[core].rm_state.raw,
old.raw, new.raw) == old.raw;
if (success) {
/*
* Make sure that the store to the rm_action is made
* visible before we return to caller (and the
* subsequent store to rm_data) to synchronize with
* the IPI handler.
*/
smp_wmb();
return core;
}
}
return -1;
}
static inline int find_available_hostcore(int action)
{
int core;
int my_core = smp_processor_id() >> threads_shift;
struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
if (core == -1)
core = grab_next_hostcore(core, rm_core, my_core, action);
return core;
}
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu;
int hcore;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
/* Check if the core is loaded, if not, too hard */
/*
 * Check if the core is loaded.
 * If not, find an available host core to post to, to wake the VCPU;
 * if we can't find one, set up state to eventually return too hard.
*/
cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
hcore = -1;
if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
if (hcore != -1) {
icp_send_hcore_msg(hcore, vcpu);
} else {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
}
return;
}
@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
bail:
return check_too_hard(xics, icp);
}
/* --- Non-real mode XICS-related built-in routines --- */
/*
 * Host operations poked by real-mode KVM
 */
static void rm_host_ipi_action(int action, void *data)
{
switch (action) {
case XICS_RM_KICK_VCPU:
kvmppc_host_rm_ops_hv->vcpu_kick(data);
break;
default:
WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
break;
}
}
void kvmppc_xics_ipi_action(void)
{
int core;
unsigned int cpu = smp_processor_id();
struct kvmppc_host_rm_core *rm_corep;
core = cpu >> threads_shift;
rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
if (rm_corep->rm_data) {
rm_host_ipi_action(rm_corep->rm_state.rm_action,
rm_corep->rm_data);
/* Order these stores against the real mode KVM */
rm_corep->rm_data = NULL;
smp_wmb();
rm_corep->rm_state.rm_action = 0;
}
}

View File

@ -2020,8 +2020,8 @@ hcall_real_table:
.long 0 /* 0x12c */
.long 0 /* 0x130 */
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
.long 0 /* 0x138 */
.long 0 /* 0x13c */
.long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
.long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
.long 0 /* 0x140 */
.long 0 /* 0x144 */
.long 0 /* 0x148 */

View File

@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce = kvmppc_get_gpr(vcpu, 6);
unsigned long npages = kvmppc_get_gpr(vcpu, 7);
long rc;
rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
tce, npages);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
unsigned long npages = kvmppc_get_gpr(vcpu, 7);
long rc;
rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_bulk_remove(vcpu);
case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu);
case H_PUT_TCE_INDIRECT:
return kvmppc_h_pr_put_tce_indirect(vcpu);
case H_STUFF_TCE:
return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);

View File

@ -33,6 +33,7 @@
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
#include <asm/iommu.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
unsigned int i;
struct kvm_vcpu *vcpu;
#ifdef CONFIG_KVM_XICS
/*
* We call kick_all_cpus_sync() to ensure that all
* CPUs have executed any pending IPIs before we
 * continue and free the VCPU structures below.
*/
if (is_kvmppc_hv_enabled(kvm))
kick_all_cpus_sync();
#endif
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu);
@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_SMMU_INFO:
r = 1;
break;
case KVM_CAP_SPAPR_MULTITCE:
r = 1;
break;
#endif
default:
r = 0;
@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE_64: {
struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
goto out;
if (create_tce_64.flags) {
r = -EINVAL;
goto out;
}
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
goto out;
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
create_tce_64.liobn = create_tce.liobn;
create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
create_tce_64.offset = 0;
create_tce_64.size = create_tce.window_size >>
IOMMU_PAGE_SHIFT_4K;
create_tce_64.flags = 0;
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_PPC_GET_SMMU_INFO: {

View File

@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* CONFIG_DEBUG_VM */
unsigned long vmalloc_to_phys(void *va)
{
unsigned long pfn = vmalloc_to_pfn(va);
BUG_ON(!pfn);
return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
}
EXPORT_SYMBOL_GPL(vmalloc_to_phys);

View File

@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
}
}
static unsigned long vmalloc_to_phys(void *v)
{
struct page *p = vmalloc_to_page(v);
BUG_ON(!p);
return page_to_phys(p) + offset_in_page(v);
}
/* */
struct event_uniq {
struct rb_node node;

View File

@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void icp_native_cause_ipi_rm(int cpu)
{
/*
 * Currently not used to send IPIs to another CPU
 * on the same core; the only caller is KVM real mode.
 * This needs the physical address of the XICS to have been
 * saved previously in kvm_hstate in the paca.
*/
unsigned long xics_phys;
/*
* Just like the cause_ipi functions, it is required to
* include a full barrier (out8 includes a sync) before
* causing the IPI.
*/
xics_phys = paca[cpu].kvm_hstate.xics_phys;
out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
}
#endif
/*
* Called when an interrupt is received on an off-line CPU to
* clear the interrupt, so that the CPU can go back to nap mode.

View File

@ -20,6 +20,7 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/seqlock.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@ -229,17 +230,11 @@ struct kvm_s390_itdb {
__u8 data[256];
} __packed;
struct kvm_s390_vregs {
__vector128 vrs[32];
__u8 reserved200[512]; /* for future vector expansion */
} __packed;
struct sie_page {
struct kvm_s390_sie_block sie_block;
__u8 reserved200[1024]; /* 0x0200 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[1280]; /* 0x0700 */
struct kvm_s390_vregs vregs; /* 0x0c00 */
__u8 reserved700[2304]; /* 0x0700 */
} __packed;
struct kvm_vcpu_stat {
@ -558,6 +553,15 @@ struct kvm_vcpu_arch {
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
bool cputm_enabled;
/*
 * The seqcount protects updates to cputm_start and sie_block.cputm;
 * this way we can have non-blocking reads with consistent values.
* Only the owning VCPU thread (vcpu->cpu) is allowed to change these
* values and to start/stop/enable/disable cpu timer accounting.
*/
seqcount_t cputm_seqcount;
__u64 cputm_start;
};
struct kvm_vm_stat {
@ -596,15 +600,11 @@ struct s390_io_adapter {
#define S390_ARCH_FAC_MASK_SIZE_U64 \
(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
struct kvm_s390_fac {
/* facility list requested by guest */
__u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
/* facility mask supported by kvm & hosting machine */
__u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
};
struct kvm_s390_cpu_model {
struct kvm_s390_fac *fac;
/* facility mask supported by kvm & hosting machine */
__u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
/* facility list requested by guest (in dma page) */
__u64 *fac_list;
struct cpuid cpu_id;
unsigned short ibc;
};
@ -623,6 +623,16 @@ struct kvm_s390_crypto_cb {
__u8 reserved80[128]; /* 0x0080 */
};
/*
* sie_page2 has to be allocated as DMA because fac_list and crycb need
* 31bit addresses in the sie control block.
*/
struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
} __packed;
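
A small sketch of why the DMA allocation matters: the SIE control block stores these addresses in 32-bit fields (see the (u32)(u64) cast when the facility list is wired up later in this diff), so the page must resolve below 2 GiB. The addresses below are made up for illustration:

#include <stdint.h>
#include <stdio.h>

/* The SIE control block can only express 31-bit (below 2 GiB) addresses
 * for fac_list and crycb. */
static int fits_in_31_bits(uint64_t phys_addr)
{
	return (phys_addr & ~0x7fffffffULL) == 0;
}

int main(void)
{
	uint64_t ok  = 0x00c00000ULL;   /* hypothetical DMA page, below 2 GiB */
	uint64_t bad = 0x100000000ULL;  /* unusable for fac_list/crycb */

	printf("0x%llx fits: %d\n", (unsigned long long)ok,
	       fits_in_31_bits(ok));
	printf("0x%llx fits: %d\n", (unsigned long long)bad,
	       fits_in_31_bits(bad));
	return 0;
}
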
struct kvm_arch{
void *sca;
int use_esca;
@ -643,6 +653,7 @@ struct kvm_arch{
int ipte_lock_count;
struct mutex ipte_mutex;
spinlock_t start_stop_lock;
struct sie_page2 *sie_page2;
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
u64 epoch;


@ -154,6 +154,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_PFAULT (1UL << 5)
#define KVM_SYNC_VRS (1UL << 6)
#define KVM_SYNC_RICCB (1UL << 7)
#define KVM_SYNC_FPRS (1UL << 8)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@ -168,9 +169,12 @@ struct kvm_sync_regs {
__u64 pft; /* pfault token [PFAULT] */
__u64 pfs; /* pfault select [PFAULT] */
__u64 pfc; /* pfault compare [PFAULT] */
__u64 vrs[32][2]; /* vector registers */
union {
__u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */
__u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */
};
__u8 reserved[512]; /* for future vector expansion */
__u32 fpc; /* only valid with vector registers */
__u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
__u8 padding[52]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
};


@ -7,6 +7,7 @@
{ 0x9c, "DIAG (0x9c) time slice end directed" }, \
{ 0x204, "DIAG (0x204) logical-cpu utilization" }, \
{ 0x258, "DIAG (0x258) page-reference services" }, \
{ 0x288, "DIAG (0x288) watchdog functions" }, \
{ 0x308, "DIAG (0x308) ipl functions" }, \
{ 0x500, "DIAG (0x500) KVM virtio functions" }, \
{ 0x501, "DIAG (0x501) KVM breakpoint" }


@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
int write)
enum gacc_mode mode)
{
union alet alet;
struct ale ale;
@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
}
}
if (ale.fo == 1 && write)
if (ale.fo == 1 && mode == GACC_STORE)
return PGM_PROTECTION;
asce->val = aste.asce;
@ -477,25 +477,28 @@ enum {
};
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
ar_t ar, int write)
ar_t ar, enum gacc_mode mode)
{
int rc;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
struct trans_exc_code_bits *tec_bits;
memset(pgm, 0, sizeof(*pgm));
tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
tec_bits->as = psw_bits(*psw).as;
tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
tec_bits->as = psw.as;
if (!psw_bits(*psw).t) {
if (!psw.t) {
asce->val = 0;
asce->r = 1;
return 0;
}
switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
if (mode == GACC_IFETCH)
psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
switch (psw.as) {
case PSW_AS_PRIMARY:
asce->val = vcpu->arch.sie_block->gcr[1];
return 0;
@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
asce->val = vcpu->arch.sie_block->gcr[13];
return 0;
case PSW_AS_ACCREG:
rc = ar_translation(vcpu, asce, ar, write);
rc = ar_translation(vcpu, asce, ar, mode);
switch (rc) {
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @gva: guest virtual address
* @gpa: points to where guest physical (absolute) address should be stored
* @asce: effective asce
* @write: indicates if access is a write access
* @mode: indicates the access mode to be used
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, const union asce asce,
int write)
enum gacc_mode mode)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
real_address:
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
if (write && dat_protection)
if (mode == GACC_STORE && dat_protection)
return PGM_PROTECTION;
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
unsigned long *pages, unsigned long nr_pages,
const union asce asce, int write)
const union asce asce, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
tec_bits->addr = ga >> PAGE_SHIFT;
if (write && lap_enabled && is_low_address(ga)) {
if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
pgm->code = PGM_PROTECTION;
return pgm->code;
}
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
rc = guest_translate(vcpu, ga, pages, asce, write);
rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
if (rc == PGM_PROTECTION)
@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
}
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len, int write)
unsigned long len, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
unsigned long _len, nr_pages, gpa, idx;
@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
if (!len)
return 0;
rc = get_vcpu_asce(vcpu, &asce, ar, write);
rc = get_vcpu_asce(vcpu, &asce, ar, mode);
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
if (write)
if (mode == GACC_STORE)
rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
else
rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
}
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, int write)
void *data, unsigned long len, enum gacc_mode mode)
{
unsigned long _len, gpa;
int rc = 0;
@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
while (len && !rc) {
gpa = kvm_s390_real_to_abs(vcpu, gra);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
if (write)
if (mode)
rc = write_guest_abs(vcpu, gpa, data, _len);
else
rc = read_guest_abs(vcpu, gpa, data, _len);
@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* has to take care of this.
*/
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
unsigned long *gpa, int write)
unsigned long *gpa, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
gva = kvm_s390_logical_to_effective(vcpu, gva);
tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
rc = get_vcpu_asce(vcpu, &asce, ar, write);
rc = get_vcpu_asce(vcpu, &asce, ar, mode);
tec->addr = gva >> PAGE_SHIFT;
if (rc)
return rc;
if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
if (write) {
if (mode == GACC_STORE) {
rc = pgm->code = PGM_PROTECTION;
return rc;
}
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
rc = guest_translate(vcpu, gva, gpa, asce, write);
rc = guest_translate(vcpu, gva, gpa, asce, mode);
if (rc > 0) {
if (rc == PGM_PROTECTION)
tec->b61 = 1;
@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
* check_gva_range - test a range of guest virtual addresses for accessibility
*/
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
unsigned long length, int is_write)
unsigned long length, enum gacc_mode mode)
{
unsigned long gpa;
unsigned long currlen;
@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
ipte_lock(vcpu);
while (length > 0 && !rc) {
currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
gva += currlen;
length -= currlen;
}


@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
enum gacc_mode {
GACC_FETCH,
GACC_STORE,
GACC_IFETCH,
};
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
ar_t ar, unsigned long *gpa, int write);
ar_t ar, unsigned long *gpa, enum gacc_mode mode);
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
unsigned long length, int is_write);
unsigned long length, enum gacc_mode mode);
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len, int write);
unsigned long len, enum gacc_mode mode);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, int write);
void *data, unsigned long len, enum gacc_mode mode);
/**
* write_guest - copy data from kernel space to guest space
@ -215,7 +221,7 @@ static inline __must_check
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
return access_guest(vcpu, ga, ar, data, len, 1);
return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
}
/**
@ -235,7 +241,27 @@ static inline __must_check
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
return access_guest(vcpu, ga, ar, data, len, 0);
return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
}
/**
* read_guest_instr - copy instruction data from guest space to kernel space
* @vcpu: virtual cpu
* @data: destination address in kernel space
* @len: number of bytes to copy
*
* Copy @len bytes from the current psw address (guest space) to @data (kernel
* space).
*
* The behaviour of read_guest_instr is identical to read_guest, except that
* instruction data is always fetched from the primary address space, unless
* the CPU is in home-space mode (in which case it is fetched from home space).
*/
static inline __must_check
int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len)
{
return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len,
GACC_IFETCH);
}
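
A compact sketch of the effective address-space selection that GACC_IFETCH implies (mirroring the psw.as adjustment in get_vcpu_asce() earlier in this diff): instruction fetches stay in home space when the CPU runs in home-space mode and fall back to primary space otherwise. The enum values below are illustrative stand-ins, not the kernel definitions:

#include <stdio.h>

/* Illustrative stand-ins for the s390 PSW address-space codes. */
enum as_mode { AS_PRIMARY, AS_ACCREG, AS_SECONDARY, AS_HOME };
enum gacc_mode_sketch { GACC_FETCH_S, GACC_STORE_S, GACC_IFETCH_S };

/* For instruction fetches, everything except home-space mode is
 * treated as primary space; data accesses keep the current space. */
static enum as_mode effective_as(enum as_mode as, enum gacc_mode_sketch mode)
{
	if (mode == GACC_IFETCH_S)
		return as == AS_HOME ? AS_HOME : AS_PRIMARY;
	return as;
}

int main(void)
{
	static const char *name[] = { "primary", "accreg", "secondary", "home" };
	for (int as = AS_PRIMARY; as <= AS_HOME; as++)
		printf("ifetch in %-9s mode -> %s space\n",
		       name[as], name[effective_as(as, GACC_IFETCH_S)]);
	return 0;
}
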
/**


@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = {
[0xeb] = kvm_s390_handle_eb,
};
void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
u8 ilen = 0;
/* Use the length of the EXECUTE instruction if necessary */
if (sie_block->icptstatus & 1) {
ilc = (sie_block->icptstatus >> 4) & 0x6;
if (!ilc)
ilc = 4;
switch (vcpu->arch.sie_block->icptcode) {
case ICPT_INST:
case ICPT_INSTPROGI:
case ICPT_OPEREXC:
case ICPT_PARTEXEC:
case ICPT_IOINST:
/* instruction only stored for these icptcodes */
ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
/* Use the length of the EXECUTE instruction if necessary */
if (sie_block->icptstatus & 1) {
ilen = (sie_block->icptstatus >> 4) & 0x6;
if (!ilen)
ilen = 4;
}
break;
case ICPT_PROGI:
/* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
ilen = vcpu->arch.sie_block->pgmilc & 0x6;
break;
}
sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
return ilen;
}
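
For readers unfamiliar with insn_length(): on s390 the two most significant bits of the first opcode byte encode the instruction length (00 gives 2 bytes, 01 or 10 gives 4 bytes, 11 gives 6 bytes). Since the ILC counts halfwords and is stored one bit to the left in pgmilc, masking pgmilc with 0x6 already reads out the same byte length, which is what the comment above relies on. A standalone sketch of the opcode mapping:

#include <stdint.h>
#include <stdio.h>

/* s390 instruction length from the first opcode byte:
 * top bits 00 -> 2 bytes, 01 or 10 -> 4 bytes, 11 -> 6 bytes. */
static unsigned int insn_length_sketch(uint8_t opcode)
{
	switch (opcode >> 6) {
	case 0:  return 2;
	case 3:  return 6;
	default: return 4;
	}
}

int main(void)
{
	/* example opcodes: 0x07 (BR family), 0xb2 (S format), 0xe3 (RXY format) */
	uint8_t ops[] = { 0x07, 0xb2, 0xe3 };
	for (unsigned int i = 0; i < sizeof(ops); i++)
		printf("opcode 0x%02x -> %u bytes\n", ops[i],
		       insn_length_sketch(ops[i]));
	return 0;
}
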
static int handle_noop(struct kvm_vcpu *vcpu)
@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
static void __extract_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info)
static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
{
memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
pgm_info->code = vcpu->arch.sie_block->iprcc;
struct kvm_s390_pgm_info pgm_info = {
.code = vcpu->arch.sie_block->iprcc,
/* the PSW has already been rewound */
.flags = KVM_S390_PGM_FLAGS_NO_REWIND,
};
switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
case PGM_SPACE_SWITCH:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_ASTE_SEQUENCE:
case PGM_ASTE_VALIDITY:
case PGM_EXTENDED_AUTHORITY:
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
case PGM_SEGMENT_TRANSLATION:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
pgm_info->op_access_id = vcpu->arch.sie_block->oai;
pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
pgm_info.op_access_id = vcpu->arch.sie_block->oai;
break;
case PGM_MONITOR:
pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_VECTOR_PROCESSING:
case PGM_DATA:
pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
break;
case PGM_PROTECTION:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
default:
break;
}
if (vcpu->arch.sie_block->iprcc & PGM_PER) {
pgm_info->per_code = vcpu->arch.sie_block->perc;
pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
pgm_info->per_address = vcpu->arch.sie_block->peraddr;
pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
pgm_info.per_code = vcpu->arch.sie_block->perc;
pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
pgm_info.per_address = vcpu->arch.sie_block->peraddr;
pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
}
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
/*
@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
static int handle_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_pgm_info pgm_info;
psw_t psw;
int rc;
@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
if (rc)
return rc;
__extract_prog_irq(vcpu, &pgm_info);
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
return inject_prog_on_prog_intercept(vcpu);
}
/**
@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the source is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
reg2, &srcaddr, 0);
reg2, &srcaddr, GACC_FETCH);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@ -311,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the destination is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
reg1, &dstaddr, 1);
reg1, &dstaddr, GACC_STORE);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
if (rc != 0)
return rc;
kvm_s390_rewind_psw(vcpu, 4);
kvm_s390_retry_instr(vcpu);
return 0;
}


@ -182,8 +182,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.sie_block->cputm >> 63) &&
cpu_timer_interrupts_enabled(vcpu);
if (!cpu_timer_interrupts_enabled(vcpu))
return 0;
return kvm_s390_get_cpu_timer(vcpu) >> 63;
}
static inline int is_ioirq(unsigned long irq_type)
@ -335,23 +336,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
set_intercept_indicators_stop(vcpu);
}
static u16 get_ilc(struct kvm_vcpu *vcpu)
{
switch (vcpu->arch.sie_block->icptcode) {
case ICPT_INST:
case ICPT_INSTPROGI:
case ICPT_OPEREXC:
case ICPT_PARTEXEC:
case ICPT_IOINST:
/* last instruction only stored for these icptcodes */
return insn_length(vcpu->arch.sie_block->ipa >> 8);
case ICPT_PROGI:
return vcpu->arch.sie_block->pgmilc;
default:
return 0;
}
}
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@ -588,7 +572,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_pgm_info pgm_info;
int rc = 0, nullifying = false;
u16 ilc = get_ilc(vcpu);
u16 ilen;
spin_lock(&li->lock);
pgm_info = li->irq.pgm;
@ -596,8 +580,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
memset(&li->irq.pgm, 0, sizeof(pgm_info));
spin_unlock(&li->lock);
VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
pgm_info.code, ilc);
ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
pgm_info.code, ilen);
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
@ -681,10 +666,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
(u8 *) __LC_PER_ACCESS_ID);
}
if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
kvm_s390_rewind_psw(vcpu, ilc);
if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND))
kvm_s390_rewind_psw(vcpu, ilen);
rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
/* bit 1+2 of the target are the ilc, so we can directly use ilen */
rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
(u64 *) __LC_LAST_BREAK);
rc |= put_guest_lc(vcpu, pgm_info.code,
@ -923,9 +909,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
}
static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
{
u64 now, cputm, sltime = 0;
if (ckc_interrupts_enabled(vcpu)) {
now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
/* already expired or overflow? */
if (!sltime || vcpu->arch.sie_block->ckc <= now)
return 0;
if (cpu_timer_interrupts_enabled(vcpu)) {
cputm = kvm_s390_get_cpu_timer(vcpu);
/* already expired? */
if (cputm >> 63)
return 0;
return min(sltime, tod_to_ns(cputm));
}
} else if (cpu_timer_interrupts_enabled(vcpu)) {
sltime = kvm_s390_get_cpu_timer(vcpu);
/* already expired? */
if (sltime >> 63)
return 0;
}
return sltime;
}
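
Two details worth spelling out for __calculate_sltime(): the s390 CPU timer counts down and is considered expired once negative, so testing bit 63 (the sign bit) is the "already expired" check, and TOD clock deltas are converted to nanoseconds by tod_to_ns(), roughly ns = (tod * 125) >> 9, since 4096 TOD units make one microsecond. A hedged sketch that ignores the overflow splitting the real helper performs:

#include <stdint.h>
#include <stdio.h>

/* Simplified tod_to_ns(): 4096 TOD units per microsecond, so
 * ns = tod * 125 / 512. The kernel splits this to avoid 64-bit overflow. */
static uint64_t tod_to_ns_sketch(uint64_t tod)
{
	return (tod * 125) >> 9;
}

/* The CPU timer counts down; once the sign bit is set it has expired. */
static int cpu_timer_expired(uint64_t cputm)
{
	return cputm >> 63;
}

int main(void)
{
	uint64_t delta = 4096ULL * 1000;  /* 1000 microseconds of TOD units */

	printf("%llu TOD units = %llu ns\n", (unsigned long long)delta,
	       (unsigned long long)tod_to_ns_sketch(delta));
	printf("cputm 0x8000000000000000 expired: %d\n",
	       cpu_timer_expired(0x8000000000000000ULL));
	return 0;
}
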
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
{
u64 now, sltime;
u64 sltime;
vcpu->stat.exit_wait_state++;
@ -938,22 +950,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP; /* disabled wait */
}
if (!ckc_interrupts_enabled(vcpu)) {
if (!ckc_interrupts_enabled(vcpu) &&
!cpu_timer_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
__set_cpu_idle(vcpu);
goto no_timer;
}
now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
/* underflow */
if (vcpu->arch.sie_block->ckc < now)
sltime = __calculate_sltime(vcpu);
if (!sltime)
return 0;
__set_cpu_idle(vcpu);
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set(0, sltime), HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
@ -980,18 +990,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
u64 now, sltime;
u64 sltime;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
sltime = __calculate_sltime(vcpu);
/*
* If the monotonic clock runs faster than the tod clock we might be
* woken up too early and have to go back to sleep to avoid deadlocks.
*/
if (vcpu->arch.sie_block->ckc > now &&
hrtimer_forward_now(timer, ns_to_ktime(sltime)))
if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime)))
return HRTIMER_RESTART;
kvm_s390_vcpu_wakeup(vcpu);
return HRTIMER_NORESTART;
@ -1059,8 +1067,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
irq->u.pgm.code, 0);
if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) {
/* auto detection if no valid ILC was given */
irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
}
if (irq->u.pgm.code == PGM_PER) {
li->irq.pgm.code |= PGM_PER;
li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify PER related information */
li->irq.pgm.per_address = irq->u.pgm.per_address;
li->irq.pgm.per_code = irq->u.pgm.per_code;
@ -1069,6 +1085,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
} else if (!(irq->u.pgm.code & PGM_PER)) {
li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
irq->u.pgm.code;
li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify non-PER information */
li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
li->irq.pgm.mon_code = irq->u.pgm.mon_code;


@ -158,6 +158,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.sie_block->epoch -= *delta;
if (vcpu->arch.cputm_enabled)
vcpu->arch.cputm_start += *delta;
}
}
return NOTIFY_OK;
@ -274,7 +276,6 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
unsigned long address;
struct gmap *gmap = kvm->arch.gmap;
down_read(&gmap->mm->mmap_sem);
/* Loop over all guest pages */
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
@ -282,8 +283,10 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
if (gmap_test_and_clear_dirty(address, gmap))
mark_page_dirty(kvm, cur_gfn);
if (fatal_signal_pending(current))
return;
cond_resched();
}
up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
@ -352,8 +355,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
if (atomic_read(&kvm->online_vcpus)) {
r = -EBUSY;
} else if (MACHINE_HAS_VX) {
set_kvm_facility(kvm->arch.model.fac->mask, 129);
set_kvm_facility(kvm->arch.model.fac->list, 129);
set_kvm_facility(kvm->arch.model.fac_mask, 129);
set_kvm_facility(kvm->arch.model.fac_list, 129);
r = 0;
} else
r = -EINVAL;
@ -367,8 +370,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
if (atomic_read(&kvm->online_vcpus)) {
r = -EBUSY;
} else if (test_facility(64)) {
set_kvm_facility(kvm->arch.model.fac->mask, 64);
set_kvm_facility(kvm->arch.model.fac->list, 64);
set_kvm_facility(kvm->arch.model.fac_mask, 64);
set_kvm_facility(kvm->arch.model.fac_list, 64);
r = 0;
}
mutex_unlock(&kvm->lock);
@ -651,7 +654,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
sizeof(struct cpuid));
kvm->arch.model.ibc = proc->ibc;
memcpy(kvm->arch.model.fac->list, proc->fac_list,
memcpy(kvm->arch.model.fac_list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
} else
ret = -EFAULT;
@ -685,7 +688,8 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
}
memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
proc->ibc = kvm->arch.model.ibc;
memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy(&proc->fac_list, kvm->arch.model.fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
ret = -EFAULT;
kfree(proc);
@ -705,7 +709,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
}
get_cpu_id((struct cpuid *) &mach->cpuid);
mach->ibc = sclp.ibc;
memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
@ -1082,16 +1086,12 @@ static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
cpu_id->version = 0xff;
}
static int kvm_s390_crypto_init(struct kvm *kvm)
static void kvm_s390_crypto_init(struct kvm *kvm)
{
if (!test_kvm_facility(kvm, 76))
return 0;
kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
GFP_KERNEL | GFP_DMA);
if (!kvm->arch.crypto.crycb)
return -ENOMEM;
return;
kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
kvm_s390_set_crycb_format(kvm);
/* Enable AES/DEA protected key functions by default */
@ -1101,8 +1101,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
return 0;
}
static void sca_dispose(struct kvm *kvm)
@ -1156,37 +1154,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_err;
/*
* The architectural maximum amount of facilities is 16 kbit. To store
* this amount, 2 kbyte of memory is required. Thus we need a full
* page to hold the guest facility list (arch.model.fac->list) and the
* facility mask (arch.model.fac->mask). Its address size has to be
* 31 bits and word aligned.
*/
kvm->arch.model.fac =
(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.model.fac)
kvm->arch.sie_page2 =
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.sie_page2)
goto out_err;
/* Populate the facility mask initially. */
memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
else
kvm->arch.model.fac->mask[i] = 0UL;
kvm->arch.model.fac_mask[i] = 0UL;
}
/* Populate the facility list initially. */
memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
if (kvm_s390_crypto_init(kvm) < 0)
goto out_err;
kvm_s390_crypto_init(kvm);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@ -1222,8 +1213,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
out_err:
kfree(kvm->arch.crypto.crycb);
free_page((unsigned long)kvm->arch.model.fac);
free_page((unsigned long)kvm->arch.sie_page2);
debug_unregister(kvm->arch.dbf);
sca_dispose(kvm);
KVM_EVENT(3, "creation of vm failed: %d", rc);
@ -1269,10 +1259,9 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
free_page((unsigned long)kvm->arch.model.fac);
sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
kfree(kvm->arch.crypto.crycb);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
@ -1414,8 +1403,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_PFAULT;
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
if (test_kvm_facility(vcpu->kvm, 129))
/* fprs can be synchronized via vrs, even if the guest has no vx. With
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
*/
if (MACHINE_HAS_VX)
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
else
vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
@ -1423,6 +1417,93 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
vcpu->arch.cputm_start = get_tod_clock_fast();
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
vcpu->arch.cputm_start = 0;
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(vcpu->arch.cputm_enabled);
vcpu->arch.cputm_enabled = true;
__start_cpu_timer_accounting(vcpu);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
__stop_cpu_timer_accounting(vcpu);
vcpu->arch.cputm_enabled = false;
}
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
__enable_cpu_timer_accounting(vcpu);
preempt_enable();
}
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
__disable_cpu_timer_accounting(vcpu);
preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
if (vcpu->arch.cputm_enabled)
vcpu->arch.cputm_start = get_tod_clock_fast();
vcpu->arch.sie_block->cputm = cputm;
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
unsigned int seq;
__u64 value;
if (unlikely(!vcpu->arch.cputm_enabled))
return vcpu->arch.sie_block->cputm;
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
do {
seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
/*
* If the writer would ever execute a read in the critical
* section, e.g. in irq context, we have a deadlock.
*/
WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
value = vcpu->arch.sie_block->cputm;
/* if cputm_start is 0, accounting is being started/stopped */
if (likely(vcpu->arch.cputm_start))
value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
preempt_enable();
return value;
}
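
The pair of functions above is essentially a sequence-counter protocol: the single writer (the owning VCPU thread) makes the counter odd around its update, and readers retry until they observe an even, unchanged counter, which is also why the WARN above guards against the writer reading on its own CPU mid-update. A compact userspace analogue with C11 atomics, not the kernel seqcount API and without its finer memory-ordering guarantees:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified analogue of the cputm_seqcount pattern; single writer assumed. */
struct seq_u64 {
	atomic_uint seq;   /* odd while the writer is updating */
	uint64_t value;    /* the protected value, think sie_block->cputm */
};

static void seq_write(struct seq_u64 *s, uint64_t v)
{
	atomic_fetch_add(&s->seq, 1);  /* counter becomes odd: update in flight */
	s->value = v;
	atomic_fetch_add(&s->seq, 1);  /* counter even again: update published */
}

static uint64_t seq_read(struct seq_u64 *s)
{
	unsigned int before, after;
	uint64_t v;

	do {
		before = atomic_load(&s->seq);
		v = s->value;
		after = atomic_load(&s->seq);
	} while ((before & 1) || before != after);  /* retry if a write raced */
	return v;
}

int main(void)
{
	struct seq_u64 s = { 0 };

	seq_write(&s, 42);
	printf("consistent read: %llu\n", (unsigned long long)seq_read(&s));
	return 0;
}
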
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Save host register state */
@ -1430,10 +1511,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
/* Depending on MACHINE_HAS_VX, data stored to vrs either
* has vector register or floating point register format.
*/
current->thread.fpu.regs = vcpu->run->s.regs.vrs;
if (MACHINE_HAS_VX)
current->thread.fpu.regs = vcpu->run->s.regs.vrs;
else
current->thread.fpu.regs = vcpu->run->s.regs.fprs;
current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
@ -1443,10 +1524,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
vcpu->cpu = -1;
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
@ -1468,7 +1555,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
vcpu->arch.sie_block->cputm = 0UL;
kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
@ -1538,7 +1625,8 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
vcpu->arch.cpu_id = model->cpu_id;
vcpu->arch.sie_block->ibc = model->ibc;
vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
if (test_kvm_facility(vcpu->kvm, 7))
vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@ -1616,6 +1704,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
@ -1715,7 +1804,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
r = put_user(vcpu->arch.sie_block->cputm,
r = put_user(kvm_s390_get_cpu_timer(vcpu),
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CLOCK_COMP:
@ -1753,6 +1842,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
{
int r = -EINVAL;
__u64 val;
switch (reg->id) {
case KVM_REG_S390_TODPR:
@ -1764,8 +1854,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
r = get_user(vcpu->arch.sie_block->cputm,
(u64 __user *)reg->addr);
r = get_user(val, (u64 __user *)reg->addr);
if (!r)
kvm_s390_set_cpu_timer(vcpu, val);
break;
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
@ -2158,8 +2249,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
u8 opcode;
struct kvm_s390_pgm_info pgm_info = {
.code = PGM_ADDRESSING,
};
u8 opcode, ilen;
int rc;
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
@ -2173,12 +2266,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
* to look up the current opcode to get the length of the instruction
* to be able to forward the PSW.
*/
rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
psw->addr = __rewind_psw(*psw, -insn_length(opcode));
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = read_guest_instr(vcpu, &opcode, 1);
ilen = insn_length(opcode);
if (rc < 0) {
return rc;
} else if (rc) {
/* Instruction-Fetching Exceptions - we can't detect the ilen.
* Forward by arbitrary ilc, injection will take care of
* nullification if necessary.
*/
pgm_info = vcpu->arch.pgm;
ilen = 4;
}
pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
kvm_s390_forward_psw(vcpu, ilen);
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@ -2244,10 +2346,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
*/
local_irq_disable();
__kvm_guest_enter();
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
__kvm_guest_exit();
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@ -2271,7 +2375,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
@ -2293,7 +2397,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
@ -2325,6 +2429,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
sync_regs(vcpu, kvm_run);
enable_cpu_timer_accounting(vcpu);
might_fault();
rc = __vcpu_run(vcpu);
@ -2344,6 +2449,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
if (vcpu->sigset_active)
@ -2364,7 +2470,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
unsigned char archmode = 1;
freg_t fprs[NUM_FPRS];
unsigned int px;
u64 clkcomp;
u64 clkcomp, cputm;
int rc;
px = kvm_s390_get_prefix(vcpu);
@ -2386,7 +2492,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
fprs, 128);
} else {
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
vcpu->run->s.regs.vrs, 128);
vcpu->run->s.regs.fprs, 128);
}
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
@ -2398,8 +2504,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
&vcpu->run->s.regs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
cputm = kvm_s390_get_cpu_timer(vcpu);
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
&vcpu->arch.sie_block->cputm, 8);
&cputm, 8);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
&clkcomp, 8);
@ -2605,7 +2712,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
r = check_gva_range(vcpu, mop->gaddr, mop->ar,
mop->size, GACC_FETCH);
break;
}
r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
@ -2616,7 +2724,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
break;
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
r = check_gva_range(vcpu, mop->gaddr, mop->ar,
mop->size, GACC_STORE);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {


@ -19,6 +19,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/facility.h>
#include <asm/processor.h>
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
{
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT;
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
{
#ifdef CONFIG_KVM_S390_UCONTROL
@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
/* test availability of facility in a kvm instance */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
return __test_facility(nr, kvm->arch.model.fac->mask) &&
__test_facility(nr, kvm->arch.model.fac->list);
return __test_facility(nr, kvm->arch.model.fac_mask) &&
__test_facility(nr, kvm->arch.model.fac_list);
}
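
One non-obvious detail behind the __test_facility() calls above: s390 facility bits are numbered from the most significant bit of the list, so bit 0 is the leftmost bit of the first byte. A hedged standalone sketch of the double lookup, with plain byte arrays standing in for the kernel's fac_mask/fac_list:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Facility bits are numbered MSB-first within each byte. */
static int test_facility_sketch(const uint8_t *fac, unsigned long nr)
{
	return (fac[nr >> 3] & (0x80 >> (nr & 7))) != 0;
}

/* Mirror of test_kvm_facility(): a facility is usable only if it is in
 * the supported mask *and* in the list requested for the guest. */
static int test_kvm_facility_sketch(const uint8_t *mask, const uint8_t *list,
				    unsigned long nr)
{
	return test_facility_sketch(mask, nr) && test_facility_sketch(list, nr);
}

int main(void)
{
	uint8_t mask[32], list[32];

	memset(mask, 0, sizeof(mask));
	memset(list, 0, sizeof(list));

	/* pretend facility 129 (vector) is both offered and requested */
	mask[129 / 8] |= 0x80 >> (129 % 8);
	list[129 / 8] |= 0x80 >> (129 % 8);

	printf("facility 129 available: %d\n",
	       test_kvm_facility_sketch(mask, list, 129));
	printf("facility 64 available: %d\n",
	       test_kvm_facility_sketch(mask, list, 64));
	return 0;
}
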
static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm,
int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in intercept.c */
void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu);
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen)
{
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen);
}
static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
{
kvm_s390_rewind_psw(vcpu, -ilen);
}
static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
{
kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
}
/* implemented in priv.c */
int is_valid_psw(psw_t *psw);
@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
unsigned long kvm_s390_fac_list_mask_size(void);
extern unsigned long kvm_s390_fac_list_mask[];
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);


@ -173,7 +173,7 @@ static int handle_skey(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
kvm_s390_rewind_psw(vcpu, 4);
kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
@ -184,7 +184,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
if (psw_bits(vcpu->arch.sie_block->gpsw).p)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
kvm_s390_rewind_psw(vcpu, 4);
kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
}
@ -354,7 +354,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
* We need to shift the lower 32 facility bits (bit 0-31) from a u64
* into a u32 memory representation. They will remain bits 0-31.
*/
fac = *vcpu->kvm->arch.model.fac->list >> 32;
fac = *vcpu->kvm->arch.model.fac_list >> 32;
rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list),
&fac, sizeof(fac));
if (rc)
@ -759,8 +759,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* Rewind PSW to repeat the ESSA instruction */
kvm_s390_rewind_psw(vcpu, 4);
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
@ -981,11 +981,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
ipte_lock(vcpu);
ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
if (ret == PGM_PROTECTION) {
/* Write protected? Try again with read-only... */
cc = 1;
ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
ret = guest_translate_address(vcpu, address1, ar, &gpa,
GACC_FETCH);
}
if (ret) {
if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {


@ -32,6 +32,7 @@
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
#define KVM_MAX_VCPUS 255
#define KVM_SOFT_MAX_VCPUS 160
@ -214,6 +215,14 @@ struct kvm_mmu_memory_cache {
void *objects[KVM_NR_MEM_OBJS];
};
/*
* The pages used as guest page tables on the soft MMU are tracked by
* kvm_memory_slot.arch.gfn_track, which is 16 bits, so the role bits used
* by an indirect shadow page cannot be more than 15 bits.
*
* Currently we use 14 bits: @level, @cr4_pae, @quadrant, @access,
* @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
*/
union kvm_mmu_page_role {
unsigned word;
struct {
@ -276,7 +285,7 @@ struct kvm_mmu_page {
#endif
/* Number of writes since the last time traversal visited this page. */
int write_flooding_count;
atomic_t write_flooding_count;
};
struct kvm_pio_request {
@ -338,12 +347,8 @@ struct kvm_mmu {
struct rsvd_bits_validate guest_rsvd_check;
/*
* Bitmap: bit set = last pte in walk
* index[0:1]: level (zero-based)
* index[2]: pte.ps
*/
u8 last_pte_bitmap;
/* Can have large pages at levels 2..last_nonleaf_level-1. */
u8 last_nonleaf_level;
bool nx;
@ -498,7 +503,6 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_header_cache;
struct fpu guest_fpu;
bool eager_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@ -644,12 +648,13 @@ struct kvm_vcpu_arch {
};
struct kvm_lpage_info {
int write_count;
int disallow_lpage;
};
struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
};
/*
@ -694,6 +699,8 @@ struct kvm_arch {
*/
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
@ -754,6 +761,8 @@ struct kvm_arch {
bool irqchip_split;
u8 nr_reserved_ioapic_pins;
bool disabled_lapic_found;
};
struct kvm_vm_stat {
@ -988,6 +997,8 @@ void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
void kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);
@ -1127,8 +1138,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);

Some files were not shown because too many files have changed in this diff.