mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-22 13:02:19 +07:00
ab9cf4996b
The idea is simple: there's a bit, per APIC, in guest memory, that tells the guest that it does not need EOI. Guest tests it using a single est and clear operation - this is necessary so that host can detect interrupt nesting - and if set, it can skip the EOI MSR. I run a simple microbenchmark to show exit reduction (note: for testing, need to apply follow-up patch 'kvm: host side for eoi optimization' + a qemu patch I posted separately, on host): Before: Performance counter stats for 'sleep 1s': 47,357 kvm:kvm_entry [99.98%] 0 kvm:kvm_hypercall [99.98%] 0 kvm:kvm_hv_hypercall [99.98%] 5,001 kvm:kvm_pio [99.98%] 0 kvm:kvm_cpuid [99.98%] 22,124 kvm:kvm_apic [99.98%] 49,849 kvm:kvm_exit [99.98%] 21,115 kvm:kvm_inj_virq [99.98%] 0 kvm:kvm_inj_exception [99.98%] 0 kvm:kvm_page_fault [99.98%] 22,937 kvm:kvm_msr [99.98%] 0 kvm:kvm_cr [99.98%] 0 kvm:kvm_pic_set_irq [99.98%] 0 kvm:kvm_apic_ipi [99.98%] 22,207 kvm:kvm_apic_accept_irq [99.98%] 22,421 kvm:kvm_eoi [99.98%] 0 kvm:kvm_pv_eoi [99.99%] 0 kvm:kvm_nested_vmrun [99.99%] 0 kvm:kvm_nested_intercepts [99.99%] 0 kvm:kvm_nested_vmexit [99.99%] 0 kvm:kvm_nested_vmexit_inject [99.99%] 0 kvm:kvm_nested_intr_vmexit [99.99%] 0 kvm:kvm_invlpga [99.99%] 0 kvm:kvm_skinit [99.99%] 57 kvm:kvm_emulate_insn [99.99%] 0 kvm:vcpu_match_mmio [99.99%] 0 kvm:kvm_userspace_exit [99.99%] 2 kvm:kvm_set_irq [99.99%] 2 kvm:kvm_ioapic_set_irq [99.99%] 23,609 kvm:kvm_msi_set_irq [99.99%] 1 kvm:kvm_ack_irq [99.99%] 131 kvm:kvm_mmio [99.99%] 226 kvm:kvm_fpu [100.00%] 0 kvm:kvm_age_page [100.00%] 0 kvm:kvm_try_async_get_page [100.00%] 0 kvm:kvm_async_pf_doublefault [100.00%] 0 kvm:kvm_async_pf_not_present [100.00%] 0 kvm:kvm_async_pf_ready [100.00%] 0 kvm:kvm_async_pf_completed 1.002100578 seconds time elapsed After: Performance counter stats for 'sleep 1s': 28,354 kvm:kvm_entry [99.98%] 0 kvm:kvm_hypercall [99.98%] 0 kvm:kvm_hv_hypercall [99.98%] 1,347 kvm:kvm_pio [99.98%] 0 kvm:kvm_cpuid [99.98%] 1,931 kvm:kvm_apic [99.98%] 29,595 kvm:kvm_exit [99.98%] 24,884 kvm:kvm_inj_virq [99.98%] 0 kvm:kvm_inj_exception [99.98%] 0 kvm:kvm_page_fault [99.98%] 1,986 kvm:kvm_msr [99.98%] 0 kvm:kvm_cr [99.98%] 0 kvm:kvm_pic_set_irq [99.98%] 0 kvm:kvm_apic_ipi [99.99%] 25,953 kvm:kvm_apic_accept_irq [99.99%] 26,132 kvm:kvm_eoi [99.99%] 26,593 kvm:kvm_pv_eoi [99.99%] 0 kvm:kvm_nested_vmrun [99.99%] 0 kvm:kvm_nested_intercepts [99.99%] 0 kvm:kvm_nested_vmexit [99.99%] 0 kvm:kvm_nested_vmexit_inject [99.99%] 0 kvm:kvm_nested_intr_vmexit [99.99%] 0 kvm:kvm_invlpga [99.99%] 0 kvm:kvm_skinit [99.99%] 284 kvm:kvm_emulate_insn [99.99%] 68 kvm:vcpu_match_mmio [99.99%] 68 kvm:kvm_userspace_exit [99.99%] 2 kvm:kvm_set_irq [99.99%] 2 kvm:kvm_ioapic_set_irq [99.99%] 28,288 kvm:kvm_msi_set_irq [99.99%] 1 kvm:kvm_ack_irq [99.99%] 131 kvm:kvm_mmio [100.00%] 588 kvm:kvm_fpu [100.00%] 0 kvm:kvm_age_page [100.00%] 0 kvm:kvm_try_async_get_page [100.00%] 0 kvm:kvm_async_pf_doublefault [100.00%] 0 kvm:kvm_async_pf_not_present [100.00%] 0 kvm:kvm_async_pf_ready [100.00%] 0 kvm:kvm_async_pf_completed 1.002039622 seconds time elapsed We see that # of exits is almost halved. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Avi Kivity <avi@redhat.com>
234 lines
5.7 KiB
C
234 lines
5.7 KiB
C
#ifndef _ASM_X86_KVM_PARA_H
|
|
#define _ASM_X86_KVM_PARA_H
|
|
|
|
#include <linux/types.h>
|
|
#include <asm/hyperv.h>
|
|
|
|
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
|
|
* should be used to determine that a VM is running under KVM.
|
|
*/
|
|
#define KVM_CPUID_SIGNATURE 0x40000000
|
|
|
|
/* This CPUID returns a feature bitmap in eax. Before enabling a particular
|
|
* paravirtualization, the appropriate feature bit should be checked.
|
|
*/
|
|
#define KVM_CPUID_FEATURES 0x40000001
|
|
#define KVM_FEATURE_CLOCKSOURCE 0
|
|
#define KVM_FEATURE_NOP_IO_DELAY 1
|
|
#define KVM_FEATURE_MMU_OP 2
|
|
/* This indicates that the new set of kvmclock msrs
|
|
* are available. The use of 0x11 and 0x12 is deprecated
|
|
*/
|
|
#define KVM_FEATURE_CLOCKSOURCE2 3
|
|
#define KVM_FEATURE_ASYNC_PF 4
|
|
#define KVM_FEATURE_STEAL_TIME 5
|
|
#define KVM_FEATURE_PV_EOI 6
|
|
|
|
/* The last 8 bits are used to indicate how to interpret the flags field
|
|
* in pvclock structure. If no bits are set, all flags are ignored.
|
|
*/
|
|
#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
|
|
|
|
#define MSR_KVM_WALL_CLOCK 0x11
|
|
#define MSR_KVM_SYSTEM_TIME 0x12
|
|
|
|
#define KVM_MSR_ENABLED 1
|
|
/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
|
|
#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
|
|
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
|
|
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
|
|
#define MSR_KVM_STEAL_TIME 0x4b564d03
|
|
#define MSR_KVM_PV_EOI_EN 0x4b564d04
|
|
|
|
struct kvm_steal_time {
|
|
__u64 steal;
|
|
__u32 version;
|
|
__u32 flags;
|
|
__u32 pad[12];
|
|
};
|
|
|
|
#define KVM_STEAL_ALIGNMENT_BITS 5
|
|
#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
|
|
#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
|
|
|
|
#define KVM_MAX_MMU_OP_BATCH 32
|
|
|
|
#define KVM_ASYNC_PF_ENABLED (1 << 0)
|
|
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
|
|
|
|
/* Operations for KVM_HC_MMU_OP */
|
|
#define KVM_MMU_OP_WRITE_PTE 1
|
|
#define KVM_MMU_OP_FLUSH_TLB 2
|
|
#define KVM_MMU_OP_RELEASE_PT 3
|
|
|
|
/* Payload for KVM_HC_MMU_OP */
|
|
struct kvm_mmu_op_header {
|
|
__u32 op;
|
|
__u32 pad;
|
|
};
|
|
|
|
struct kvm_mmu_op_write_pte {
|
|
struct kvm_mmu_op_header header;
|
|
__u64 pte_phys;
|
|
__u64 pte_val;
|
|
};
|
|
|
|
struct kvm_mmu_op_flush_tlb {
|
|
struct kvm_mmu_op_header header;
|
|
};
|
|
|
|
struct kvm_mmu_op_release_pt {
|
|
struct kvm_mmu_op_header header;
|
|
__u64 pt_phys;
|
|
};
|
|
|
|
#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
|
|
#define KVM_PV_REASON_PAGE_READY 2
|
|
|
|
struct kvm_vcpu_pv_apf_data {
|
|
__u32 reason;
|
|
__u8 pad[60];
|
|
__u32 enabled;
|
|
};
|
|
|
|
#define KVM_PV_EOI_BIT 0
|
|
#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
|
|
#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
|
|
#define KVM_PV_EOI_DISABLED 0x0
|
|
|
|
#ifdef __KERNEL__
|
|
#include <asm/processor.h>
|
|
|
|
extern void kvmclock_init(void);
|
|
extern int kvm_register_clock(char *txt);
|
|
|
|
#ifdef CONFIG_KVM_CLOCK
|
|
bool kvm_check_and_clear_guest_paused(void);
|
|
#else
|
|
static inline bool kvm_check_and_clear_guest_paused(void)
|
|
{
|
|
return false;
|
|
}
|
|
#endif /* CONFIG_KVMCLOCK */
|
|
|
|
/* This instruction is vmcall. On non-VT architectures, it will generate a
|
|
* trap that we will then rewrite to the appropriate instruction.
|
|
*/
|
|
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
|
|
|
|
/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
|
|
* instruction. The hypervisor may replace it with something else but only the
|
|
* instructions are guaranteed to be supported.
|
|
*
|
|
* Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
|
|
* The hypercall number should be placed in rax and the return value will be
|
|
* placed in rax. No other registers will be clobbered unless explicited
|
|
* noted by the particular hypercall.
|
|
*/
|
|
|
|
static inline long kvm_hypercall0(unsigned int nr)
|
|
{
|
|
long ret;
|
|
asm volatile(KVM_HYPERCALL
|
|
: "=a"(ret)
|
|
: "a"(nr)
|
|
: "memory");
|
|
return ret;
|
|
}
|
|
|
|
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
|
|
{
|
|
long ret;
|
|
asm volatile(KVM_HYPERCALL
|
|
: "=a"(ret)
|
|
: "a"(nr), "b"(p1)
|
|
: "memory");
|
|
return ret;
|
|
}
|
|
|
|
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
|
|
unsigned long p2)
|
|
{
|
|
long ret;
|
|
asm volatile(KVM_HYPERCALL
|
|
: "=a"(ret)
|
|
: "a"(nr), "b"(p1), "c"(p2)
|
|
: "memory");
|
|
return ret;
|
|
}
|
|
|
|
static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
|
|
unsigned long p2, unsigned long p3)
|
|
{
|
|
long ret;
|
|
asm volatile(KVM_HYPERCALL
|
|
: "=a"(ret)
|
|
: "a"(nr), "b"(p1), "c"(p2), "d"(p3)
|
|
: "memory");
|
|
return ret;
|
|
}
|
|
|
|
static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
|
|
unsigned long p2, unsigned long p3,
|
|
unsigned long p4)
|
|
{
|
|
long ret;
|
|
asm volatile(KVM_HYPERCALL
|
|
: "=a"(ret)
|
|
: "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
|
|
: "memory");
|
|
return ret;
|
|
}
|
|
|
|
static inline int kvm_para_available(void)
|
|
{
|
|
unsigned int eax, ebx, ecx, edx;
|
|
char signature[13];
|
|
|
|
if (boot_cpu_data.cpuid_level < 0)
|
|
return 0; /* So we don't blow up on old processors */
|
|
|
|
if (cpu_has_hypervisor) {
|
|
cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
|
|
memcpy(signature + 0, &ebx, 4);
|
|
memcpy(signature + 4, &ecx, 4);
|
|
memcpy(signature + 8, &edx, 4);
|
|
signature[12] = 0;
|
|
|
|
if (strcmp(signature, "KVMKVMKVM") == 0)
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline unsigned int kvm_arch_para_features(void)
|
|
{
|
|
return cpuid_eax(KVM_CPUID_FEATURES);
|
|
}
|
|
|
|
#ifdef CONFIG_KVM_GUEST
|
|
void __init kvm_guest_init(void);
|
|
void kvm_async_pf_task_wait(u32 token);
|
|
void kvm_async_pf_task_wake(u32 token);
|
|
u32 kvm_read_and_reset_pf_reason(void);
|
|
extern void kvm_disable_steal_time(void);
|
|
#else
|
|
#define kvm_guest_init() do { } while (0)
|
|
#define kvm_async_pf_task_wait(T) do {} while(0)
|
|
#define kvm_async_pf_task_wake(T) do {} while(0)
|
|
static inline u32 kvm_read_and_reset_pf_reason(void)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void kvm_disable_steal_time(void)
|
|
{
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif /* _ASM_X86_KVM_PARA_H */
|