linux_dsm_epyc7002/arch/powerpc/include/asm/kvm_host.h
Takuya Yoshikawa b3ae209697 KVM: Introduce kvm_unmap_hva_range() for kvm_mmu_notifier_invalidate_range_start()
When we tested KVM under memory pressure, with THP enabled on the host,
we noticed that MMU notifier took a long time to invalidate huge pages.

Since the invalidation was done with mmu_lock held, it not only wasted
the CPU but also made the host harder to respond.

This patch mitigates this by using kvm_handle_hva_range().

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Alexander Graf <agraf@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
2012-07-18 16:55:04 -03:00

554 lines
12 KiB
C

/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#ifndef __POWERPC_KVM_HOST_H__
#define __POWERPC_KVM_HOST_H__
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <linux/threads.h>
#include <linux/spinlock.h>
#include <linux/kvm_para.h>
#include <linux/list.h>
#include <linux/atomic.h>
#include <asm/kvm_asm.h>
#include <asm/processor.h>
#include <asm/page.h>
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
#ifdef CONFIG_KVM_BOOK3S_64_HV
#include <linux/mmu_notifier.h>
#define KVM_ARCH_WANT_MMU_NOTIFIER
struct kvm;
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
#endif
/* We don't currently support large pages. */
#define KVM_HPAGE_GFN_SHIFT(x) 0
#define KVM_NR_PAGE_SIZES 1
#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
#define HPTEG_HASH_BITS_PTE_LONG 12
#define HPTEG_HASH_BITS_VPTE 13
#define HPTEG_HASH_BITS_VPTE_LONG 5
#define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE)
#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG)
#define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE)
#define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG)
/* Physical Address Mask - allowed range of real mode RAM access */
#define KVM_PAM 0x0fffffffffffffffULL
struct kvm;
struct kvm_run;
struct kvm_vcpu;
struct lppaca;
struct slb_shadow;
struct dtl_entry;
struct kvm_vm_stat {
u32 remote_tlb_flush;
};
struct kvm_vcpu_stat {
u32 sum_exits;
u32 mmio_exits;
u32 dcr_exits;
u32 signal_exits;
u32 light_exits;
/* Account for special types of light exits: */
u32 itlb_real_miss_exits;
u32 itlb_virt_miss_exits;
u32 dtlb_real_miss_exits;
u32 dtlb_virt_miss_exits;
u32 syscall_exits;
u32 isi_exits;
u32 dsi_exits;
u32 emulated_inst_exits;
u32 dec_exits;
u32 ext_intr_exits;
u32 halt_wakeup;
u32 dbell_exits;
u32 gdbell_exits;
#ifdef CONFIG_PPC_BOOK3S
u32 pf_storage;
u32 pf_instruc;
u32 sp_storage;
u32 sp_instruc;
u32 queue_intr;
u32 ld;
u32 ld_slow;
u32 st;
u32 st_slow;
#endif
};
enum kvm_exit_types {
MMIO_EXITS,
DCR_EXITS,
SIGNAL_EXITS,
ITLB_REAL_MISS_EXITS,
ITLB_VIRT_MISS_EXITS,
DTLB_REAL_MISS_EXITS,
DTLB_VIRT_MISS_EXITS,
SYSCALL_EXITS,
ISI_EXITS,
DSI_EXITS,
EMULATED_INST_EXITS,
EMULATED_MTMSRWE_EXITS,
EMULATED_WRTEE_EXITS,
EMULATED_MTSPR_EXITS,
EMULATED_MFSPR_EXITS,
EMULATED_MTMSR_EXITS,
EMULATED_MFMSR_EXITS,
EMULATED_TLBSX_EXITS,
EMULATED_TLBWE_EXITS,
EMULATED_RFI_EXITS,
EMULATED_RFCI_EXITS,
DEC_EXITS,
EXT_INTR_EXITS,
HALT_WAKEUP,
USR_PR_INST,
FP_UNAVAIL,
DEBUG_EXITS,
TIMEINGUEST,
DBELL_EXITS,
GDBELL_EXITS,
__NUMBER_OF_KVM_EXIT_TYPES
};
/* allow access to big endian 32bit upper/lower parts and 64bit var */
struct kvmppc_exit_timing {
union {
u64 tv64;
struct {
u32 tbu, tbl;
} tv32;
};
};
struct kvmppc_pginfo {
unsigned long pfn;
atomic_t refcnt;
};
struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
u64 liobn;
u32 window_size;
struct page *pages[0];
};
struct kvmppc_linear_info {
void *base_virt;
unsigned long base_pfn;
unsigned long npages;
struct list_head list;
atomic_t use_count;
int type;
};
/*
* The reverse mapping array has one entry for each HPTE,
* which stores the guest's view of the second word of the HPTE
* (including the guest physical address of the mapping),
* plus forward and backward pointers in a doubly-linked ring
* of HPTEs that map the same host page. The pointers in this
* ring are 32-bit HPTE indexes, to save space.
*/
struct revmap_entry {
unsigned long guest_rpte;
unsigned int forw, back;
};
/*
* We use the top bit of each memslot->rmap entry as a lock bit,
* and bit 32 as a present flag. The bottom 32 bits are the
* index in the guest HPT of a HPTE that points to the page.
*/
#define KVMPPC_RMAP_LOCK_BIT 63
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
/* Low-order bits in kvm->arch.slot_phys[][] */
#define KVMPPC_PAGE_ORDER_MASK 0x1f
#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch_memory_slot {
};
struct kvm_arch {
unsigned int lpid;
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
unsigned long host_sdr1;
int tlbie_lock;
unsigned long lpcr;
unsigned long rmor;
struct kvmppc_linear_info *rma;
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
u32 hpt_order;
atomic_t vcpus_running;
unsigned long hpt_npte;
unsigned long hpt_mask;
spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
unsigned short last_vcpu[NR_CPUS];
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
#endif
};
/*
* Struct for a virtual core.
* Note: entry_exit_count combines an entry count in the bottom 8 bits
* and an exit count in the next 8 bits. This is so that we can
* atomically increment the entry count iff the exit count is 0
* without taking the lock.
*/
struct kvmppc_vcore {
int n_runnable;
int n_busy;
int num_threads;
int entry_exit_count;
int n_woken;
int nap_count;
int napping_threads;
u16 pcpu;
u8 vcore_state;
u8 in_guest;
struct list_head runnable_threads;
spinlock_t lock;
wait_queue_head_t wq;
u64 stolen_tb;
u64 preempt_tb;
struct kvm_vcpu *runner;
};
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8)
/* Values for vcore_state */
#define VCORE_INACTIVE 0
#define VCORE_RUNNING 1
#define VCORE_EXITING 2
#define VCORE_SLEEPING 3
/*
* Struct used to manage memory for a virtual processor area
* registered by a PAPR guest. There are three types of area
* that a guest can register.
*/
struct kvmppc_vpa {
void *pinned_addr; /* Address in kernel linear mapping */
void *pinned_end; /* End of region */
unsigned long next_gpa; /* Guest phys addr for update */
unsigned long len; /* Number of bytes required */
u8 update_pending; /* 1 => update pinned_addr from next_gpa */
};
struct kvmppc_pte {
ulong eaddr;
u64 vpage;
ulong raddr;
bool may_read : 1;
bool may_write : 1;
bool may_execute : 1;
};
struct kvmppc_mmu {
/* book3s_64 only */
void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs);
u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr);
u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr);
void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr);
void (*slbia)(struct kvm_vcpu *vcpu);
/* book3s */
void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data);
void (*reset_msr)(struct kvm_vcpu *vcpu);
void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
bool (*is_dcbz32)(struct kvm_vcpu *vcpu);
};
struct kvmppc_slb {
u64 esid;
u64 vsid;
u64 orige;
u64 origv;
bool valid : 1;
bool Ks : 1;
bool Kp : 1;
bool nx : 1;
bool large : 1; /* PTEs are 16MB */
bool tb : 1; /* 1TB segment */
bool class : 1;
};
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
#ifdef CONFIG_PPC_BOOK3S
struct kvmppc_slb slb[64];
int slb_max; /* 1 + index of last valid entry in slb[] */
int slb_nr; /* total number of entries in SLB */
struct kvmppc_mmu mmu;
#endif
ulong gpr[32];
u64 fpr[32];
u64 fpscr;
#ifdef CONFIG_SPE
ulong evr[32];
ulong spefscr;
ulong host_spefscr;
u64 acc;
#endif
#ifdef CONFIG_ALTIVEC
vector128 vr[32];
vector128 vscr;
#endif
#ifdef CONFIG_VSX
u64 vsr[64];
#endif
#ifdef CONFIG_KVM_BOOKE_HV
u32 host_mas4;
u32 host_mas6;
u32 shadow_epcr;
u32 epcr;
u32 shadow_msrp;
u32 eplc;
u32 epsc;
u32 oldpir;
#endif
#ifdef CONFIG_PPC_BOOK3S
/* For Gekko paired singles */
u32 qpr[32];
#endif
ulong pc;
ulong ctr;
ulong lr;
ulong xer;
u32 cr;
#ifdef CONFIG_PPC_BOOK3S
ulong hflags;
ulong guest_owned_ext;
ulong purr;
ulong spurr;
ulong dscr;
ulong amr;
ulong uamor;
u32 ctrl;
ulong dabr;
#endif
u32 vrsave; /* also USPRG0 */
u32 mmucr;
/* shadow_msr is unused for BookE HV */
ulong shadow_msr;
ulong csrr0;
ulong csrr1;
ulong dsrr0;
ulong dsrr1;
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
u32 dec;
#ifdef CONFIG_BOOKE
u32 decar;
#endif
u32 tbl;
u32 tbu;
u32 tcr;
ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
u32 ivor[64];
ulong ivpr;
u32 pvr;
u32 shadow_pid;
u32 shadow_pid1;
u32 pid;
u32 swap_pid;
u32 ccr0;
u32 ccr1;
u32 dbcr0;
u32 dbcr1;
u32 dbsr;
u64 mmcr[3];
u32 pmc[8];
#ifdef CONFIG_KVM_EXIT_TIMING
struct mutex exit_timing_lock;
struct kvmppc_exit_timing timing_exit;
struct kvmppc_exit_timing timing_last_enter;
u32 last_exit_type;
u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_last_exit;
struct dentry *debugfs_exit_timing;
#endif
#ifdef CONFIG_PPC_BOOK3S
ulong fault_dar;
u32 fault_dsisr;
#endif
#ifdef CONFIG_BOOKE
ulong fault_dear;
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
#endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */
u8 mmio_is_bigendian;
u8 mmio_sign_extend;
u8 dcr_needed;
u8 dcr_is_write;
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
u8 sane;
u8 cpu_type;
u8 hcall_needed;
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
struct hrtimer dec_timer;
struct tasklet_struct tasklet;
u64 dec_jiffies;
u64 dec_expires;
unsigned long pending_exceptions;
u16 last_cpu;
u8 ceded;
u8 prodded;
u32 last_inst;
wait_queue_head_t *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
int state;
int ptid;
bool timer_running;
wait_queue_head_t cpu_run;
struct kvm_vcpu_arch_shared *shared;
unsigned long magic_page_pa; /* phys addr to map the magic page to */
unsigned long magic_page_ea; /* effect. addr to map the magic page to */
#ifdef CONFIG_KVM_BOOK3S_64_HV
struct kvm_vcpu_arch_shared shregs;
unsigned long pgfault_addr;
long pgfault_index;
unsigned long pgfault_hpte[2];
struct list_head run_list;
struct task_struct *run_task;
struct kvm_run *kvm_run;
pgd_t *pgdir;
spinlock_t vpa_update_lock;
struct kvmppc_vpa vpa;
struct kvmppc_vpa dtl;
struct dtl_entry *dtl_ptr;
unsigned long dtl_index;
u64 stolen_logged;
struct kvmppc_vpa slb_shadow;
#endif
};
/* Values for vcpu->arch.state */
#define KVMPPC_VCPU_STOPPED 0
#define KVMPPC_VCPU_BUSY_IN_HOST 1
#define KVMPPC_VCPU_RUNNABLE 2
/* Values for vcpu->arch.io_gpr */
#define KVM_MMIO_REG_MASK 0x001f
#define KVM_MMIO_REG_EXT_MASK 0xffe0
#define KVM_MMIO_REG_GPR 0x0000
#define KVM_MMIO_REG_FPR 0x0020
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
#define __KVM_HAVE_ARCH_WQP
#endif /* __POWERPC_KVM_HOST_H__ */