mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 14:46:55 +07:00
53e86ada8e
Stop partially depending on the vfio pin/unpin pages interface when mdev is available, since that results in a failure if vfio is not enabled. Replace it with a wrapper, which needs to be fixed once mdev support is fully merged. Cc: Jike Song <jike.song@intel.com> Cc: Xiaoguang Chen <xiaoguang.chen@intel.com> Reviewed-by: Xiaoguang Chen <Xiaoguang.chen@intel.com> Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
598 lines
13 KiB
C
598 lines
13 KiB
C
/*
|
|
* KVMGT - the implementation of Intel mediated pass-through framework for KVM
|
|
*
|
|
* Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*
|
|
* Authors:
|
|
* Kevin Tian <kevin.tian@intel.com>
|
|
* Jike Song <jike.song@intel.com>
|
|
* Xiaoguang Chen <xiaoguang.chen@intel.com>
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/device.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/types.h>
|
|
#include <linux/list.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/eventfd.h>
|
|
#include <linux/uuid.h>
|
|
#include <linux/kvm_host.h>
|
|
#include <linux/vfio.h>
|
|
|
|
#include "i915_drv.h"
|
|
#include "gvt.h"
|
|
|
|
/*
 * kvmgt_pin_pages - placeholder for the page-pinning backend.
 *
 * Pins nothing: every argument is ignored, @phys_pfn is left untouched and
 * the function always returns 0.  kvmgt_gfn_to_pfn() expects a return value
 * of 1 for a single page, so it treats every call as a failure for now.
 */
static inline long kvmgt_pin_pages(struct device *dev, unsigned long *user_pfn,
				   long npage, int prot,
				   unsigned long *phys_pfn)
{
	return 0;
}
|
|
/*
 * kvmgt_unpin_pages - placeholder for the page-unpinning backend.
 *
 * Releases nothing: every argument is ignored and the function always
 * returns 0.  gvt_cache_remove() wraps the call in WARN_ON(... != 1), so
 * that warning fires whenever a cached entry is removed through it.
 */
static inline long kvmgt_unpin_pages(struct device *dev, unsigned long *pfn,
				     long npage)
{
	return 0;
}
|
|
|
|
/* GVT core callbacks; set from the @ops argument of kvmgt_host_init(). */
static const struct intel_gvt_ops *intel_gvt_ops;

/*
 * Helper macros copied from vfio-pci.
 *
 * A 64-bit file offset is split into a region index (the bits at and above
 * VFIO_PCI_OFFSET_SHIFT) and an offset inside that region (the low
 * VFIO_PCI_OFFSET_SHIFT bits, selected by VFIO_PCI_OFFSET_MASK).
 *
 * Macro arguments are fully parenthesized so that expression arguments such
 * as "base | off" are shifted as a whole.
 */
#define VFIO_PCI_OFFSET_SHIFT		40
#define VFIO_PCI_OFFSET_TO_INDEX(off)	((off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK		(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
|
|
|
|
struct vfio_region {
|
|
u32 type;
|
|
u32 subtype;
|
|
size_t size;
|
|
u32 flags;
|
|
};
|
|
|
|
struct kvmgt_pgfn {
|
|
gfn_t gfn;
|
|
struct hlist_node hnode;
|
|
};
|
|
|
|
struct kvmgt_guest_info {
|
|
struct kvm *kvm;
|
|
struct intel_vgpu *vgpu;
|
|
struct kvm_page_track_notifier_node track_node;
|
|
#define NR_BKT (1 << 18)
|
|
struct hlist_head ptable[NR_BKT];
|
|
#undef NR_BKT
|
|
};
|
|
|
|
struct gvt_dma {
|
|
struct rb_node node;
|
|
gfn_t gfn;
|
|
kvm_pfn_t pfn;
|
|
};
|
|
|
|
/*
 * __gvt_cache_find - look up the cache entry for @gfn.
 * @vgpu: vGPU whose translation cache (vgpu->vdev.cache) is searched
 * @gfn:  guest frame number to look for
 *
 * Plain binary search over the red-black tree.  No locking is done here;
 * the callers in this file hold vgpu->vdev.cache_lock around the call.
 *
 * Return: the matching entry, or NULL if @gfn is not cached.
 */
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *cur = vgpu->vdev.cache.rb_node;

	while (cur) {
		struct gvt_dma *entry = rb_entry(cur, struct gvt_dma, node);

		if (gfn == entry->gfn)
			return entry;

		/* Smaller keys live in the left subtree, larger in the right. */
		cur = (gfn < entry->gfn) ? cur->rb_left : cur->rb_right;
	}

	return NULL;
}
|
|
|
|
/*
 * gvt_cache_find - return the cached pfn for @gfn.
 * @vgpu: vGPU whose translation cache is consulted
 * @gfn:  guest frame number to translate
 *
 * The pfn is copied out while vgpu->vdev.cache_lock is still held: once the
 * lock is dropped, gvt_cache_remove() or gvt_cache_destroy() may free the
 * entry, so it must not be dereferenced afterwards.
 *
 * Return: the cached pfn, or 0 if @gfn has no cache entry.
 */
static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;
	kvm_pfn_t pfn;

	mutex_lock(&vgpu->vdev.cache_lock);

	entry = __gvt_cache_find(vgpu, gfn);
	pfn = entry ? entry->pfn : 0;

	mutex_unlock(&vgpu->vdev.cache_lock);

	return pfn;
}
|
|
|
|
/*
 * gvt_cache_add - remember the translation @gfn -> @pfn.
 * @vgpu: vGPU whose translation cache receives the entry
 * @gfn:  guest frame number (tree key)
 * @pfn:  frame number to cache for @gfn
 *
 * The new entry is allocated before vgpu->vdev.cache_lock is taken.  If the
 * allocation fails, or if @gfn is already present in the tree, the cache is
 * left unchanged and nothing is reported to the caller.
 */
static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn)
{
	struct rb_node **slot = &vgpu->vdev.cache.rb_node;
	struct rb_node *above = NULL;
	struct gvt_dma *fresh;
	bool duplicate = false;

	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
	if (!fresh)
		return;

	fresh->gfn = gfn;
	fresh->pfn = pfn;

	mutex_lock(&vgpu->vdev.cache_lock);

	/* Walk down to the leaf position where @gfn belongs. */
	while (*slot) {
		struct gvt_dma *cur;

		above = *slot;
		cur = rb_entry(above, struct gvt_dma, node);

		if (gfn == cur->gfn) {
			duplicate = true;
			break;
		}

		slot = (gfn < cur->gfn) ? &above->rb_left : &above->rb_right;
	}

	if (!duplicate) {
		rb_link_node(&fresh->node, above, slot);
		rb_insert_color(&fresh->node, &vgpu->vdev.cache);
	}

	mutex_unlock(&vgpu->vdev.cache_lock);

	/* An existing entry wins; discard the unused allocation. */
	if (duplicate)
		kfree(fresh);
}
|
|
|
|
static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
|
|
struct gvt_dma *entry)
|
|
{
|
|
rb_erase(&entry->node, &vgpu->vdev.cache);
|
|
kfree(entry);
|
|
}
|
|
|
|
/*
 * gvt_cache_remove - drop the cached translation for @gfn, if any.
 * @vgpu: vGPU owning the translation cache
 * @gfn:  guest frame number whose entry should go away
 *
 * Under vgpu->vdev.cache_lock the entry is looked up, its pfn is handed to
 * kvmgt_unpin_pages() (warning if that does not report exactly one page)
 * and the entry is erased and freed.  A missing entry is silently ignored.
 */
static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct device *dev = vgpu->vdev.mdev;
	struct gvt_dma *victim;

	mutex_lock(&vgpu->vdev.cache_lock);

	victim = __gvt_cache_find(vgpu, gfn);
	if (victim) {
		/* Copy the pfn: the unpin helper takes a pointer to it. */
		unsigned long pfn = victim->pfn;

		WARN_ON((kvmgt_unpin_pages(dev, &pfn, 1) != 1));
		__gvt_cache_remove_entry(vgpu, victim);
	}

	mutex_unlock(&vgpu->vdev.cache_lock);
}
|
|
|
|
static void gvt_cache_init(struct intel_vgpu *vgpu)
|
|
{
|
|
vgpu->vdev.cache = RB_ROOT;
|
|
mutex_init(&vgpu->vdev.cache_lock);
|
|
}
|
|
|
|
static void gvt_cache_destroy(struct intel_vgpu *vgpu)
|
|
{
|
|
struct gvt_dma *dma;
|
|
struct rb_node *node = NULL;
|
|
struct device *dev = vgpu->vdev.mdev;
|
|
unsigned long pfn;
|
|
|
|
mutex_lock(&vgpu->vdev.cache_lock);
|
|
while ((node = rb_first(&vgpu->vdev.cache))) {
|
|
dma = rb_entry(node, struct gvt_dma, node);
|
|
pfn = dma->pfn;
|
|
|
|
kvmgt_unpin_pages(dev, &pfn, 1);
|
|
__gvt_cache_remove_entry(vgpu, dma);
|
|
}
|
|
mutex_unlock(&vgpu->vdev.cache_lock);
|
|
}
|
|
|
|
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
|
|
const char *name)
|
|
{
|
|
int i;
|
|
struct intel_vgpu_type *t;
|
|
const char *driver_name = dev_driver_string(
|
|
&gvt->dev_priv->drm.pdev->dev);
|
|
|
|
for (i = 0; i < gvt->num_types; i++) {
|
|
t = &gvt->types[i];
|
|
if (!strncmp(t->name, name + strlen(driver_name) + 1,
|
|
sizeof(t->name)))
|
|
return t;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct attribute *type_attrs[] = {
|
|
NULL,
|
|
};
|
|
|
|
static struct attribute_group *intel_vgpu_type_groups[] = {
|
|
[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
|
|
};
|
|
|
|
static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
|
|
{
|
|
int i, j;
|
|
struct intel_vgpu_type *type;
|
|
struct attribute_group *group;
|
|
|
|
for (i = 0; i < gvt->num_types; i++) {
|
|
type = &gvt->types[i];
|
|
|
|
group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
|
|
if (WARN_ON(!group))
|
|
goto unwind;
|
|
|
|
group->name = type->name;
|
|
group->attrs = type_attrs;
|
|
intel_vgpu_type_groups[i] = group;
|
|
}
|
|
|
|
return true;
|
|
|
|
unwind:
|
|
for (j = 0; j < i; j++) {
|
|
group = intel_vgpu_type_groups[j];
|
|
kfree(group);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
|
|
{
|
|
int i;
|
|
struct attribute_group *group;
|
|
|
|
for (i = 0; i < gvt->num_types; i++) {
|
|
group = intel_vgpu_type_groups[i];
|
|
kfree(group);
|
|
}
|
|
}
|
|
|
|
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
|
|
{
|
|
hash_init(info->ptable);
|
|
}
|
|
|
|
static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
|
|
{
|
|
struct kvmgt_pgfn *p;
|
|
struct hlist_node *tmp;
|
|
int i;
|
|
|
|
hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
|
|
hash_del(&p->hnode);
|
|
kfree(p);
|
|
}
|
|
}
|
|
|
|
static struct kvmgt_pgfn *
|
|
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
|
|
{
|
|
struct kvmgt_pgfn *p, *res = NULL;
|
|
|
|
hash_for_each_possible(info->ptable, p, hnode, gfn) {
|
|
if (gfn == p->gfn) {
|
|
res = p;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
|
|
gfn_t gfn)
|
|
{
|
|
struct kvmgt_pgfn *p;
|
|
|
|
p = __kvmgt_protect_table_find(info, gfn);
|
|
return !!p;
|
|
}
|
|
|
|
/*
 * kvmgt_protect_table_add - record @gfn as write-protected.
 * @info: guest state holding the hash table
 * @gfn:  guest frame number to record
 *
 * Does nothing if @gfn is already recorded.  The entry is allocated with
 * GFP_ATOMIC; kvmgt_write_protect_add() calls this with kvm->mmu_lock (a
 * spinlock) held.  On allocation failure a warning naming the gfn is
 * emitted and the table is left unchanged.
 */
static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *entry;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (WARN(!entry, "gfn: 0x%llx\n", gfn))
		return;

	entry->gfn = gfn;
	hash_add(info->ptable, &entry->hnode, gfn);
}
|
|
|
|
static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
|
|
gfn_t gfn)
|
|
{
|
|
struct kvmgt_pgfn *p;
|
|
|
|
p = __kvmgt_protect_table_find(info, gfn);
|
|
if (p) {
|
|
hash_del(&p->hnode);
|
|
kfree(p);
|
|
}
|
|
}
|
|
|
|
static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
|
|
{
|
|
if (!intel_gvt_init_vgpu_type_groups(gvt))
|
|
return -EFAULT;
|
|
|
|
intel_gvt_ops = ops;
|
|
|
|
/* MDEV is not yet available */
|
|
return -ENODEV;
|
|
}
|
|
|
|
/*
 * kvmgt_host_exit - host-side teardown hook (kvmgt_mpt.host_exit).
 * @dev: unused
 * @gvt: GVT device whose vGPU type groups are released
 */
static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
}
|
|
|
|
/*
 * kvmgt_write_protect_add - start write-tracking guest page @gfn.
 * @handle: struct kvmgt_guest_info pointer cast to unsigned long
 * @gfn:    guest frame number to track
 *
 * The memslot lookup runs inside an SRCU read-side section on kvm->srcu;
 * the page-track update and the bookkeeping in the write-protect table are
 * done under kvm->mmu_lock.  A gfn that is already tracked is left alone.
 *
 * gfn_to_memslot() yields NULL for a gfn that no memslot covers; such a
 * gfn is rejected instead of handing a NULL slot to the page-track code.
 *
 * Return: 0 on success (including "already tracked"), -EINVAL if @gfn is
 * not backed by any memslot.
 */
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int ret = 0;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);

	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		ret = -EINVAL;
		goto out_srcu;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn)) {
		kvm_slot_page_track_add_page(kvm, slot, gfn,
					     KVM_PAGE_TRACK_WRITE);
		kvmgt_protect_table_add(info, gfn);
	}

	spin_unlock(&kvm->mmu_lock);

out_srcu:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}
|
|
|
|
/*
 * kvmgt_write_protect_remove - stop write-tracking guest page @gfn.
 * @handle: struct kvmgt_guest_info pointer cast to unsigned long
 * @gfn:    guest frame number to stop tracking
 *
 * The memslot lookup runs inside an SRCU read-side section on kvm->srcu;
 * the page-track update and the removal from the write-protect table are
 * done under kvm->mmu_lock.  A gfn that is not tracked is left alone.
 *
 * gfn_to_memslot() yields NULL for a gfn that no memslot covers; such a
 * gfn is rejected instead of handing a NULL slot to the page-track code.
 *
 * Return: 0 on success (including "was not tracked"), -EINVAL if @gfn is
 * not backed by any memslot.
 */
static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct kvm *kvm = info->kvm;
	struct kvm_memory_slot *slot;
	int ret = 0;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);

	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		ret = -EINVAL;
		goto out_srcu;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn)) {
		kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
		kvmgt_protect_table_del(info, gfn);
	}

	spin_unlock(&kvm->mmu_lock);

out_srcu:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}
|
|
|
|
static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
|
|
const u8 *val, int len,
|
|
struct kvm_page_track_notifier_node *node)
|
|
{
|
|
struct kvmgt_guest_info *info = container_of(node,
|
|
struct kvmgt_guest_info, track_node);
|
|
|
|
if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
|
|
intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
|
|
(void *)val, len);
|
|
}
|
|
|
|
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
|
|
struct kvm_memory_slot *slot,
|
|
struct kvm_page_track_notifier_node *node)
|
|
{
|
|
int i;
|
|
gfn_t gfn;
|
|
struct kvmgt_guest_info *info = container_of(node,
|
|
struct kvmgt_guest_info, track_node);
|
|
|
|
spin_lock(&kvm->mmu_lock);
|
|
for (i = 0; i < slot->npages; i++) {
|
|
gfn = slot->base_gfn + i;
|
|
if (kvmgt_gfn_is_write_protected(info, gfn)) {
|
|
kvm_slot_page_track_remove_page(kvm, slot, gfn,
|
|
KVM_PAGE_TRACK_WRITE);
|
|
kvmgt_protect_table_del(info, gfn);
|
|
}
|
|
}
|
|
spin_unlock(&kvm->mmu_lock);
|
|
}
|
|
|
|
static bool kvmgt_check_guest(void)
|
|
{
|
|
unsigned int eax, ebx, ecx, edx;
|
|
char s[12];
|
|
unsigned int *i;
|
|
|
|
eax = KVM_CPUID_SIGNATURE;
|
|
ebx = ecx = edx = 0;
|
|
|
|
asm volatile ("cpuid"
|
|
: "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
|
|
:
|
|
: "cc", "memory");
|
|
i = (unsigned int *)s;
|
|
i[0] = ebx;
|
|
i[1] = ecx;
|
|
i[2] = edx;
|
|
|
|
return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM"));
|
|
}
|
|
|
|
/**
|
|
* NOTE:
|
|
* It's actually impossible to check if we are running in KVM host,
|
|
* since the "KVM host" is simply native. So we only dectect guest here.
|
|
*/
|
|
static int kvmgt_detect_host(void)
|
|
{
|
|
#ifdef CONFIG_INTEL_IOMMU
|
|
if (intel_iommu_gfx_mapped) {
|
|
gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n");
|
|
return -ENODEV;
|
|
}
|
|
#endif
|
|
return kvmgt_check_guest() ? -ENODEV : 0;
|
|
}
|
|
|
|
/*
 * kvmgt_attach_vgpu - attach hook (kvmgt_mpt.attach_vgpu).
 *
 * Currently a no-op: neither @vgpu nor @handle is touched.
 *
 * Return: always 0.
 */
static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}
|
|
|
|
/*
 * kvmgt_detach_vgpu - detach hook (kvmgt_mpt.detach_vgpu).
 *
 * Currently a no-op; @handle is ignored.
 */
static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}
|
|
|
|
/*
 * kvmgt_inject_msi - signal the vGPU's MSI eventfd.
 * @handle: struct kvmgt_guest_info pointer cast to unsigned long
 * @addr:   unused
 * @data:   unused
 *
 * Return: 1 if an MSI trigger is configured and eventfd_signal() reported
 * exactly one signalled event; 0 if no trigger is configured or the signal
 * did not go through.
 *
 * NOTE(review): the other int-returning callbacks in this file return 0 on
 * success and a negative errno on failure; here success is 1 and failure
 * is 0.  Confirm what the caller of ->inject_msi expects.
 */
static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
	struct intel_vgpu *vgpu = info->vgpu;

	if (!vgpu->vdev.msi_trigger)
		return false;

	return eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1;
}
|
|
|
|
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
|
|
{
|
|
unsigned long pfn;
|
|
struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle;
|
|
int rc;
|
|
|
|
pfn = gvt_cache_find(info->vgpu, gfn);
|
|
if (pfn != 0)
|
|
return pfn;
|
|
|
|
rc = kvmgt_pin_pages(info->vgpu->vdev.mdev, &gfn, 1,
|
|
IOMMU_READ | IOMMU_WRITE, &pfn);
|
|
if (rc != 1) {
|
|
gvt_err("vfio_pin_pages failed for gfn: 0x%lx\n", gfn);
|
|
return 0;
|
|
}
|
|
|
|
gvt_cache_add(info->vgpu, gfn, pfn);
|
|
return pfn;
|
|
}
|
|
|
|
static void *kvmgt_gpa_to_hva(unsigned long handle, unsigned long gpa)
|
|
{
|
|
unsigned long pfn;
|
|
gfn_t gfn = gpa_to_gfn(gpa);
|
|
|
|
pfn = kvmgt_gfn_to_pfn(handle, gfn);
|
|
if (!pfn)
|
|
return NULL;
|
|
|
|
return (char *)pfn_to_kaddr(pfn) + offset_in_page(gpa);
|
|
}
|
|
|
|
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
|
|
void *buf, unsigned long len, bool write)
|
|
{
|
|
void *hva = NULL;
|
|
|
|
hva = kvmgt_gpa_to_hva(handle, gpa);
|
|
if (!hva)
|
|
return -EFAULT;
|
|
|
|
if (write)
|
|
memcpy(hva, buf, len);
|
|
else
|
|
memcpy(buf, hva, len);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * kvmgt_read_gpa - copy @len bytes from guest physical address @gpa into
 * @buf.
 *
 * Return: whatever kvmgt_rw_gpa() returns for a read.
 */
static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			  void *buf, unsigned long len)
{
	const bool is_write = false;

	return kvmgt_rw_gpa(handle, gpa, buf, len, is_write);
}
|
|
|
|
/*
 * kvmgt_write_gpa - copy @len bytes from @buf to guest physical address
 * @gpa.
 *
 * Return: whatever kvmgt_rw_gpa() returns for a write.
 */
static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			   void *buf, unsigned long len)
{
	const bool is_write = true;

	return kvmgt_rw_gpa(handle, gpa, buf, len, is_write);
}
|
|
|
|
/*
 * kvmgt_virt_to_pfn - convert kernel virtual address @addr to a page frame
 * number: take its physical address via __pa() and round down to a frame.
 */
static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}
|
|
|
|
struct intel_gvt_mpt kvmgt_mpt = {
|
|
.detect_host = kvmgt_detect_host,
|
|
.host_init = kvmgt_host_init,
|
|
.host_exit = kvmgt_host_exit,
|
|
.attach_vgpu = kvmgt_attach_vgpu,
|
|
.detach_vgpu = kvmgt_detach_vgpu,
|
|
.inject_msi = kvmgt_inject_msi,
|
|
.from_virt_to_mfn = kvmgt_virt_to_pfn,
|
|
.set_wp_page = kvmgt_write_protect_add,
|
|
.unset_wp_page = kvmgt_write_protect_remove,
|
|
.read_gpa = kvmgt_read_gpa,
|
|
.write_gpa = kvmgt_write_gpa,
|
|
.gfn_to_mfn = kvmgt_gfn_to_pfn,
|
|
};
|
|
EXPORT_SYMBOL_GPL(kvmgt_mpt);
|
|
|
|
/* Module load hook: nothing needs to be set up at load time; returns 0. */
static int __init kvmgt_init(void)
{
	return 0;
}
|
|
|
|
/* Module unload hook: nothing needs to be torn down. */
static void __exit kvmgt_exit(void)
{
}
|
|
|
|
module_init(kvmgt_init);
|
|
module_exit(kvmgt_exit);
|
|
|
|
MODULE_LICENSE("GPL and additional rights");
|
|
MODULE_AUTHOR("Intel Corporation");
|