mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
abafbc551f
Accessing the disabled memory space of a PCI device would typically result in a master abort response on conventional PCI, or an unsupported request on PCI express. The user would generally see these as a -1 response for the read return data and the write would be silently discarded, possibly with an uncorrected, non-fatal AER error triggered on the host. Some systems however take it upon themselves to bring down the entire system when they see something that might indicate a loss of data, such as this discarded write to a disabled memory space. To avoid this, we want to try to block the user from accessing memory spaces while they're disabled. We start with a semaphore around the memory enable bit, where writers modify the memory enable state and must be serialized, while readers make use of the memory region and can access in parallel. Writers include both direct manipulation via the command register, as well as any reset path where the internal mechanics of the reset may both explicitly and implicitly disable memory access, and manipulation of the MSI-X configuration, where the MSI-X vector table resides in MMIO space of the device. Readers include the read and write file ops to access the vfio device fd offsets as well as memory mapped access. In the latter case, we make use of our new vma list support to zap, or invalidate, those memory mappings in order to force them to be faulted back in on access. Our semaphore usage will stall user access to MMIO spaces across internal operations like reset, but the user might experience new behavior when trying to access the MMIO space while disabled via the PCI command register. Access via read or write while disabled will return -EIO and access via memory maps will result in a SIGBUS. 
This is expected to be compatible with known use cases and potentially provides better error handling capabilities than present in the hardware, while avoiding the more readily accessible and severe platform error responses that might otherwise occur. Fixes: CVE-2020-12888 Reviewed-by: Peter Xu <peterx@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
215 lines
5.9 KiB
C
215 lines
5.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
|
* Author: Alex Williamson <alex.williamson@redhat.com>
|
|
*
|
|
* Derived from original vfio:
|
|
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
|
|
* Author: Tom Lyon, pugs@cisco.com
|
|
*/
|
|
|
|
#include <linux/mutex.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/irqbypass.h>
|
|
#include <linux/types.h>
|
|
#include <linux/uuid.h>
|
|
#include <linux/notifier.h>
|
|
|
|
#ifndef VFIO_PCI_PRIVATE_H
|
|
#define VFIO_PCI_PRIVATE_H
|
|
|
|
/*
 * A 64-bit offset packs an index into the bits at and above
 * VFIO_PCI_OFFSET_SHIFT; VFIO_PCI_OFFSET_MASK selects the bits below it.
 *
 * Macro arguments are fully parenthesized so that compound expressions
 * such as VFIO_PCI_OFFSET_TO_INDEX(a | b) shift the whole expression.
 */
#define VFIO_PCI_OFFSET_SHIFT		40

#define VFIO_PCI_OFFSET_TO_INDEX(off)	((off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK		(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
|
|
|
|
/* Special capability IDs predefined access */
#define PCI_CAP_ID_INVALID		0xFF	/* default raw access */
#define PCI_CAP_ID_INVALID_VIRT		0xFE	/* default virt access */
|
|
|
|
/* Cap maximum number of ioeventfds per device (arbitrary) */
#define VFIO_PCI_IOEVENTFD_MAX		1000
|
|
|
|
struct vfio_pci_ioeventfd {
|
|
struct list_head next;
|
|
struct virqfd *virqfd;
|
|
void __iomem *addr;
|
|
uint64_t data;
|
|
loff_t pos;
|
|
int bar;
|
|
int count;
|
|
};
|
|
|
|
struct vfio_pci_irq_ctx {
|
|
struct eventfd_ctx *trigger;
|
|
struct virqfd *unmask;
|
|
struct virqfd *mask;
|
|
char *name;
|
|
bool masked;
|
|
struct irq_bypass_producer producer;
|
|
};
|
|
|
|
struct vfio_pci_device;
|
|
struct vfio_pci_region;
|
|
|
|
struct vfio_pci_regops {
|
|
size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
void (*release)(struct vfio_pci_device *vdev,
|
|
struct vfio_pci_region *region);
|
|
int (*mmap)(struct vfio_pci_device *vdev,
|
|
struct vfio_pci_region *region,
|
|
struct vm_area_struct *vma);
|
|
int (*add_capability)(struct vfio_pci_device *vdev,
|
|
struct vfio_pci_region *region,
|
|
struct vfio_info_cap *caps);
|
|
};
|
|
|
|
struct vfio_pci_region {
|
|
u32 type;
|
|
u32 subtype;
|
|
const struct vfio_pci_regops *ops;
|
|
void *data;
|
|
size_t size;
|
|
u32 flags;
|
|
};
|
|
|
|
struct vfio_pci_dummy_resource {
|
|
struct resource resource;
|
|
int index;
|
|
struct list_head res_next;
|
|
};
|
|
|
|
struct vfio_pci_reflck {
|
|
struct kref kref;
|
|
struct mutex lock;
|
|
};
|
|
|
|
struct vfio_pci_vf_token {
|
|
struct mutex lock;
|
|
uuid_t uuid;
|
|
int users;
|
|
};
|
|
|
|
struct vfio_pci_mmap_vma {
|
|
struct vm_area_struct *vma;
|
|
struct list_head vma_next;
|
|
};
|
|
|
|
struct vfio_pci_device {
|
|
struct pci_dev *pdev;
|
|
void __iomem *barmap[PCI_STD_NUM_BARS];
|
|
bool bar_mmap_supported[PCI_STD_NUM_BARS];
|
|
u8 *pci_config_map;
|
|
u8 *vconfig;
|
|
struct perm_bits *msi_perm;
|
|
spinlock_t irqlock;
|
|
struct mutex igate;
|
|
struct vfio_pci_irq_ctx *ctx;
|
|
int num_ctx;
|
|
int irq_type;
|
|
int num_regions;
|
|
struct vfio_pci_region *region;
|
|
u8 msi_qmax;
|
|
u8 msix_bar;
|
|
u16 msix_size;
|
|
u32 msix_offset;
|
|
u32 rbar[7];
|
|
bool pci_2_3;
|
|
bool virq_disabled;
|
|
bool reset_works;
|
|
bool extended_caps;
|
|
bool bardirty;
|
|
bool has_vga;
|
|
bool needs_reset;
|
|
bool nointx;
|
|
bool needs_pm_restore;
|
|
struct pci_saved_state *pci_saved_state;
|
|
struct pci_saved_state *pm_save;
|
|
struct vfio_pci_reflck *reflck;
|
|
int refcnt;
|
|
int ioeventfds_nr;
|
|
struct eventfd_ctx *err_trigger;
|
|
struct eventfd_ctx *req_trigger;
|
|
struct list_head dummy_resources_list;
|
|
struct mutex ioeventfds_lock;
|
|
struct list_head ioeventfds_list;
|
|
struct vfio_pci_vf_token *vf_token;
|
|
struct notifier_block nb;
|
|
struct mutex vma_lock;
|
|
struct list_head vma_list;
|
|
struct rw_semaphore memory_lock;
|
|
};
|
|
|
|
/*
 * Predicates on vdev->irq_type.
 *
 * Arguments are parenthesized so that expressions such as &devs[i] or a
 * conditional "type" expand correctly.  Note that is_irq_none() evaluates
 * its argument up to three times; do not pass expressions with side effects.
 */
#define is_intx(vdev)		((vdev)->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
#define is_msi(vdev)		((vdev)->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
#define is_msix(vdev)		((vdev)->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
#define is_irq_none(vdev)	(!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
#define irq_is(vdev, type)	((vdev)->irq_type == (type))
|
|
|
|
extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
|
|
extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);
|
|
|
|
extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
|
|
uint32_t flags, unsigned index,
|
|
unsigned start, unsigned count, void *data);
|
|
|
|
extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
|
|
char __user *buf, size_t count,
|
|
loff_t *ppos, bool iswrite);
|
|
|
|
extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
|
|
uint64_t data, int count, int fd);
|
|
|
|
/* Global permission-bits setup and teardown (no per-device argument). */
int vfio_pci_init_perm_bits(void);
void vfio_pci_uninit_perm_bits(void);

/* Per-device config setup and release. */
int vfio_config_init(struct vfio_pci_device *vdev);
void vfio_config_free(struct vfio_pci_device *vdev);
|
|
|
|
extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
|
|
unsigned int type, unsigned int subtype,
|
|
const struct vfio_pci_regops *ops,
|
|
size_t size, u32 flags, void *data);
|
|
|
|
extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
|
|
pci_power_t state);
|
|
|
|
extern bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev);
|
|
extern void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device
|
|
*vdev);
|
|
extern u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev);
|
|
extern void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev,
|
|
u16 cmd);
|
|
|
|
#ifdef CONFIG_VFIO_PCI_IGD
int vfio_pci_igd_init(struct vfio_pci_device *vdev);
#else
/* Stub for builds without CONFIG_VFIO_PCI_IGD: always returns -ENODEV. */
static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
|
|
#ifdef CONFIG_VFIO_PCI_NVLINK2
/* "nvdia" (sic) is the established symbol name; do not rename it here. */
int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
#else
/* Stubs for builds without CONFIG_VFIO_PCI_NVLINK2: always return -ENODEV. */
static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}

static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
|
|
#endif /* VFIO_PCI_PRIVATE_H */
|