2019-06-04 15:11:33 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2012-07-31 21:16:24 +07:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
|
|
|
* Author: Alex Williamson <alex.williamson@redhat.com>
|
|
|
|
*
|
|
|
|
* Derived from original vfio:
|
|
|
|
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
|
|
|
|
* Author: Tom Lyon, pugs@cisco.com
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/mutex.h>
|
|
|
|
#include <linux/pci.h>
|
2015-09-18 21:29:50 +07:00
|
|
|
#include <linux/irqbypass.h>
|
2016-02-23 06:02:39 +07:00
|
|
|
#include <linux/types.h>
|
vfio/pci: Introduce VF token
If we enable SR-IOV on a vfio-pci owned PF, the resulting VFs are not
fully isolated from the PF. The PF can always cause a denial of service
to the VF, even if by simply resetting itself. The degree to which a PF
can access the data passed through a VF or interfere with its operation
is dependent on a given SR-IOV implementation. Therefore we want to
avoid a scenario where an existing vfio-pci based userspace driver might
assume the PF driver is trusted, for example assigning a PF to one VM
and VF to another with some expectation of isolation. IOMMU grouping
could be a solution to this, but imposes an unnecessarily strong
relationship between PF and VF drivers if they need to operate with the
same IOMMU context. Instead we introduce a "VF token", which is
essentially just a shared secret between PF and VF drivers, implemented
as a UUID.
The VF token can be set by a vfio-pci based PF driver and must be known
by the vfio-pci based VF driver in order to gain access to the device.
This allows the degree to which this VF token is considered secret to be
determined by the applications and environment. For example, a VM might
generate a random UUID known only internally to the hypervisor while a
userspace networking appliance might use a shared, or even well-known,
UUID among the application drivers.
To incorporate this VF token, the VFIO_GROUP_GET_DEVICE_FD interface is
extended to accept key=value pairs in addition to the device name. This
allows us to most easily deny user access to the device without risk
that existing userspace drivers assume region offsets, IRQs, and other
device features, leading to more elaborate error paths. The format of
these options is expected to take the form:
"$DEVICE_NAME $OPTION1=$VALUE1 $OPTION2=$VALUE2"
Where the device name is always provided first for compatibility and
additional options are specified in a space-separated list. The
relation between and requirements for the additional options will be
vfio bus driver dependent; however, any unknown or unused option within
this schema should return an error. This allows for future use of unknown
options as well as a positive indication to the user that an option is
used.
An example VF token option would take this form:
"0000:03:00.0 vf_token=2ab74924-c335-45f4-9b16-8569e5b08258"
When accessing a VF where the PF is making use of vfio-pci, the user
MUST provide the current vf_token. When accessing a PF, the user MUST
provide the current vf_token IF there are active VF users or MAY provide
a vf_token in order to set the current VF token when no VF users are
active. The former requirement assures VF users that an unassociated
driver cannot usurp the PF device. These semantics also imply that a
VF token MUST be set by a PF driver before VF drivers can access their
device, the default token is random and mechanisms to read the token are
not provided in order to protect the VF token of previous users. Use of
the vf_token option outside of these cases will return an error, as
discussed above.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-03-24 22:28:27 +07:00
|
|
|
#include <linux/uuid.h>
|
2012-07-31 21:16:24 +07:00
|
|
|
|
|
|
|
#ifndef VFIO_PCI_PRIVATE_H
|
|
|
|
#define VFIO_PCI_PRIVATE_H
|
|
|
|
|
|
|
|
/*
 * Offsets handled by these helpers carry an index in the bits at and above
 * VFIO_PCI_OFFSET_SHIFT; VFIO_PCI_OFFSET_MASK selects the bits below it.
 */
#define VFIO_PCI_OFFSET_SHIFT		40

/* The argument is parenthesized so that expressions such as (a | b) work. */
#define VFIO_PCI_OFFSET_TO_INDEX(off)	((off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK		(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
|
|
|
|
|
2016-02-23 06:02:41 +07:00
|
|
|
/* Special capability IDs with predefined access */
#define PCI_CAP_ID_INVALID		0xFF	/* default raw access */
#define PCI_CAP_ID_INVALID_VIRT		0xFE	/* default virt access */
|
|
|
|
|
2018-03-22 01:46:21 +07:00
|
|
|
/* Cap on the number of ioeventfds per device (the value is arbitrary) */
#define VFIO_PCI_IOEVENTFD_MAX		1000
|
|
|
|
|
|
|
|
struct vfio_pci_ioeventfd {
|
|
|
|
struct list_head next;
|
|
|
|
struct virqfd *virqfd;
|
|
|
|
void __iomem *addr;
|
|
|
|
uint64_t data;
|
|
|
|
loff_t pos;
|
|
|
|
int bar;
|
|
|
|
int count;
|
|
|
|
};
|
|
|
|
|
2012-07-31 21:16:24 +07:00
|
|
|
struct vfio_pci_irq_ctx {
|
|
|
|
struct eventfd_ctx *trigger;
|
|
|
|
struct virqfd *unmask;
|
|
|
|
struct virqfd *mask;
|
|
|
|
char *name;
|
|
|
|
bool masked;
|
2015-09-18 21:29:50 +07:00
|
|
|
struct irq_bypass_producer producer;
|
2012-07-31 21:16:24 +07:00
|
|
|
};
|
|
|
|
|
2016-02-23 06:02:39 +07:00
|
|
|
struct vfio_pci_device;
|
|
|
|
struct vfio_pci_region;
|
|
|
|
|
|
|
|
struct vfio_pci_regops {
|
|
|
|
size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
|
|
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
void (*release)(struct vfio_pci_device *vdev,
|
|
|
|
struct vfio_pci_region *region);
|
2018-12-19 15:52:30 +07:00
|
|
|
int (*mmap)(struct vfio_pci_device *vdev,
|
|
|
|
struct vfio_pci_region *region,
|
|
|
|
struct vm_area_struct *vma);
|
2018-12-19 15:52:31 +07:00
|
|
|
int (*add_capability)(struct vfio_pci_device *vdev,
|
|
|
|
struct vfio_pci_region *region,
|
|
|
|
struct vfio_info_cap *caps);
|
2016-02-23 06:02:39 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
struct vfio_pci_region {
|
|
|
|
u32 type;
|
|
|
|
u32 subtype;
|
|
|
|
const struct vfio_pci_regops *ops;
|
|
|
|
void *data;
|
|
|
|
size_t size;
|
|
|
|
u32 flags;
|
|
|
|
};
|
|
|
|
|
2016-06-30 14:21:24 +07:00
|
|
|
struct vfio_pci_dummy_resource {
|
|
|
|
struct resource resource;
|
|
|
|
int index;
|
|
|
|
struct list_head res_next;
|
|
|
|
};
|
|
|
|
|
2018-12-13 02:51:07 +07:00
|
|
|
struct vfio_pci_reflck {
|
|
|
|
struct kref kref;
|
|
|
|
struct mutex lock;
|
|
|
|
};
|
|
|
|
|
vfio/pci: Introduce VF token
If we enable SR-IOV on a vfio-pci owned PF, the resulting VFs are not
fully isolated from the PF. The PF can always cause a denial of service
to the VF, even if by simply resetting itself. The degree to which a PF
can access the data passed through a VF or interfere with its operation
is dependent on a given SR-IOV implementation. Therefore we want to
avoid a scenario where an existing vfio-pci based userspace driver might
assume the PF driver is trusted, for example assigning a PF to one VM
and VF to another with some expectation of isolation. IOMMU grouping
could be a solution to this, but imposes an unnecessarily strong
relationship between PF and VF drivers if they need to operate with the
same IOMMU context. Instead we introduce a "VF token", which is
essentially just a shared secret between PF and VF drivers, implemented
as a UUID.
The VF token can be set by a vfio-pci based PF driver and must be known
by the vfio-pci based VF driver in order to gain access to the device.
This allows the degree to which this VF token is considered secret to be
determined by the applications and environment. For example, a VM might
generate a random UUID known only internally to the hypervisor while a
userspace networking appliance might use a shared, or even well-known,
UUID among the application drivers.
To incorporate this VF token, the VFIO_GROUP_GET_DEVICE_FD interface is
extended to accept key=value pairs in addition to the device name. This
allows us to most easily deny user access to the device without risk
that existing userspace drivers assume region offsets, IRQs, and other
device features, leading to more elaborate error paths. The format of
these options is expected to take the form:
"$DEVICE_NAME $OPTION1=$VALUE1 $OPTION2=$VALUE2"
Where the device name is always provided first for compatibility and
additional options are specified in a space-separated list. The
relation between and requirements for the additional options will be
vfio bus driver dependent; however, any unknown or unused option within
this schema should return an error. This allows for future use of unknown
options as well as a positive indication to the user that an option is
used.
An example VF token option would take this form:
"0000:03:00.0 vf_token=2ab74924-c335-45f4-9b16-8569e5b08258"
When accessing a VF where the PF is making use of vfio-pci, the user
MUST provide the current vf_token. When accessing a PF, the user MUST
provide the current vf_token IF there are active VF users or MAY provide
a vf_token in order to set the current VF token when no VF users are
active. The former requirement assures VF users that an unassociated
driver cannot usurp the PF device. These semantics also imply that a
VF token MUST be set by a PF driver before VF drivers can access their
device, the default token is random and mechanisms to read the token are
not provided in order to protect the VF token of previous users. Use of
the vf_token option outside of these cases will return an error, as
discussed above.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-03-24 22:28:27 +07:00
|
|
|
struct vfio_pci_vf_token {
|
|
|
|
struct mutex lock;
|
|
|
|
uuid_t uuid;
|
|
|
|
int users;
|
|
|
|
};
|
|
|
|
|
2012-07-31 21:16:24 +07:00
|
|
|
struct vfio_pci_device {
|
|
|
|
struct pci_dev *pdev;
|
2019-09-28 06:43:08 +07:00
|
|
|
void __iomem *barmap[PCI_STD_NUM_BARS];
|
|
|
|
bool bar_mmap_supported[PCI_STD_NUM_BARS];
|
2012-07-31 21:16:24 +07:00
|
|
|
u8 *pci_config_map;
|
|
|
|
u8 *vconfig;
|
|
|
|
struct perm_bits *msi_perm;
|
|
|
|
spinlock_t irqlock;
|
|
|
|
struct mutex igate;
|
|
|
|
struct vfio_pci_irq_ctx *ctx;
|
|
|
|
int num_ctx;
|
|
|
|
int irq_type;
|
2016-02-23 06:02:39 +07:00
|
|
|
int num_regions;
|
|
|
|
struct vfio_pci_region *region;
|
2012-07-31 21:16:24 +07:00
|
|
|
u8 msi_qmax;
|
|
|
|
u8 msix_bar;
|
|
|
|
u16 msix_size;
|
|
|
|
u32 msix_offset;
|
|
|
|
u32 rbar[7];
|
|
|
|
bool pci_2_3;
|
|
|
|
bool virq_disabled;
|
|
|
|
bool reset_works;
|
|
|
|
bool extended_caps;
|
|
|
|
bool bardirty;
|
2013-02-19 00:11:13 +07:00
|
|
|
bool has_vga;
|
2014-08-08 00:12:07 +07:00
|
|
|
bool needs_reset;
|
vfio/pci: Hide broken INTx support from user
INTx masking has two components, the first is that we need the ability
to prevent the device from continuing to assert INTx. This is
provided via the DisINTx bit in the command register and is the only
thing we can really probe for when testing if INTx masking is
supported. The second component is that the device needs to indicate
if INTx is asserted via the interrupt status bit in the device status
register. With these two features we can generically determine if one
of the devices we own is asserting INTx, signal the user, and mask the
interrupt while the user services the device.
Generally if one or both of these components is broken we resort to
APIC level interrupt masking, which requires an exclusive interrupt
since we have no way to determine the source of the interrupt in a
shared configuration. This often makes it difficult or impossible to
configure the system for userspace use of the device, for an interrupt
mode that the user may not need.
One possible configuration of broken INTx masking is that the DisINTx
support is fully functional, but the interrupt status bit never
signals interrupt assertion. In this case we do have the ability to
prevent the device from asserting INTx, but lack the ability to
identify the interrupt source. For this case we can simply pretend
that the device lacks INTx support entirely, keeping DisINTx set on
the physical device, virtualizing this bit for the user, and
virtualizing the interrupt pin register to indicate no INTx support.
We already support virtualization of the DisINTx bit and already
virtualize the interrupt pin for platforms without INTx support. By
tying these components together, setting DisINTx on open and reset,
and identifying devices broken in this particular way, we can provide
support for them w/o the handicap of APIC level INTx masking.
Intel i40e (XL710/X710) 10/20/40GbE NICs have been identified as being
broken in this specific way. We leave the vfio-pci.nointxmask option
as a mechanism to bypass this support, enabling INTx on the device
with all the requirements of APIC level masking.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: John Ronciak <john.ronciak@intel.com>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
2016-03-25 02:05:18 +07:00
|
|
|
bool nointx;
|
2019-02-10 03:43:30 +07:00
|
|
|
bool needs_pm_restore;
|
2012-07-31 21:16:24 +07:00
|
|
|
struct pci_saved_state *pci_saved_state;
|
2019-02-10 03:43:30 +07:00
|
|
|
struct pci_saved_state *pm_save;
|
2018-12-13 02:51:07 +07:00
|
|
|
struct vfio_pci_reflck *reflck;
|
2014-08-08 00:12:04 +07:00
|
|
|
int refcnt;
|
2018-03-22 01:46:21 +07:00
|
|
|
int ioeventfds_nr;
|
2013-03-11 22:31:22 +07:00
|
|
|
struct eventfd_ctx *err_trigger;
|
2015-02-07 05:05:08 +07:00
|
|
|
struct eventfd_ctx *req_trigger;
|
2016-06-30 14:21:24 +07:00
|
|
|
struct list_head dummy_resources_list;
|
2018-03-22 01:46:21 +07:00
|
|
|
struct mutex ioeventfds_lock;
|
|
|
|
struct list_head ioeventfds_list;
|
vfio/pci: Introduce VF token
If we enable SR-IOV on a vfio-pci owned PF, the resulting VFs are not
fully isolated from the PF. The PF can always cause a denial of service
to the VF, even if by simply resetting itself. The degree to which a PF
can access the data passed through a VF or interfere with its operation
is dependent on a given SR-IOV implementation. Therefore we want to
avoid a scenario where an existing vfio-pci based userspace driver might
assume the PF driver is trusted, for example assigning a PF to one VM
and VF to another with some expectation of isolation. IOMMU grouping
could be a solution to this, but imposes an unnecessarily strong
relationship between PF and VF drivers if they need to operate with the
same IOMMU context. Instead we introduce a "VF token", which is
essentially just a shared secret between PF and VF drivers, implemented
as a UUID.
The VF token can be set by a vfio-pci based PF driver and must be known
by the vfio-pci based VF driver in order to gain access to the device.
This allows the degree to which this VF token is considered secret to be
determined by the applications and environment. For example a VM might
generate a random UUID known only internally to the hypervisor while a
userspace networking appliance might use a shared, or even well know,
UUID among the application drivers.
To incorporate this VF token, the VFIO_GROUP_GET_DEVICE_FD interface is
extended to accept key=value pairs in addition to the device name. This
allows us to most easily deny user access to the device without risk
that existing userspace drivers assume region offsets, IRQs, and other
device features, leading to more elaborate error paths. The format of
these options are expected to take the form:
"$DEVICE_NAME $OPTION1=$VALUE1 $OPTION2=$VALUE2"
Where the device name is always provided first for compatibility and
additional options are specified in a space separated list. The
relation between and requirements for the additional options will be
vfio bus driver dependent, however unknown or unused option within this
schema should return error. This allow for future use of unknown
options as well as a positive indication to the user that an option is
used.
An example VF token option would take this form:
"0000:03:00.0 vf_token=2ab74924-c335-45f4-9b16-8569e5b08258"
When accessing a VF where the PF is making use of vfio-pci, the user
MUST provide the current vf_token. When accessing a PF, the user MUST
provide the current vf_token IF there are active VF users or MAY provide
a vf_token in order to set the current VF token when no VF users are
active. The former requirement assures VF users that an unassociated
driver cannot usurp the PF device. These semantics also imply that a
VF token MUST be set by a PF driver before VF drivers can access their
device, the default token is random and mechanisms to read the token are
not provided in order to protect the VF token of previous users. Use of
the vf_token option outside of these cases will return an error, as
discussed above.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2020-03-24 22:28:27 +07:00
|
|
|
struct vfio_pci_vf_token *vf_token;
|
2012-07-31 21:16:24 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Interrupt-mode predicates on a device pointer's irq_type. Arguments are
 * parenthesized so that expressions such as &dev or (c ? a : b) expand
 * correctly.
 */
#define is_intx(vdev)	((vdev)->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
#define is_msi(vdev)	((vdev)->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
#define is_msix(vdev)	((vdev)->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
#define irq_is(vdev, type) ((vdev)->irq_type == (type))
|
|
|
|
|
|
|
|
extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
|
|
|
|
extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);
|
|
|
|
|
|
|
|
extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
|
|
|
|
uint32_t flags, unsigned index,
|
|
|
|
unsigned start, unsigned count, void *data);
|
|
|
|
|
2013-02-15 04:02:12 +07:00
|
|
|
extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
|
|
|
|
char __user *buf, size_t count,
|
|
|
|
loff_t *ppos, bool iswrite);
|
|
|
|
|
|
|
|
extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
|
|
|
|
size_t count, loff_t *ppos, bool iswrite);
|
2012-07-31 21:16:24 +07:00
|
|
|
|
2013-02-19 00:11:13 +07:00
|
|
|
extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
|
|
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
|
2018-03-22 01:46:21 +07:00
|
|
|
extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
|
|
|
|
uint64_t data, int count, int fd);
|
|
|
|
|
2012-07-31 21:16:24 +07:00
|
|
|
/* Setup/teardown of the perm_bits tables (no device argument). */
int vfio_pci_init_perm_bits(void);
void vfio_pci_uninit_perm_bits(void);

/* Per-device config setup/teardown. */
int vfio_config_init(struct vfio_pci_device *vdev);
void vfio_config_free(struct vfio_pci_device *vdev);
|
2016-02-23 06:02:39 +07:00
|
|
|
|
|
|
|
extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
|
|
|
|
unsigned int type, unsigned int subtype,
|
|
|
|
const struct vfio_pci_regops *ops,
|
|
|
|
size_t size, u32 flags, void *data);
|
2019-02-10 03:43:30 +07:00
|
|
|
|
|
|
|
extern int vfio_pci_set_power_state(struct vfio_pci_device *vdev,
|
|
|
|
pci_power_t state);
|
|
|
|
|
2016-02-23 06:02:43 +07:00
|
|
|
#ifdef CONFIG_VFIO_PCI_IGD
int vfio_pci_igd_init(struct vfio_pci_device *vdev);
#else
/* Stub for builds without CONFIG_VFIO_PCI_IGD: always returns -ENODEV. */
static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
|
2018-12-20 08:10:36 +07:00
|
|
|
#ifdef CONFIG_VFIO_PCI_NVLINK2
int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
#else
/*
 * Stubs for builds without CONFIG_VFIO_PCI_NVLINK2: both always return
 * -ENODEV.
 */
static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}

static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
{
	return -ENODEV;
}
#endif
|
2012-07-31 21:16:24 +07:00
|
|
|
#endif /* VFIO_PCI_PRIVATE_H */
|