virtio: oops fixes

virtio pci rework using shared interrupts caused a lot of issues. We
 tried to fix them but ran out of time. Revert for now, and revisit the
 issue for the next kernel.
 
 Luckily we are able to do this without losing automatic
 interrupt NUMA affinity which was the main motivator for the
 rework.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJY6/oVAAoJECgfDbjSjVRpiDQH/3WL4zujwShOmEFSaUkka+BK
 +Il64oVliZk1BMsMTqLsFYGqJtSlqOkQzWkQ2hyPwS9/U4pBzPZ4eJZCng/245YK
 5NsT51/m8x3mjRATh0fPqsAwz8CdkWfMpwLYBS6V73RB1XCTVB4IV9vVk6g922oe
 dkKlq6s3XvBqBJD02CkV1ApAYFyozF8ppyWdt7F/MsM9HdpM8uWR9F5fh/qDizbZ
 ifPUkTSk8BcFzyZ57P/9rdn+cTpPY4PeKIurKwttCGFRm9++5a6RdIwP+zQm7ypC
 LaI9StOj8ixloWjhS2eETMi/qLFkwf93gVFhRWhQzIetkjgqZoRIbcg+iLsi6uU=
 =W6NP
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael S. Tsirkin:
 "virtio oops fixes

  The virtio pci rework using shared interrupts caused a lot of issues.
  We tried to fix them but ran out of time. Revert for now, and revisit
  the issue for the next kernel.

  Luckily we are able to do this without losing automatic interrupt
  NUMA affinity which was the main motivator for the rework"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio-pci: Remove affinity hint before freeing the interrupt
  Revert "virtio_pci: remove struct virtio_pci_vq_info"
  Revert "virtio_pci: use shared interrupts for virtqueues"
  Revert "virtio_pci: don't duplicate the msix_enable flag in struct pci_dev"
  Revert "virtio_pci: simplify MSI-X setup"
  Revert "virtio_pci: fix out of bound access for msix_names"
  MAINTAINERS: fix virtio file pattern
  virtio_console: fix uninitialized variable use
  virtio_net: clear MTU when out of range
  virtio: allow drivers to validate features
  virtio_net: enable big packets for large MTU values
This commit is contained in:
Linus Torvalds 2017-04-14 08:49:39 -07:00
commit 7873933385
10 changed files with 324 additions and 174 deletions

View File

@ -13305,7 +13305,7 @@ F: drivers/virtio/
F: tools/virtio/ F: tools/virtio/
F: drivers/net/virtio_net.c F: drivers/net/virtio_net.c
F: drivers/block/virtio_blk.c F: drivers/block/virtio_blk.c
F: include/linux/virtio_*.h F: include/linux/virtio*.h
F: include/uapi/linux/virtio_*.h F: include/uapi/linux/virtio_*.h
F: drivers/crypto/virtio/ F: drivers/crypto/virtio/

View File

@ -2202,14 +2202,16 @@ static int virtcons_freeze(struct virtio_device *vdev)
vdev->config->reset(vdev); vdev->config->reset(vdev);
virtqueue_disable_cb(portdev->c_ivq); if (use_multiport(portdev))
virtqueue_disable_cb(portdev->c_ivq);
cancel_work_sync(&portdev->control_work); cancel_work_sync(&portdev->control_work);
cancel_work_sync(&portdev->config_work); cancel_work_sync(&portdev->config_work);
/* /*
* Once more: if control_work_handler() was running, it would * Once more: if control_work_handler() was running, it would
* enable the cb as the last step. * enable the cb as the last step.
*/ */
virtqueue_disable_cb(portdev->c_ivq); if (use_multiport(portdev))
virtqueue_disable_cb(portdev->c_ivq);
remove_controlq_data(portdev); remove_controlq_data(portdev);
list_for_each_entry(port, &portdev->ports, list) { list_for_each_entry(port, &portdev->ports, list) {

View File

@ -2230,14 +2230,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
#define MIN_MTU ETH_MIN_MTU #define MIN_MTU ETH_MIN_MTU
#define MAX_MTU ETH_MAX_MTU #define MAX_MTU ETH_MAX_MTU
static int virtnet_probe(struct virtio_device *vdev) static int virtnet_validate(struct virtio_device *vdev)
{ {
int i, err;
struct net_device *dev;
struct virtnet_info *vi;
u16 max_queue_pairs;
int mtu;
if (!vdev->config->get) { if (!vdev->config->get) {
dev_err(&vdev->dev, "%s failure: config access disabled\n", dev_err(&vdev->dev, "%s failure: config access disabled\n",
__func__); __func__);
@ -2247,6 +2241,25 @@ static int virtnet_probe(struct virtio_device *vdev)
if (!virtnet_validate_features(vdev)) if (!virtnet_validate_features(vdev))
return -EINVAL; return -EINVAL;
if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
int mtu = virtio_cread16(vdev,
offsetof(struct virtio_net_config,
mtu));
if (mtu < MIN_MTU)
__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
}
return 0;
}
static int virtnet_probe(struct virtio_device *vdev)
{
int i, err;
struct net_device *dev;
struct virtnet_info *vi;
u16 max_queue_pairs;
int mtu;
/* Find if host supports multiqueue virtio_net device */ /* Find if host supports multiqueue virtio_net device */
err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ, err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
struct virtio_net_config, struct virtio_net_config,
@ -2362,11 +2375,20 @@ static int virtnet_probe(struct virtio_device *vdev)
offsetof(struct virtio_net_config, offsetof(struct virtio_net_config,
mtu)); mtu));
if (mtu < dev->min_mtu) { if (mtu < dev->min_mtu) {
__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); /* Should never trigger: MTU was previously validated
} else { * in virtnet_validate.
dev->mtu = mtu; */
dev->max_mtu = mtu; dev_err(&vdev->dev, "device MTU appears to have changed "
"it is now %d < %d", mtu, dev->min_mtu);
goto free_stats;
} }
dev->mtu = mtu;
dev->max_mtu = mtu;
/* TODO: size buffers correctly in this case. */
if (dev->mtu > ETH_DATA_LEN)
vi->big_packets = true;
} }
if (vi->any_header_sg) if (vi->any_header_sg)
@ -2544,6 +2566,7 @@ static struct virtio_driver virtio_net_driver = {
.driver.name = KBUILD_MODNAME, .driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE, .driver.owner = THIS_MODULE,
.id_table = id_table, .id_table = id_table,
.validate = virtnet_validate,
.probe = virtnet_probe, .probe = virtnet_probe,
.remove = virtnet_remove, .remove = virtnet_remove,
.config_changed = virtnet_config_changed, .config_changed = virtnet_config_changed,

View File

@ -232,6 +232,12 @@ static int virtio_dev_probe(struct device *_d)
if (device_features & (1ULL << i)) if (device_features & (1ULL << i))
__virtio_set_bit(dev, i); __virtio_set_bit(dev, i);
if (drv->validate) {
err = drv->validate(dev);
if (err)
goto err;
}
err = virtio_finalize_features(dev); err = virtio_finalize_features(dev);
if (err) if (err)
goto err; goto err;

View File

@ -33,8 +33,10 @@ void vp_synchronize_vectors(struct virtio_device *vdev)
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i; int i;
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); if (vp_dev->intx_enabled)
for (i = 1; i < vp_dev->msix_vectors; i++) synchronize_irq(vp_dev->pci_dev->irq);
for (i = 0; i < vp_dev->msix_vectors; ++i)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
} }
@ -60,13 +62,16 @@ static irqreturn_t vp_config_changed(int irq, void *opaque)
static irqreturn_t vp_vring_interrupt(int irq, void *opaque) static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{ {
struct virtio_pci_device *vp_dev = opaque; struct virtio_pci_device *vp_dev = opaque;
struct virtio_pci_vq_info *info;
irqreturn_t ret = IRQ_NONE; irqreturn_t ret = IRQ_NONE;
struct virtqueue *vq; unsigned long flags;
list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { spin_lock_irqsave(&vp_dev->lock, flags);
if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) list_for_each_entry(info, &vp_dev->virtqueues, node) {
if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
ret = IRQ_HANDLED; ret = IRQ_HANDLED;
} }
spin_unlock_irqrestore(&vp_dev->lock, flags);
return ret; return ret;
} }
@ -97,186 +102,244 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
return vp_vring_interrupt(irq, opaque); return vp_vring_interrupt(irq, opaque);
} }
static void vp_remove_vqs(struct virtio_device *vdev) static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
bool per_vq_vectors, struct irq_affinity *desc)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq, *n; const char *name = dev_name(&vp_dev->vdev.dev);
unsigned i, v;
int err = -ENOMEM;
list_for_each_entry_safe(vq, n, &vdev->vqs, list) { vp_dev->msix_vectors = nvectors;
if (vp_dev->msix_vector_map) {
int v = vp_dev->msix_vector_map[vq->index];
if (v != VIRTIO_MSI_NO_VECTOR) vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
free_irq(pci_irq_vector(vp_dev->pci_dev, v), GFP_KERNEL);
vq); if (!vp_dev->msix_names)
} goto error;
vp_dev->del_vq(vq); vp_dev->msix_affinity_masks
= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
GFP_KERNEL);
if (!vp_dev->msix_affinity_masks)
goto error;
for (i = 0; i < nvectors; ++i)
if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
GFP_KERNEL))
goto error;
err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
nvectors, PCI_IRQ_MSIX |
(desc ? PCI_IRQ_AFFINITY : 0),
desc);
if (err < 0)
goto error;
vp_dev->msix_enabled = 1;
/* Set the vector used for configuration */
v = vp_dev->msix_used_vectors;
snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
"%s-config", name);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
vp_config_changed, 0, vp_dev->msix_names[v],
vp_dev);
if (err)
goto error;
++vp_dev->msix_used_vectors;
v = vp_dev->config_vector(vp_dev, v);
/* Verify we had enough resources to assign the vector */
if (v == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto error;
} }
if (!per_vq_vectors) {
/* Shared vector for all VQs */
v = vp_dev->msix_used_vectors;
snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
"%s-virtqueues", name);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
vp_vring_interrupt, 0, vp_dev->msix_names[v],
vp_dev);
if (err)
goto error;
++vp_dev->msix_used_vectors;
}
return 0;
error:
return err;
}
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name,
u16 msix_vec)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
struct virtqueue *vq;
unsigned long flags;
/* fill out our structure that represents an active queue */
if (!info)
return ERR_PTR(-ENOMEM);
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
msix_vec);
if (IS_ERR(vq))
goto out_info;
info->vq = vq;
if (callback) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_add(&info->node, &vp_dev->virtqueues);
spin_unlock_irqrestore(&vp_dev->lock, flags);
} else {
INIT_LIST_HEAD(&info->node);
}
vp_dev->vqs[index] = info;
return vq;
out_info:
kfree(info);
return vq;
}
static void vp_del_vq(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
unsigned long flags;
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
vp_dev->del_vq(info);
kfree(info);
} }
/* the config->del_vqs() implementation */ /* the config->del_vqs() implementation */
void vp_del_vqs(struct virtio_device *vdev) void vp_del_vqs(struct virtio_device *vdev)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq, *n;
int i; int i;
if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
return; if (vp_dev->per_vq_vectors) {
int v = vp_dev->vqs[vq->index]->msix_vector;
vp_remove_vqs(vdev); if (v != VIRTIO_MSI_NO_VECTOR) {
int irq = pci_irq_vector(vp_dev->pci_dev, v);
if (vp_dev->pci_dev->msix_enabled) { irq_set_affinity_hint(irq, NULL);
for (i = 0; i < vp_dev->msix_vectors; i++) free_irq(irq, vq);
}
}
vp_del_vq(vq);
}
vp_dev->per_vq_vectors = false;
if (vp_dev->intx_enabled) {
free_irq(vp_dev->pci_dev->irq, vp_dev);
vp_dev->intx_enabled = 0;
}
for (i = 0; i < vp_dev->msix_used_vectors; ++i)
free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);
for (i = 0; i < vp_dev->msix_vectors; i++)
if (vp_dev->msix_affinity_masks[i])
free_cpumask_var(vp_dev->msix_affinity_masks[i]); free_cpumask_var(vp_dev->msix_affinity_masks[i]);
if (vp_dev->msix_enabled) {
/* Disable the vector used for configuration */ /* Disable the vector used for configuration */
vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
kfree(vp_dev->msix_affinity_masks); pci_free_irq_vectors(vp_dev->pci_dev);
kfree(vp_dev->msix_names); vp_dev->msix_enabled = 0;
kfree(vp_dev->msix_vector_map);
} }
free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); vp_dev->msix_vectors = 0;
pci_free_irq_vectors(vp_dev->pci_dev); vp_dev->msix_used_vectors = 0;
kfree(vp_dev->msix_names);
vp_dev->msix_names = NULL;
kfree(vp_dev->msix_affinity_masks);
vp_dev->msix_affinity_masks = NULL;
kfree(vp_dev->vqs);
vp_dev->vqs = NULL;
} }
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], struct irq_affinity *desc) const char * const names[], bool per_vq_vectors,
struct irq_affinity *desc)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
const char *name = dev_name(&vp_dev->vdev.dev);
int i, j, err = -ENOMEM, allocated_vectors, nvectors;
unsigned flags = PCI_IRQ_MSIX;
bool shared = false;
u16 msix_vec; u16 msix_vec;
int i, err, nvectors, allocated_vectors;
if (desc) { vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
flags |= PCI_IRQ_AFFINITY; if (!vp_dev->vqs)
desc->pre_vectors++; /* virtio config vector */ return -ENOMEM;
if (per_vq_vectors) {
/* Best option: one for change interrupt, one per vq. */
nvectors = 1;
for (i = 0; i < nvqs; ++i)
if (callbacks[i])
++nvectors;
} else {
/* Second best: one for change, shared for all vqs. */
nvectors = 2;
} }
nvectors = 1; err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
for (i = 0; i < nvqs; i++) per_vq_vectors ? desc : NULL);
if (callbacks[i])
nvectors++;
/* Try one vector per queue first. */
err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
nvectors, flags, desc);
if (err < 0) {
/* Fallback to one vector for config, one shared for queues. */
shared = true;
err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2,
PCI_IRQ_MSIX);
if (err < 0)
return err;
}
if (err < 0)
return err;
vp_dev->msix_vectors = nvectors;
vp_dev->msix_names = kmalloc_array(nvectors,
sizeof(*vp_dev->msix_names), GFP_KERNEL);
if (!vp_dev->msix_names)
goto out_free_irq_vectors;
vp_dev->msix_affinity_masks = kcalloc(nvectors,
sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL);
if (!vp_dev->msix_affinity_masks)
goto out_free_msix_names;
for (i = 0; i < nvectors; ++i) {
if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
GFP_KERNEL))
goto out_free_msix_affinity_masks;
}
/* Set the vector used for configuration */
snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names),
"%s-config", name);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed,
0, vp_dev->msix_names[0], vp_dev);
if (err) if (err)
goto out_free_msix_affinity_masks; goto error_find;
/* Verify we had enough resources to assign the vector */ vp_dev->per_vq_vectors = per_vq_vectors;
if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { allocated_vectors = vp_dev->msix_used_vectors;
err = -EBUSY;
goto out_free_config_irq;
}
vp_dev->msix_vector_map = kmalloc_array(nvqs,
sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
if (!vp_dev->msix_vector_map)
goto out_disable_config_irq;
allocated_vectors = j = 1; /* vector 0 is the config interrupt */
for (i = 0; i < nvqs; ++i) { for (i = 0; i < nvqs; ++i) {
if (!names[i]) { if (!names[i]) {
vqs[i] = NULL; vqs[i] = NULL;
continue; continue;
} }
if (callbacks[i]) if (!callbacks[i])
msix_vec = allocated_vectors;
else
msix_vec = VIRTIO_MSI_NO_VECTOR; msix_vec = VIRTIO_MSI_NO_VECTOR;
else if (vp_dev->per_vq_vectors)
vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], msix_vec = allocated_vectors++;
msix_vec); else
msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
msix_vec);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]); err = PTR_ERR(vqs[i]);
goto out_remove_vqs; goto error_find;
} }
if (msix_vec == VIRTIO_MSI_NO_VECTOR) { if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
continue; continue;
}
snprintf(vp_dev->msix_names[j], /* allocate per-vq irq if available and necessary */
sizeof(*vp_dev->msix_names), "%s-%s", snprintf(vp_dev->msix_names[msix_vec],
sizeof *vp_dev->msix_names,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]); dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
vring_interrupt, IRQF_SHARED, vring_interrupt, 0,
vp_dev->msix_names[j], vqs[i]); vp_dev->msix_names[msix_vec],
if (err) { vqs[i]);
/* don't free this irq on error */ if (err)
vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; goto error_find;
goto out_remove_vqs;
}
vp_dev->msix_vector_map[i] = msix_vec;
j++;
/*
* Use a different vector for each queue if they are available,
* else share the same vector for all VQs.
*/
if (!shared)
allocated_vectors++;
} }
return 0; return 0;
out_remove_vqs: error_find:
vp_remove_vqs(vdev); vp_del_vqs(vdev);
kfree(vp_dev->msix_vector_map);
out_disable_config_irq:
vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
out_free_config_irq:
free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
out_free_msix_affinity_masks:
for (i = 0; i < nvectors; i++) {
if (vp_dev->msix_affinity_masks[i])
free_cpumask_var(vp_dev->msix_affinity_masks[i]);
}
kfree(vp_dev->msix_affinity_masks);
out_free_msix_names:
kfree(vp_dev->msix_names);
out_free_irq_vectors:
pci_free_irq_vectors(vp_dev->pci_dev);
return err; return err;
} }
@ -287,29 +350,33 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i, err; int i, err;
vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
if (!vp_dev->vqs)
return -ENOMEM;
err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
dev_name(&vdev->dev), vp_dev); dev_name(&vdev->dev), vp_dev);
if (err) if (err)
return err; goto out_del_vqs;
vp_dev->intx_enabled = 1;
vp_dev->per_vq_vectors = false;
for (i = 0; i < nvqs; ++i) { for (i = 0; i < nvqs; ++i) {
if (!names[i]) { if (!names[i]) {
vqs[i] = NULL; vqs[i] = NULL;
continue; continue;
} }
vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
VIRTIO_MSI_NO_VECTOR); VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]); err = PTR_ERR(vqs[i]);
goto out_remove_vqs; goto out_del_vqs;
} }
} }
return 0; return 0;
out_del_vqs:
out_remove_vqs: vp_del_vqs(vdev);
vp_remove_vqs(vdev);
free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
return err; return err;
} }
@ -320,9 +387,15 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
{ {
int err; int err;
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc); /* Try MSI-X with one vector per queue. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
if (!err) if (!err)
return 0; return 0;
/* Fallback: MSI-X with one vector for config, one shared for queues. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
if (!err)
return 0;
/* Finally fall back to regular interrupts. */
return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
} }
@ -342,15 +415,16 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
{ {
struct virtio_device *vdev = vq->vdev; struct virtio_device *vdev = vq->vdev;
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
struct cpumask *mask;
unsigned int irq;
if (!vq->callback) if (!vq->callback)
return -EINVAL; return -EINVAL;
if (vp_dev->pci_dev->msix_enabled) { if (vp_dev->msix_enabled) {
int vec = vp_dev->msix_vector_map[vq->index]; mask = vp_dev->msix_affinity_masks[info->msix_vector];
struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec);
if (cpu == -1) if (cpu == -1)
irq_set_affinity_hint(irq, NULL); irq_set_affinity_hint(irq, NULL);
else { else {
@ -365,12 +439,13 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
unsigned int *map = vp_dev->msix_vector_map;
if (!map || map[index] == VIRTIO_MSI_NO_VECTOR) if (!vp_dev->per_vq_vectors ||
vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR)
return NULL; return NULL;
return pci_irq_get_affinity(vp_dev->pci_dev, map[index]); return pci_irq_get_affinity(vp_dev->pci_dev,
vp_dev->vqs[index]->msix_vector);
} }
#ifdef CONFIG_PM_SLEEP #ifdef CONFIG_PM_SLEEP
@ -441,6 +516,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.parent = &pci_dev->dev;
vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->vdev.dev.release = virtio_pci_release_dev;
vp_dev->pci_dev = pci_dev; vp_dev->pci_dev = pci_dev;
INIT_LIST_HEAD(&vp_dev->virtqueues);
spin_lock_init(&vp_dev->lock);
/* enable the device */ /* enable the device */
rc = pci_enable_device(pci_dev); rc = pci_enable_device(pci_dev);

View File

@ -31,6 +31,17 @@
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
struct virtio_pci_vq_info {
/* the actual virtqueue */
struct virtqueue *vq;
/* the list node for the virtqueues list */
struct list_head node;
/* MSI-X vector (or none) */
unsigned msix_vector;
};
/* Our device structure */ /* Our device structure */
struct virtio_pci_device { struct virtio_pci_device {
struct virtio_device vdev; struct virtio_device vdev;
@ -64,25 +75,47 @@ struct virtio_pci_device {
/* the IO mapping for the PCI config space */ /* the IO mapping for the PCI config space */
void __iomem *ioaddr; void __iomem *ioaddr;
/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
struct list_head virtqueues;
/* array of all queues for house-keeping */
struct virtio_pci_vq_info **vqs;
/* MSI-X support */
int msix_enabled;
int intx_enabled;
cpumask_var_t *msix_affinity_masks; cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough, /* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */ * and I'm too lazy to allocate each name separately. */
char (*msix_names)[256]; char (*msix_names)[256];
/* Total Number of MSI-X vectors (including per-VQ ones). */ /* Number of available vectors */
int msix_vectors; unsigned msix_vectors;
/* Map of per-VQ MSI-X vectors, may be NULL */ /* Vectors allocated, excluding per-vq vectors if any */
unsigned *msix_vector_map; unsigned msix_used_vectors;
/* Whether we have vector per vq */
bool per_vq_vectors;
struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info,
unsigned idx, unsigned idx,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u16 msix_vec); u16 msix_vec);
void (*del_vq)(struct virtqueue *vq); void (*del_vq)(struct virtio_pci_vq_info *info);
u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
}; };
/* Constants for MSI-X */
/* Use first vector for configuration changes, second and the rest for
* virtqueues Thus, we need at least 2 vectors for MSI. */
enum {
VP_MSIX_CONFIG_VECTOR = 0,
VP_MSIX_VQ_VECTOR = 1,
};
/* Convert a generic virtio device to our structure */ /* Convert a generic virtio device to our structure */
static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
{ {

View File

@ -112,6 +112,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
} }
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info,
unsigned index, unsigned index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
@ -129,6 +130,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
info->msix_vector = msix_vec;
/* create the vring */ /* create the vring */
vq = vring_create_virtqueue(index, num, vq = vring_create_virtqueue(index, num,
VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
@ -159,13 +162,14 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
return ERR_PTR(err); return ERR_PTR(err);
} }
static void del_vq(struct virtqueue *vq) static void del_vq(struct virtio_pci_vq_info *info)
{ {
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
if (vp_dev->pci_dev->msix_enabled) { if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR, iowrite16(VIRTIO_MSI_NO_VECTOR,
vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
/* Flush the write out to device */ /* Flush the write out to device */

View File

@ -293,6 +293,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
} }
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info,
unsigned index, unsigned index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
@ -322,6 +323,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
/* get offset of notification word for this vq */ /* get offset of notification word for this vq */
off = vp_ioread16(&cfg->queue_notify_off); off = vp_ioread16(&cfg->queue_notify_off);
info->msix_vector = msix_vec;
/* create the vring */ /* create the vring */
vq = vring_create_virtqueue(index, num, vq = vring_create_virtqueue(index, num,
SMP_CACHE_BYTES, &vp_dev->vdev, SMP_CACHE_BYTES, &vp_dev->vdev,
@ -405,13 +408,14 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
return 0; return 0;
} }
static void del_vq(struct virtqueue *vq) static void del_vq(struct virtio_pci_vq_info *info)
{ {
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
vp_iowrite16(vq->index, &vp_dev->common->queue_select); vp_iowrite16(vq->index, &vp_dev->common->queue_select);
if (vp_dev->pci_dev->msix_enabled) { if (vp_dev->msix_enabled) {
vp_iowrite16(VIRTIO_MSI_NO_VECTOR, vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
&vp_dev->common->queue_msix_vector); &vp_dev->common->queue_msix_vector);
/* Flush the write out to device */ /* Flush the write out to device */

View File

@ -167,6 +167,7 @@ struct virtio_driver {
unsigned int feature_table_size; unsigned int feature_table_size;
const unsigned int *feature_table_legacy; const unsigned int *feature_table_legacy;
unsigned int feature_table_size_legacy; unsigned int feature_table_size_legacy;
int (*validate)(struct virtio_device *dev);
int (*probe)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev);
void (*scan)(struct virtio_device *dev); void (*scan)(struct virtio_device *dev);
void (*remove)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev);

View File

@ -79,7 +79,7 @@
* configuration space */ * configuration space */
#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20)
/* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */
#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) #define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled)
/* Virtio ABI version, this must match exactly */ /* Virtio ABI version, this must match exactly */
#define VIRTIO_PCI_ABI_VERSION 0 #define VIRTIO_PCI_ABI_VERSION 0