2007-10-22 08:03:36 +07:00
|
|
|
#ifndef _LINUX_VIRTIO_H
|
|
|
|
#define _LINUX_VIRTIO_H
|
|
|
|
/* Everything a virtio driver needs to work with any particular virtio
 * implementation. */
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/scatterlist.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/mod_devicetable.h>
|
|
|
|
|
|
|
|
/**
 * virtqueue - a queue to register buffers for sending or receiving.
 * @callback: the function to call when buffers are consumed (can be NULL).
 * @vdev: the virtio device this queue was created for.
 * @vq_ops: the operations for this virtqueue (see struct virtqueue_ops).
 * @priv: a pointer for the virtqueue implementation to use.
 */
struct virtqueue {
	void (*callback)(struct virtqueue *vq);
	struct virtio_device *vdev;
	struct virtqueue_ops *vq_ops;
	void *priv;
};
|
|
|
|
|
|
|
|
/* Only pointers to these are used below, so forward declarations suffice. */
struct scatterlist;
struct virtqueue;

/**
 * virtqueue_ops - operations for virtqueue abstraction layer
 * @add_buf: expose buffer to other end
 *	vq: the struct virtqueue we're talking about.
 *	sg: the description of the buffer(s); an array holding the readable
 *	    entries first, followed by the writable ones.
 *	out_num: the number of sg readable by other side
 *	in_num: the number of sg which are writable (after readable ones)
 *	data: the token identifying the buffer.
 *	Returns 0 or an error.
 * @kick: update after add_buf
 *	vq: the struct virtqueue
 *	After one or more add_buf calls, invoke this to kick the other side.
 * @get_buf: get the next used buffer
 *	vq: the struct virtqueue we're talking about.
 *	len: the length written into the buffer
 *	Returns NULL or the "data" token handed to add_buf.
 * @disable_cb: disable callbacks
 *	vq: the struct virtqueue we're talking about.
 *	Note that this is not necessarily synchronous, hence unreliable and only
 *	useful as an optimization.
 * @enable_cb: restart callbacks after disable_cb.
 *	vq: the struct virtqueue we're talking about.
 *	This re-enables callbacks; it returns "false" if there are pending
 *	buffers in the queue, to detect a possible race between the driver
 *	checking for more work, and enabling callbacks.
 *	In that case callbacks are left enabled and the caller is expected
 *	to do the cleanup.
 *
 * Locking rules are straightforward: the driver is responsible for
 * locking.  No two operations may be invoked simultaneously, with the exception
 * of @disable_cb.
 *
 * All operations can be called in any context.
 */
struct virtqueue_ops {
	int (*add_buf)(struct virtqueue *vq,
		       struct scatterlist *sg,
		       unsigned int out_num,
		       unsigned int in_num,
		       void *data);

	void (*kick)(struct virtqueue *vq);

	void *(*get_buf)(struct virtqueue *vq, unsigned int *len);

	void (*disable_cb)(struct virtqueue *vq);
	bool (*enable_cb)(struct virtqueue *vq);
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virtio_device - representation of a device using virtio
|
|
|
|
* @index: unique position on the virtio bus
|
|
|
|
* @dev: underlying device.
|
|
|
|
* @id: the device type identification (used to match it with a driver).
|
|
|
|
* @config: the configuration ops for this device.
|
2008-05-03 09:50:50 +07:00
|
|
|
* @features: the features supported by both driver and device.
|
2007-10-22 08:03:36 +07:00
|
|
|
* @priv: private pointer for the driver's use.
|
|
|
|
*/
|
|
|
|
struct virtio_device
|
|
|
|
{
|
|
|
|
int index;
|
|
|
|
struct device dev;
|
|
|
|
struct virtio_device_id id;
|
|
|
|
struct virtio_config_ops *config;
|
2008-05-03 09:50:50 +07:00
|
|
|
/* Note that this is a Linux set_bit-style bitmap. */
|
|
|
|
unsigned long features[1];
|
2007-10-22 08:03:36 +07:00
|
|
|
void *priv;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Registration entry points for a struct virtio_device; only declared in
 * this header, the definitions live elsewhere.
 * NOTE(review): register presumably returns 0 or a negative errno -- confirm
 * against the implementation.
 */
int register_virtio_device(struct virtio_device *dev);
void unregister_virtio_device(struct virtio_device *dev);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* virtio_driver - operations for a virtio I/O driver
|
|
|
|
* @driver: underlying device driver (populate name and owner).
|
|
|
|
* @id_table: the ids serviced by this driver.
|
2008-05-03 09:50:50 +07:00
|
|
|
* @feature_table: an array of feature numbers supported by this device.
|
|
|
|
* @feature_table_size: number of entries in the feature table array.
|
2007-10-22 08:03:36 +07:00
|
|
|
* @probe: the function to call when a device is found. Returns a token for
|
|
|
|
* remove, or PTR_ERR().
|
|
|
|
* @remove: the function when a device is removed.
|
2008-02-05 11:49:58 +07:00
|
|
|
* @config_changed: optional function to call when the device configuration
|
|
|
|
* changes; may be called in interrupt context.
|
2007-10-22 08:03:36 +07:00
|
|
|
*/
|
|
|
|
struct virtio_driver {
|
|
|
|
struct device_driver driver;
|
|
|
|
const struct virtio_device_id *id_table;
|
2008-05-03 09:50:50 +07:00
|
|
|
const unsigned int *feature_table;
|
|
|
|
unsigned int feature_table_size;
|
2007-10-22 08:03:36 +07:00
|
|
|
int (*probe)(struct virtio_device *dev);
|
|
|
|
void (*remove)(struct virtio_device *dev);
|
2008-02-05 11:49:58 +07:00
|
|
|
void (*config_changed)(struct virtio_device *dev);
|
2007-10-22 08:03:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Registration entry points for a struct virtio_driver; only declared in
 * this header, the definitions live elsewhere.
 * NOTE(review): register presumably returns 0 or a negative errno -- confirm
 * against the implementation.
 */
int register_virtio_driver(struct virtio_driver *drv);
void unregister_virtio_driver(struct virtio_driver *drv);
|
|
|
|
#endif /* _LINUX_VIRTIO_H */
|