xen/blkback: pseudo support for multi hardware queues/rings

Preparatory patch for multiple hardware queues (rings). The number of
rings is unconditionally set to 1; a larger number will be enabled in
"xen/blkback: get the number of hardware queues/rings from blkfront".

Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
v2: Align variables in the structures.
This commit is contained in:
Konrad Rzeszutek Wilk 2015-12-11 12:08:48 -05:00
parent 597957000a
commit 2fb1ef4f12
2 changed files with 183 additions and 113 deletions

View File

@ -340,7 +340,8 @@ struct xen_blkif {
struct work_struct free_work;
unsigned int nr_ring_pages;
/* All rings for this device. */
struct xen_blkif_ring ring;
struct xen_blkif_ring *rings;
unsigned int nr_rings;
};
struct seg_buf {

View File

@ -86,9 +86,11 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
{
int err;
char name[BLKBACK_NAME_LEN];
struct xen_blkif_ring *ring;
int i;
/* Not ready to connect? */
if (!blkif->ring.irq || !blkif->vbd.bdev)
if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
return;
/* Already connected? */
@ -113,19 +115,55 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
}
invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
blkif->ring.xenblkd = kthread_run(xen_blkif_schedule, &blkif->ring, "%s", name);
if (IS_ERR(blkif->ring.xenblkd)) {
err = PTR_ERR(blkif->ring.xenblkd);
blkif->ring.xenblkd = NULL;
xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
return;
for (i = 0; i < blkif->nr_rings; i++) {
ring = &blkif->rings[i];
ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
if (IS_ERR(ring->xenblkd)) {
err = PTR_ERR(ring->xenblkd);
ring->xenblkd = NULL;
xenbus_dev_fatal(blkif->be->dev, err,
"start %s-%d xenblkd", name, i);
goto out;
}
}
return;
out:
while (--i >= 0) {
ring = &blkif->rings[i];
kthread_stop(ring->xenblkd);
}
return;
}
/*
 * Allocate and initialise the array of per-ring state for @blkif.
 *
 * blkif->nr_rings must already hold the number of rings to create. Each
 * ring gets its locks, wait queues and pending-free list initialised,
 * points back at @blkif, and takes one reference on it via
 * xen_blkif_get().
 *
 * Returns 0 on success, or -ENOMEM if the ring array cannot be allocated.
 */
static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
{
	unsigned int r;

	/*
	 * kcalloc() returns zeroed memory like kzalloc(), but also fails
	 * cleanly if nr_rings * sizeof(ring) would overflow.
	 */
	blkif->rings = kcalloc(blkif->nr_rings, sizeof(*blkif->rings),
			       GFP_KERNEL);
	if (!blkif->rings)
		return -ENOMEM;

	for (r = 0; r < blkif->nr_rings; r++) {
		struct xen_blkif_ring *ring = &blkif->rings[r];

		spin_lock_init(&ring->blk_ring_lock);
		init_waitqueue_head(&ring->wq);
		INIT_LIST_HEAD(&ring->pending_free);

		spin_lock_init(&ring->pending_free_lock);
		init_waitqueue_head(&ring->pending_free_wq);
		init_waitqueue_head(&ring->shutdown_wq);
		ring->blkif = blkif;
		/* One blkif reference is held per ring. */
		xen_blkif_get(blkif);
	}

	return 0;
}
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
struct xen_blkif *blkif;
struct xen_blkif_ring *ring;
BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
@ -143,15 +181,11 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
blkif->st_print = jiffies;
INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
ring = &blkif->ring;
ring->blkif = blkif;
spin_lock_init(&ring->blk_ring_lock);
init_waitqueue_head(&ring->wq);
INIT_LIST_HEAD(&ring->pending_free);
spin_lock_init(&ring->pending_free_lock);
init_waitqueue_head(&ring->pending_free_wq);
init_waitqueue_head(&ring->shutdown_wq);
blkif->nr_rings = 1;
if (xen_blkif_alloc_rings(blkif)) {
kmem_cache_free(xen_blkif_cachep, blkif);
return ERR_PTR(-ENOMEM);
}
return blkif;
}
@ -216,50 +250,54 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
struct pending_req *req, *n;
int i = 0, j;
struct xen_blkif_ring *ring = &blkif->ring;
unsigned int j, r;
if (ring->xenblkd) {
kthread_stop(ring->xenblkd);
wake_up(&ring->shutdown_wq);
ring->xenblkd = NULL;
for (r = 0; r < blkif->nr_rings; r++) {
struct xen_blkif_ring *ring = &blkif->rings[r];
unsigned int i = 0;
if (ring->xenblkd) {
kthread_stop(ring->xenblkd);
wake_up(&ring->shutdown_wq);
ring->xenblkd = NULL;
}
/* The above kthread_stop() guarantees that at this point we
* don't have any discard_io or other_io requests. So, checking
* for inflight IO is enough.
*/
if (atomic_read(&ring->inflight) > 0)
return -EBUSY;
if (ring->irq) {
unbind_from_irqhandler(ring->irq, ring);
ring->irq = 0;
}
if (ring->blk_rings.common.sring) {
xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
ring->blk_rings.common.sring = NULL;
}
/* Remove all persistent grants and the cache of ballooned pages. */
xen_blkbk_free_caches(ring);
/* Check that there is no request in use */
list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
list_del(&req->free_list);
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
kfree(req->segments[j]);
for (j = 0; j < MAX_INDIRECT_PAGES; j++)
kfree(req->indirect_pages[j]);
kfree(req);
i++;
}
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
}
/* The above kthread_stop() guarantees that at this point we
* don't have any discard_io or other_io requests. So, checking
* for inflight IO is enough.
*/
if (atomic_read(&ring->inflight) > 0)
return -EBUSY;
if (ring->irq) {
unbind_from_irqhandler(ring->irq, ring);
ring->irq = 0;
}
if (ring->blk_rings.common.sring) {
xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
ring->blk_rings.common.sring = NULL;
}
/* Remove all persistent grants and the cache of ballooned pages. */
xen_blkbk_free_caches(ring);
/* Check that there is no request in use */
list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
list_del(&req->free_list);
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
kfree(req->segments[j]);
for (j = 0; j < MAX_INDIRECT_PAGES; j++)
kfree(req->indirect_pages[j]);
kfree(req);
i++;
}
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
blkif->nr_ring_pages = 0;
return 0;
@ -279,6 +317,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
BUG_ON(!list_empty(&blkif->free_pages));
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
kfree(blkif->rings);
kmem_cache_free(xen_blkif_cachep, blkif);
}
@ -427,6 +466,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
static int xen_blkbk_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
unsigned int i;
pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
@ -443,7 +483,8 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
if (be->blkif) {
xen_blkif_disconnect(be->blkif);
xen_blkif_put(be->blkif);
for (i = 0; i < be->blkif->nr_rings; i++)
xen_blkif_put(be->blkif);
}
kfree(be->mode);
@ -826,51 +867,43 @@ static void connect(struct backend_info *be)
xenbus_transaction_end(xbt, 1);
}
static int connect_ring(struct backend_info *be)
/*
* Each ring may span multiple pages, depending on "ring-page-order".
*/
static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
{
struct xenbus_device *dev = be->dev;
unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
unsigned int evtchn, nr_grefs, ring_page_order;
unsigned int pers_grants;
char protocol[64] = "";
struct pending_req *req, *n;
int err, i, j;
struct xen_blkif_ring *ring = &be->blkif->ring;
struct xen_blkif *blkif = ring->blkif;
struct xenbus_device *dev = blkif->be->dev;
unsigned int ring_page_order, nr_grefs, evtchn;
pr_debug("%s %s\n", __func__, dev->otherend);
err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
&evtchn);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/event-channel",
dev->otherend);
xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
return err;
}
pr_info("event-channel %u\n", evtchn);
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
&ring_page_order);
if (err != 1) {
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
"%u", &ring_ref[0]);
err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
dev->otherend);
xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
return err;
}
nr_grefs = 1;
pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
ring_ref[0]);
} else {
unsigned int i;
if (ring_page_order > xen_blkif_max_ring_order) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
dev->otherend, ring_page_order,
dir, ring_page_order,
xen_blkif_max_ring_order);
return err;
}
@ -880,46 +913,17 @@ static int connect_ring(struct backend_info *be)
char ring_ref_name[RINGREF_NAME_LEN];
snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
"%u", &ring_ref[i]);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/%s",
dev->otherend, ring_ref_name);
dir, ring_ref_name);
return err;
}
pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
}
}
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
"%63s", protocol, NULL);
if (err)
strcpy(protocol, "unspecified, assuming default");
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
else {
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
return -1;
}
err = xenbus_gather(XBT_NIL, dev->otherend,
"feature-persistent", "%u",
&pers_grants, NULL);
if (err)
pers_grants = 0;
be->blkif->vbd.feature_gnt_persistent = pers_grants;
be->blkif->vbd.overflow_max_grants = 0;
be->blkif->nr_ring_pages = nr_grefs;
pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
pers_grants ? "persistent grants" : "");
blkif->nr_ring_pages = nr_grefs;
for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
req = kzalloc(sizeof(*req), GFP_KERNEL);
@ -964,6 +968,71 @@ static int connect_ring(struct backend_info *be)
kfree(req);
}
return -ENOMEM;
}
static int connect_ring(struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
unsigned int pers_grants;
char protocol[64] = "";
int err, i;
char *xspath;
size_t xspathsize;
const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
pr_debug("%s %s\n", __func__, dev->otherend);
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
"%63s", protocol, NULL);
if (err)
strcpy(protocol, "unspecified, assuming default");
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
else {
xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
return -1;
}
err = xenbus_gather(XBT_NIL, dev->otherend,
"feature-persistent", "%u",
&pers_grants, NULL);
if (err)
pers_grants = 0;
be->blkif->vbd.feature_gnt_persistent = pers_grants;
be->blkif->vbd.overflow_max_grants = 0;
pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
pers_grants ? "persistent grants" : "");
if (be->blkif->nr_rings == 1)
return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
else {
xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
xspath = kmalloc(xspathsize, GFP_KERNEL);
if (!xspath) {
xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
return -ENOMEM;
}
for (i = 0; i < be->blkif->nr_rings; i++) {
memset(xspath, 0, xspathsize);
snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
err = read_per_ring_refs(&be->blkif->rings[i], xspath);
if (err) {
kfree(xspath);
return err;
}
}
kfree(xspath);
}
return 0;
}
static const struct xenbus_device_id xen_blkbk_ids[] = {