From 01460f3520c100010aacc8f8500cafcb17ce4665 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 10 Oct 2008 13:36:57 +0100 Subject: [PATCH 01/18] dm mpath: use more error codes This patch allows path errors from the multipath ctr function to propagate up to userspace as errno values from the ioctl() call. This is in response to https://www.redhat.com/archives/dm-devel/2008-May/msg00000.html and https://bugzilla.redhat.com/show_bug.cgi?id=444421 The patch only lets through the errors that it needs to in order to get the path errors from parse_path(). Signed-off-by: Benjamin Marzinski Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2fcf28b4c70..c2ff77d77a5e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -563,12 +563,12 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, /* we need at least a path arg */ if (as->argc < 1) { ti->error = "no device given"; - return NULL; + return ERR_PTR(-EINVAL); } p = alloc_pgpath(); if (!p) - return NULL; + return ERR_PTR(-ENOMEM); r = dm_get_device(ti, shift(as), ti->begin, ti->len, dm_table_get_mode(ti->table), &p->path.dev); @@ -596,7 +596,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, bad: free_pgpath(p); - return NULL; + return ERR_PTR(r); } static struct priority_group *parse_priority_group(struct arg_set *as, @@ -614,14 +614,14 @@ static struct priority_group *parse_priority_group(struct arg_set *as, if (as->argc < 2) { as->argc = 0; - ti->error = "not enough priority group aruments"; - return NULL; + ti->error = "not enough priority group arguments"; + return ERR_PTR(-EINVAL); } pg = alloc_priority_group(); if (!pg) { ti->error = "couldn't allocate priority group"; - return NULL; + return ERR_PTR(-ENOMEM); } pg->m = m; @@ -654,8 +654,10 @@ static struct priority_group *parse_priority_group(struct arg_set *as, path_args.argv = as->argv; pgpath = parse_path(&path_args, &pg->ps, ti); - if (!pgpath) + if (IS_ERR(pgpath)) { + r = PTR_ERR(pgpath); goto bad; + } pgpath->pg = pg; list_add_tail(&pgpath->list, &pg->pgpaths); @@ -666,7 +668,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as, bad: free_priority_group(pg, ti); - return NULL; + return ERR_PTR(r); } static int parse_hw_handler(struct arg_set *as, struct multipath *m) @@ -785,8 +787,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, struct priority_group *pg; pg = parse_priority_group(&as, m); - if (!pg) { - r = -EINVAL; + if (IS_ERR(pg)) { + r = PTR_ERR(pg); goto bad; } From 6680073d3ec7c6dbdbf77870bf1fea869767d779 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Fri, 10 Oct 2008 13:36:58 +0100 Subject: [PATCH 02/18] dm mpath: remove is_active from struct dm_path This patch moves 'is_active' from struct dm_path to struct pgpath as it does not need exporting. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 13 +++++++------ drivers/md/dm-mpath.h | 2 -- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2ff77d77a5e..b2ab8489d0eb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -30,6 +30,7 @@ struct pgpath { struct list_head list; struct priority_group *pg; /* Owning PG */ + unsigned is_active; /* Path status */ unsigned fail_count; /* Cumulative failure count */ struct dm_path path; @@ -123,7 +124,7 @@ static struct pgpath *alloc_pgpath(void) struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); if (pgpath) - pgpath->path.is_active = 1; + pgpath->is_active = 1; return pgpath; } @@ -854,13 +855,13 @@ static int fail_path(struct pgpath *pgpath) spin_lock_irqsave(&m->lock, flags); - if (!pgpath->path.is_active) + if (!pgpath->is_active) goto out; DMWARN("Failing path %s.", pgpath->path.dev->name); pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); - pgpath->path.is_active = 0; + pgpath->is_active = 0; pgpath->fail_count++; m->nr_valid_paths--; @@ -890,7 +891,7 @@ static int reinstate_path(struct pgpath *pgpath) spin_lock_irqsave(&m->lock, flags); - if (pgpath->path.is_active) + if (pgpath->is_active) goto out; if (!pgpath->pg->ps.type->reinstate_path) { @@ -904,7 +905,7 @@ static int reinstate_path(struct pgpath *pgpath) if (r) goto out; - pgpath->path.is_active = 1; + pgpath->is_active = 1; m->current_pgpath = NULL; if (!m->nr_valid_paths++ && m->queue_size) @@ -1292,7 +1293,7 @@ static int multipath_status(struct dm_target *ti, status_type_t type, list_for_each_entry(p, &pg->pgpaths, list) { DMEMIT("%s %s %u ", p->path.dev->name, - p->path.is_active ? "A" : "F", + p->is_active ? "A" : "F", p->fail_count); if (pg->ps.type->status) sz += pg->ps.type->status(&pg->ps, diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h index c198b856a452..e230f7196259 100644 --- a/drivers/md/dm-mpath.h +++ b/drivers/md/dm-mpath.h @@ -13,8 +13,6 @@ struct dm_dev; struct dm_path { struct dm_dev *dev; /* Read-only */ - unsigned is_active; /* Read-only */ - void *pscontext; /* For path-selector use */ }; From f7c83e2e4783c4f7abe6f3a85a8c5e210f98bc7b Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 10 Oct 2008 13:36:59 +0100 Subject: [PATCH 03/18] dm raid1: kcopyd should stop on error if errors handled dm-raid1 is setting the 'DM_KCOPYD_IGNORE_ERROR' flag unconditionally when assigning kcopyd work. kcopyd is responsible for copying an assigned section of disk to one or more other disks. The 'DM_KCOPYD_IGNORE_ERROR' flag affects kcopyd in the following way: When not set: kcopyd will immediately stop the copy operation when an error is encountered. When set: kcopyd will try to proceed regardless of errors and try to continue copying any remaining amount. Since dm-raid1 tracks regions of the address space that are (or are not) in sync and it now has the ability to handle these errors, we can safely enable this optimization. This optimization is conditional on whether mirror error handling has been enabled. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ff05fe893083..29913e42c4ab 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -842,7 +842,9 @@ static int recover(struct mirror_set *ms, struct region *reg) } /* hand to kcopyd */ - set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); + if (!errors_handled(ms)) + set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); + r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags, recovery_complete, reg); From a481db784682b33d078c7bf8a1d0581dc09946c1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:00 +0100 Subject: [PATCH 04/18] dm exception store: introduce area_location function Move this logic to a function, because it will be reused later. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 41f408068a7c..824cf31967c5 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -208,6 +208,14 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) return req.result; } +/* + * Convert a metadata area index to a chunk index. + */ +static chunk_t area_location(struct pstore *ps, chunk_t area) +{ + return 1 + ((ps->exceptions_per_area + 1) * area); +} + /* * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. @@ -217,8 +225,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw) int r; uint32_t chunk; - /* convert a metadata area index to a chunk index */ - chunk = 1 + ((ps->exceptions_per_area + 1) * area); + chunk = area_location(ps, area); r = chunk_io(ps, chunk, rw, 0); if (r) From fd14acf6fc9f4635be201960004d847b14236a20 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:01 +0100 Subject: [PATCH 05/18] dm exception store: use chunk_t for_areas Change uint32_t into chunk_t to remove 32-bit limitation on the number of chunks on systems with 64-bit sector numbers. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 824cf31967c5..769ab677f8e0 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -108,12 +108,12 @@ struct pstore { * Used to keep track of which metadata area the data in * 'chunk' refers to. */ - uint32_t current_area; + chunk_t current_area; /* * The next free chunk for an exception. */ - uint32_t next_free; + chunk_t next_free; /* * The index of next free exception in the current @@ -175,7 +175,7 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized block of data from a device. */ -static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) +static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) { struct dm_io_region where = { .bdev = ps->snap->cow->bdev, @@ -220,10 +220,10 @@ static chunk_t area_location(struct pstore *ps, chunk_t area) * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. */ -static int area_io(struct pstore *ps, uint32_t area, int rw) +static int area_io(struct pstore *ps, chunk_t area, int rw) { int r; - uint32_t chunk; + chunk_t chunk; chunk = area_location(ps, area); @@ -235,7 +235,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw) return 0; } -static int zero_area(struct pstore *ps, uint32_t area) +static int zero_area(struct pstore *ps, chunk_t area) { memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); return area_io(ps, area, WRITE); @@ -411,7 +411,7 @@ static int insert_exceptions(struct pstore *ps, int *full) static int read_exceptions(struct pstore *ps) { - uint32_t area; + chunk_t area; int r, full = 1; /* @@ -524,6 +524,7 @@ static int persistent_prepare(struct exception_store *store, { struct pstore *ps = get_info(store); uint32_t stride; + chunk_t next_free; sector_t size = get_dev_size(store->snap->cow->bdev); /* Is there enough room ? */ @@ -537,7 +538,8 @@ static int persistent_prepare(struct exception_store *store, * into account the location of the metadata chunks. */ stride = (ps->exceptions_per_area + 1); - if ((++ps->next_free % stride) == 1) + next_free = ++ps->next_free; + if (sector_div(next_free, stride) == 1) ps->next_free++; atomic_inc(&ps->pending_count); From 3e1a8bdd05d6b1734a8ccf7af28042d72c447780 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:02 +0100 Subject: [PATCH 06/18] dm crypt: tidy inc pending Move io pending to one place. No functional change, usefull to simplify debugging. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 13956437bc81..6b9be99eff5b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -517,6 +517,11 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } +static void crypt_inc_pending(struct dm_crypt_io *io) +{ + atomic_inc(&io->pending); +} + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -591,7 +596,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io) struct bio *base_bio = io->base_bio; struct bio *clone; - atomic_inc(&io->pending); + crypt_inc_pending(io); /* * The block layer might modify the bvec array, so always @@ -665,7 +670,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, if (async) kcryptd_queue_io(io); else { - atomic_inc(&io->pending); + crypt_inc_pending(io); generic_make_request(clone); } } @@ -701,7 +706,7 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) if (unlikely(r < 0)) return; } else - atomic_inc(&io->pending); + crypt_inc_pending(io); /* out of memory -> run queues */ if (unlikely(remaining)) { @@ -720,7 +725,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) /* * Prevent io from disappearing until this function completes. */ - atomic_inc(&io->pending); + crypt_inc_pending(io); crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector); kcryptd_crypt_write_convert_loop(io); @@ -741,7 +746,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) struct crypt_config *cc = io->target->private; int r = 0; - atomic_inc(&io->pending); + crypt_inc_pending(io); crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, io->sector); From dc440d1e56c481f80d5350daadc7d078a04ca729 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:03 +0100 Subject: [PATCH 07/18] dm crypt: tidy crypt alloc Factor out crypt io allocation code. Later patches will call it from another place. No functional change. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6b9be99eff5b..cac064d4aa72 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -517,6 +517,22 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } +static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, + struct bio *bio, sector_t sector) +{ + struct crypt_config *cc = ti->private; + struct dm_crypt_io *io; + + io = mempool_alloc(cc->io_pool, GFP_NOIO); + io->target = ti; + io->base_bio = bio; + io->sector = sector; + io->error = 0; + atomic_set(&io->pending, 0); + + return io; +} + static void crypt_inc_pending(struct dm_crypt_io *io) { atomic_inc(&io->pending); @@ -1113,15 +1129,9 @@ static void crypt_dtr(struct dm_target *ti) static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { - struct crypt_config *cc = ti->private; struct dm_crypt_io *io; - io = mempool_alloc(cc->io_pool, GFP_NOIO); - io->target = ti; - io->base_bio = bio; - io->sector = bio->bi_sector - ti->begin; - io->error = 0; - atomic_set(&io->pending, 0); + io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin); if (bio_data_dir(io->base_bio) == READ) kcryptd_queue_io(io); From fc5a5e9aa878f86642c962b309f793fb2db0727e Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:04 +0100 Subject: [PATCH 08/18] dm crypt: tidy write loop pending Move kcryptd_crypt_write_convert_loop inside kcryptd_crypt_write_convert. This change is needed for a later patch. No functional change. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cac064d4aa72..97b407582c03 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -691,13 +691,19 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, } } -static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) +static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *clone; unsigned remaining = io->base_bio->bi_size; int r; + /* + * Prevent io from disappearing until this function completes. + */ + crypt_inc_pending(io); + crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector); + /* * The allocated buffers can be smaller than the whole bio, * so repeat the whole process until all the data can be handled. @@ -706,7 +712,7 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) clone = crypt_alloc_buffer(io, remaining); if (unlikely(!clone)) { io->error = -ENOMEM; - return; + break; } io->ctx.bio_out = clone; @@ -720,7 +726,7 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) /* processed, no running async crypto */ kcryptd_crypt_write_io_submit(io, r, 0); if (unlikely(r < 0)) - return; + break; } else crypt_inc_pending(io); @@ -732,19 +738,6 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io) congestion_wait(WRITE, HZ/100); } } -} - -static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->target->private; - - /* - * Prevent io from disappearing until this function completes. - */ - crypt_inc_pending(io); - - crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, io->sector); - kcryptd_crypt_write_convert_loop(io); crypt_dec_pending(io); } From 1e37bb8e557a186d327eb4d1387953880ffc2cdd Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 10 Oct 2008 13:37:05 +0100 Subject: [PATCH 09/18] dm crypt: remove inc_pending from write_io_submit Make the caller reponsible for incrementing the pending count before calling kcryptd_crypt_write_io_submit() in the non-async case to bring it into line with the async case. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 97b407582c03..0042636ad375 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -685,10 +685,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, if (async) kcryptd_queue_io(io); - else { - crypt_inc_pending(io); + else generic_make_request(clone); - } } static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) @@ -724,9 +722,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) if (atomic_dec_and_test(&io->ctx.pending)) { /* processed, no running async crypto */ + crypt_inc_pending(io); kcryptd_crypt_write_io_submit(io, r, 0); - if (unlikely(r < 0)) + if (unlikely(r < 0)) { + crypt_dec_pending(io); break; + } } else crypt_inc_pending(io); From 6c031f41db15b6cb0cd33545cec28ca706cd3c7e Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:06 +0100 Subject: [PATCH 10/18] dm crypt: move dec_pending on error into write_io_submit Make kcryptd_crypt_write_io_submit() responsible for decrementing the pending count after an error. Also fixes a bug in the async path that forgot to decrement it. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0042636ad375..d8126ac82960 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -674,6 +674,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, crypt_free_buffer_pages(cc, clone); bio_put(clone); io->error = -EIO; + crypt_dec_pending(io); return; } @@ -724,10 +725,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) /* processed, no running async crypto */ crypt_inc_pending(io); kcryptd_crypt_write_io_submit(io, r, 0); - if (unlikely(r < 0)) { - crypt_dec_pending(io); + if (unlikely(r < 0)) break; - } } else crypt_inc_pending(io); From 4e59409891c9cc30cb4d5d73250b0c968af8e39b Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:07 +0100 Subject: [PATCH 11/18] dm crypt: fix async inc_pending The pending reference count must be incremented *before* the async work is queued to another thread, not after. Otherwise there's a race if the work completes and decrements the reference count before it gets incremented. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d8126ac82960..262ed1816695 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -719,16 +719,15 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) remaining -= clone->bi_size; + crypt_inc_pending(io); r = crypt_convert(cc, &io->ctx); if (atomic_dec_and_test(&io->ctx.pending)) { /* processed, no running async crypto */ - crypt_inc_pending(io); kcryptd_crypt_write_io_submit(io, r, 0); if (unlikely(r < 0)) break; - } else - crypt_inc_pending(io); + } /* out of memory -> run queues */ if (unlikely(remaining)) { From c8081618a9f832fdf7ca81eb087f9f61f2bf07d5 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:08 +0100 Subject: [PATCH 12/18] dm crypt: tidy ctx pending Move the initialisation of ctx->pending into one place, at the start of crypt_convert(). Introduce crypt_finished to indicate whether or not the encryption is finished, for use in a later patch. No functional change. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 262ed1816695..f6018f5961de 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -333,7 +333,6 @@ static void crypt_convert_init(struct crypt_config *cc, ctx->idx_out = bio_out ? bio_out->bi_idx : 0; ctx->sector = sector + cc->iv_offset; init_completion(&ctx->restart); - atomic_set(&ctx->pending, 1); } static int crypt_convert_block(struct crypt_config *cc, @@ -408,6 +407,8 @@ static int crypt_convert(struct crypt_config *cc, { int r; + atomic_set(&ctx->pending, 1); + while(ctx->idx_in < ctx->bio_in->bi_vcnt && ctx->idx_out < ctx->bio_out->bi_vcnt) { @@ -694,6 +695,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *clone; + int crypt_finished; unsigned remaining = io->base_bio->bi_size; int r; @@ -721,19 +723,23 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, &io->ctx); + crypt_finished = atomic_dec_and_test(&io->ctx.pending); - if (atomic_dec_and_test(&io->ctx.pending)) { - /* processed, no running async crypto */ + /* Encryption was already finished, submit io now */ + if (crypt_finished) { kcryptd_crypt_write_io_submit(io, r, 0); + + /* + * If there was an error, do not try next fragments. + * For async, error is processed in async handler. + */ if (unlikely(r < 0)) break; } /* out of memory -> run queues */ if (unlikely(remaining)) { - /* wait for async crypto then reinitialize pending */ wait_event(cc->writeq, !atomic_read(&io->ctx.pending)); - atomic_set(&io->ctx.pending, 1); congestion_wait(WRITE, HZ/100); } } From 933f01d43326fb12a978a8e0bb062c28a2de4d5a Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Fri, 10 Oct 2008 13:37:08 +0100 Subject: [PATCH 13/18] dm crypt: avoid unnecessary wait when splitting bio Don't wait between submitting crypt requests for a bio unless we are short of memory. There are two situations when we must split an encrypted bio: 1) there are no free pages; 2) the new bio would violate underlying device restrictions (e.g. max hw segments). In case (2) we do not need to wait. Add output variable to crypt_alloc_buffer() to distinguish between these cases. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f6018f5961de..682ef9e6acd3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -457,9 +457,11 @@ static void dm_crypt_bio_destructor(struct bio *bio) /* * Generate a new unfragmented bio with the given size * This should never violate the device limitations - * May return a smaller bio when running out of pages + * May return a smaller bio when running out of pages, indicated by + * *out_of_pages set to 1. */ -static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) +static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, + unsigned *out_of_pages) { struct crypt_config *cc = io->target->private; struct bio *clone; @@ -473,11 +475,14 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) return NULL; clone_init(io, clone); + *out_of_pages = 0; for (i = 0; i < nr_iovecs; i++) { page = mempool_alloc(cc->page_pool, gfp_mask); - if (!page) + if (!page) { + *out_of_pages = 1; break; + } /* * if additional pages cannot be allocated without waiting, @@ -696,6 +701,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) struct crypt_config *cc = io->target->private; struct bio *clone; int crypt_finished; + unsigned out_of_pages = 0; unsigned remaining = io->base_bio->bi_size; int r; @@ -710,7 +716,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * so repeat the whole process until all the data can be handled. */ while (remaining) { - clone = crypt_alloc_buffer(io, remaining); + clone = crypt_alloc_buffer(io, remaining, &out_of_pages); if (unlikely(!clone)) { io->error = -ENOMEM; break; @@ -737,11 +743,15 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) break; } - /* out of memory -> run queues */ - if (unlikely(remaining)) { - wait_event(cc->writeq, !atomic_read(&io->ctx.pending)); + /* + * Out of memory -> run queues + * But don't wait if split was due to the io size restriction + */ + if (unlikely(out_of_pages)) congestion_wait(WRITE, HZ/100); - } + + if (unlikely(remaining)) + wait_event(cc->writeq, !atomic_read(&io->ctx.pending)); } crypt_dec_pending(io); From 82b1519b345d61dcfae526e3fcb08128f39f9bcc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:09 +0100 Subject: [PATCH 14/18] dm: export struct dm_dev Split struct dm_dev in two and publish the part that other targets need in include/linux/device-mapper.h. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 4 +- drivers/md/dm-table.c | 71 +++++++++++++++++++---------------- drivers/md/dm.h | 7 +--- include/linux/device-mapper.h | 7 +++- 4 files changed, 49 insertions(+), 40 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b262c0042de3..90e19f13f269 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1131,7 +1131,7 @@ static void retrieve_deps(struct dm_table *table, unsigned int count = 0; struct list_head *tmp; size_t len, needed; - struct dm_dev *dd; + struct dm_dev_internal *dd; struct dm_target_deps *deps; deps = get_result_buffer(param, param_size, &len); @@ -1157,7 +1157,7 @@ static void retrieve_deps(struct dm_table *table, deps->count = count; count = 0; list_for_each_entry (dd, dm_table_get_devices(table), list) - deps->dev[count++] = huge_encode_dev(dd->bdev->bd_dev); + deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev); param->data_size = param->data_start + needed; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 61f441409234..5dd32b8a57ac 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -250,7 +250,8 @@ static void free_devices(struct list_head *devices) struct list_head *tmp, *next; list_for_each_safe(tmp, next, devices) { - struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); + struct dm_dev_internal *dd = + list_entry(tmp, struct dm_dev_internal, list); kfree(dd); } } @@ -327,12 +328,12 @@ static int lookup_device(const char *path, dev_t *dev) /* * See if we've already got a device in the list. */ -static struct dm_dev *find_device(struct list_head *l, dev_t dev) +static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) { - struct dm_dev *dd; + struct dm_dev_internal *dd; list_for_each_entry (dd, l, list) - if (dd->bdev->bd_dev == dev) + if (dd->dm_dev.bdev->bd_dev == dev) return dd; return NULL; @@ -341,45 +342,47 @@ static struct dm_dev *find_device(struct list_head *l, dev_t dev) /* * Open a device so we can use it as a map destination. */ -static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md) +static int open_dev(struct dm_dev_internal *d, dev_t dev, + struct mapped_device *md) { static char *_claim_ptr = "I belong to device-mapper"; struct block_device *bdev; int r; - BUG_ON(d->bdev); + BUG_ON(d->dm_dev.bdev); - bdev = open_by_devnum(dev, d->mode); + bdev = open_by_devnum(dev, d->dm_dev.mode); if (IS_ERR(bdev)) return PTR_ERR(bdev); r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); if (r) blkdev_put(bdev); else - d->bdev = bdev; + d->dm_dev.bdev = bdev; return r; } /* * Close a device that we've been using. */ -static void close_dev(struct dm_dev *d, struct mapped_device *md) +static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) { - if (!d->bdev) + if (!d->dm_dev.bdev) return; - bd_release_from_disk(d->bdev, dm_disk(md)); - blkdev_put(d->bdev); - d->bdev = NULL; + bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); + blkdev_put(d->dm_dev.bdev); + d->dm_dev.bdev = NULL; } /* * If possible, this checks an area of a destination device is valid. */ -static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len) +static int check_device_area(struct dm_dev_internal *dd, sector_t start, + sector_t len) { - sector_t dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT; + sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT; if (!dev_size) return 1; @@ -392,16 +395,17 @@ static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len) * careful to leave things as they were if we fail to reopen the * device. */ -static int upgrade_mode(struct dm_dev *dd, int new_mode, struct mapped_device *md) +static int upgrade_mode(struct dm_dev_internal *dd, int new_mode, + struct mapped_device *md) { int r; - struct dm_dev dd_copy; - dev_t dev = dd->bdev->bd_dev; + struct dm_dev_internal dd_copy; + dev_t dev = dd->dm_dev.bdev->bd_dev; dd_copy = *dd; - dd->mode |= new_mode; - dd->bdev = NULL; + dd->dm_dev.mode |= new_mode; + dd->dm_dev.bdev = NULL; r = open_dev(dd, dev, md); if (!r) close_dev(&dd_copy, md); @@ -421,7 +425,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, { int r; dev_t uninitialized_var(dev); - struct dm_dev *dd; + struct dm_dev_internal *dd; unsigned int major, minor; BUG_ON(!t); @@ -443,20 +447,20 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, if (!dd) return -ENOMEM; - dd->mode = mode; - dd->bdev = NULL; + dd->dm_dev.mode = mode; + dd->dm_dev.bdev = NULL; if ((r = open_dev(dd, dev, t->md))) { kfree(dd); return r; } - format_dev_t(dd->name, dev); + format_dev_t(dd->dm_dev.name, dev); atomic_set(&dd->count, 0); list_add(&dd->list, &t->devices); - } else if (dd->mode != (mode | dd->mode)) { + } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { r = upgrade_mode(dd, mode, t->md); if (r) return r; @@ -465,11 +469,11 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, if (!check_device_area(dd, start, len)) { DMWARN("device %s too small for target", path); - dm_put_device(ti, dd); + dm_put_device(ti, &dd->dm_dev); return -EINVAL; } - *result = dd; + *result = &dd->dm_dev; return 0; } @@ -540,8 +544,11 @@ int dm_get_device(struct dm_target *ti, const char *path, sector_t start, /* * Decrement a devices use count and remove it if necessary. */ -void dm_put_device(struct dm_target *ti, struct dm_dev *dd) +void dm_put_device(struct dm_target *ti, struct dm_dev *d) { + struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, + dm_dev); + if (atomic_dec_and_test(&dd->count)) { close_dev(dd, ti->table->md); list_del(&dd->list); @@ -937,12 +944,12 @@ int dm_table_resume_targets(struct dm_table *t) int dm_table_any_congested(struct dm_table *t, int bdi_bits) { - struct dm_dev *dd; + struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); int r = 0; list_for_each_entry(dd, devices, list) { - struct request_queue *q = bdev_get_queue(dd->bdev); + struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); r |= bdi_congested(&q->backing_dev_info, bdi_bits); } @@ -951,11 +958,11 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) void dm_table_unplug_all(struct dm_table *t) { - struct dm_dev *dd; + struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); list_for_each_entry(dd, devices, list) { - struct request_queue *q = bdev_get_queue(dd->bdev); + struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); blk_unplug(q); } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 1e59a0b0a78a..65f2f562220e 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -25,13 +25,10 @@ /* * List of devices that a metadevice uses and should open/close. */ -struct dm_dev { +struct dm_dev_internal { struct list_head list; - atomic_t count; - int mode; - struct block_device *bdev; - char name[16]; + struct dm_dev dm_dev; }; struct dm_table; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a90222e3297d..178390de195e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -13,7 +13,6 @@ struct dm_target; struct dm_table; -struct dm_dev; struct mapped_device; struct bio_vec; @@ -84,6 +83,12 @@ void dm_error(const char *message); */ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev); +struct dm_dev { + struct block_device *bdev; + int mode; + char name[16]; +}; + /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. From 89343da077ad564ed130c46e5ea6a79388410fa5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:10 +0100 Subject: [PATCH 15/18] dm: publish dm_get_mapinfo Publish dm_get_mapinfo in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.h | 1 - include/linux/device-mapper.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 65f2f562220e..911d434361dd 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -91,7 +91,6 @@ int dm_stripe_init(void); void dm_stripe_exit(void); void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); -union map_info *dm_get_mapinfo(struct bio *bio); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 178390de195e..6b80961650f0 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -207,6 +207,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct mapped_device *md); int dm_noflush_suspending(struct dm_target *ti); +union map_info *dm_get_mapinfo(struct bio *bio); /* * Geometry functions. From ea0ec640940c2ae3a8d71af3249fccf06a9997a3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:11 +0100 Subject: [PATCH 16/18] dm: publish dm_table_unplug_all Publish dm_table_unplug_all in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.h | 1 - include/linux/device-mapper.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 911d434361dd..8812208978e3 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -46,7 +46,6 @@ void dm_table_presuspend_targets(struct dm_table *t); void dm_table_postsuspend_targets(struct dm_table *t); int dm_table_resume_targets(struct dm_table *t); int dm_table_any_congested(struct dm_table *t, int bdi_bits); -void dm_table_unplug_all(struct dm_table *t); /* * To check the return value from dm_table_find_target(). diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6b80961650f0..e462c7f3b391 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -237,6 +237,11 @@ int dm_table_add_target(struct dm_table *t, const char *type, */ int dm_table_complete(struct dm_table *t); +/* + * Unplug all devices in a table. + */ +void dm_table_unplug_all(struct dm_table *t); + /* * Table reference counting. */ From 54160904260fa764ba6e2dc738770be30fdf9553 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 Oct 2008 13:37:12 +0100 Subject: [PATCH 17/18] dm: publish dm_vcalloc Publish dm_vcalloc in include/linux/device-mapper.h because this function is used by targets. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.h | 1 - include/linux/device-mapper.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8812208978e3..cd189da2b2fa 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -89,7 +89,6 @@ void dm_linear_exit(void); int dm_stripe_init(void); void dm_stripe_exit(void); -void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e462c7f3b391..08d783592b73 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -267,6 +267,11 @@ void dm_table_event(struct dm_table *t); */ int dm_swap_table(struct mapped_device *md, struct dm_table *t); +/* + * A wrapper around vmalloc. + */ +void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); + /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ From 0c2322e4ce144e130c03d813fe92de3798662c5e Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 10 Oct 2008 13:37:13 +0100 Subject: [PATCH 18/18] dm: detect lost queue Detect and report buggy drivers that destroy their request_queue. Signed-off-by: Alasdair G Kergon Cc: Stefan Raspl Cc: Jens Axboe Cc: Andrew Morton --- drivers/md/dm-table.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5dd32b8a57ac..a740a6950f59 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -482,6 +482,13 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); struct io_restrictions *rs = &ti->limits; + char b[BDEVNAME_SIZE]; + + if (unlikely(!q)) { + DMWARN("%s: Cannot set limits for nonexistent device %s", + dm_device_name(ti->table->md), bdevname(bdev, b)); + return; + } /* * Combine the device limits low. @@ -950,7 +957,14 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); - r |= bdi_congested(&q->backing_dev_info, bdi_bits); + char b[BDEVNAME_SIZE]; + + if (likely(q)) + r |= bdi_congested(&q->backing_dev_info, bdi_bits); + else + DMWARN_LIMIT("%s: any_congested: nonexistent device %s", + dm_device_name(t->md), + bdevname(dd->dm_dev.bdev, b)); } return r; @@ -963,8 +977,14 @@ void dm_table_unplug_all(struct dm_table *t) list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); + char b[BDEVNAME_SIZE]; - blk_unplug(q); + if (likely(q)) + blk_unplug(q); + else + DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s", + dm_device_name(t->md), + bdevname(dd->dm_dev.bdev, b)); } }