A few fixes for md.

Most of these are related to the new "batched stripe writeout",
 but there are a few others.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIVAwUAVVAgfznsnt1WYoG5AQKNsBAAw+yXVnRlLfzTPOhiIolvdmdmck6ghpV7
 eTHTbDEVkDo+a+caU01mhkHtL2wEuxGqr4XHTASIM9Xjn2YLhH14HOtBPl6+IddR
 F58X+SN8/pK4tzc5P42AAbfGfqeJSBPSl49L94+Pd9BsagNYZOUx7c/WK5lKay6O
 ZQRvI3Bafg7VTzHCw931sN11yY/PS+45zpOentk5TlavRPcZfuVTh1QzSyAUvbtN
 sAFK2+ISflj8+TW0rLo44c3kkC7ARFKcq5lJt4WsQUntJTjiIROvqvaEP+IUZkGr
 y+yvCJbskJ6B03XrcuvdkFrp3Qe0yWtHCN6qKhc8AI/c/pcuz24xTYQ6V6LDyAy8
 N2z6m1Kri97qxZpOONNh31WOCmVyFmwiz7+54soEeTDg3xgTb60UiZe8T+9ZcxoY
 sN8ksVSGGapgQUUGYcidbZla6uSWNjZq9hQgDJFUwG3HhvT3E/qiUT9kqAxfPsLE
 Vxw/KQ5GkW+7ldaPmGuv29hc3THjsiLY5Bx0kBlmgao3fcT61WbNcZPJOQfMxlT7
 6qoJSiNoq612TaHj3X06FXhMGE/fwjwjOO+PpULxM47giHcKrVR/qlG4kREYa2vj
 UgV05rP/S+k3Mce4dHzvUuE57t0ADff0d/mZvhfzERjNdwTMdWgia89XLg7D06rk
 UGB/o2IwWug=
 =tapF
 -----END PGP SIGNATURE-----

Merge tag 'md/4.1-rc3-fixes' of git://neil.brown.name/md

Pull md bugfixes from Neil Brown:
 "A few fixes for md.

  Most of these are related to the new "batched stripe writeout", but
  there are a few others"

* tag 'md/4.1-rc3-fixes' of git://neil.brown.name/md:
  md/raid5: fix handling of degraded stripes in batches.
  md/raid5: fix allocation of 'scribble' array.
  md/raid5: don't record new size if resize_stripes fails.
  md/raid5: avoid reading parity blocks for full-stripe write to degraded array
  md/raid5: more incorrect BUG_ON in handle_stripe_fill.
  md/raid5: new alloc_stripe() to allocate an initialize a stripe.
  md-raid0: conditional mddev->queue access to suit dm-raid
This commit is contained in:
Linus Torvalds 2015-05-11 10:33:31 -07:00
commit c91aa67eed
2 changed files with 73 additions and 55 deletions

View File

@ -188,8 +188,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
}
dev[j] = rdev1;
disk_stack_limits(mddev->gendisk, rdev1->bdev,
rdev1->data_offset << 9);
if (mddev->queue)
disk_stack_limits(mddev->gendisk, rdev1->bdev,
rdev1->data_offset << 9);
if (rdev1->bdev->bd_disk->queue->merge_bvec_fn)
conf->has_merge_bvec = 1;

View File

@ -1078,9 +1078,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
pr_debug("skip op %ld on disc %d for sector %llu\n",
bi->bi_rw, i, (unsigned long long)sh->sector);
clear_bit(R5_LOCKED, &sh->dev[i].flags);
if (sh->batch_head)
set_bit(STRIPE_BATCH_ERR,
&sh->batch_head->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
@ -1971,17 +1968,30 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu();
}
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
{
struct stripe_head *sh;
sh = kmem_cache_zalloc(sc, gfp);
if (sh) {
spin_lock_init(&sh->stripe_lock);
spin_lock_init(&sh->batch_lock);
INIT_LIST_HEAD(&sh->batch_list);
INIT_LIST_HEAD(&sh->lru);
atomic_set(&sh->count, 1);
}
return sh;
}
static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{
struct stripe_head *sh;
sh = kmem_cache_zalloc(conf->slab_cache, gfp);
sh = alloc_stripe(conf->slab_cache, gfp);
if (!sh)
return 0;
sh->raid_conf = conf;
spin_lock_init(&sh->stripe_lock);
if (grow_buffers(sh, gfp)) {
shrink_buffers(sh);
kmem_cache_free(conf->slab_cache, sh);
@ -1990,13 +2000,8 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
sh->hash_lock_index =
conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
/* we just created an active stripe so... */
atomic_set(&sh->count, 1);
atomic_inc(&conf->active_stripes);
INIT_LIST_HEAD(&sh->lru);
spin_lock_init(&sh->batch_lock);
INIT_LIST_HEAD(&sh->batch_list);
sh->batch_head = NULL;
release_stripe(sh);
conf->max_nr_stripes++;
return 1;
@ -2060,6 +2065,35 @@ static struct flex_array *scribble_alloc(int num, int cnt, gfp_t flags)
return ret;
}
static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
{
unsigned long cpu;
int err = 0;
mddev_suspend(conf->mddev);
get_online_cpus();
for_each_present_cpu(cpu) {
struct raid5_percpu *percpu;
struct flex_array *scribble;
percpu = per_cpu_ptr(conf->percpu, cpu);
scribble = scribble_alloc(new_disks,
new_sectors / STRIPE_SECTORS,
GFP_NOIO);
if (scribble) {
flex_array_free(percpu->scribble);
percpu->scribble = scribble;
} else {
err = -ENOMEM;
break;
}
}
put_online_cpus();
mddev_resume(conf->mddev);
return err;
}
static int resize_stripes(struct r5conf *conf, int newsize)
{
/* Make all the stripes able to hold 'newsize' devices.
@ -2088,7 +2122,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
unsigned long cpu;
int err;
struct kmem_cache *sc;
int i;
@ -2109,13 +2142,11 @@ static int resize_stripes(struct r5conf *conf, int newsize)
return -ENOMEM;
for (i = conf->max_nr_stripes; i; i--) {
nsh = kmem_cache_zalloc(sc, GFP_KERNEL);
nsh = alloc_stripe(sc, GFP_KERNEL);
if (!nsh)
break;
nsh->raid_conf = conf;
spin_lock_init(&nsh->stripe_lock);
list_add(&nsh->lru, &newstripes);
}
if (i) {
@ -2142,13 +2173,11 @@ static int resize_stripes(struct r5conf *conf, int newsize)
lock_device_hash_lock(conf, hash));
osh = get_free_stripe(conf, hash);
unlock_device_hash_lock(conf, hash);
atomic_set(&nsh->count, 1);
for(i=0; i<conf->pool_size; i++) {
nsh->dev[i].page = osh->dev[i].page;
nsh->dev[i].orig_page = osh->dev[i].page;
}
for( ; i<newsize; i++)
nsh->dev[i].page = NULL;
nsh->hash_lock_index = hash;
kmem_cache_free(conf->slab_cache, osh);
cnt++;
@ -2174,25 +2203,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
} else
err = -ENOMEM;
get_online_cpus();
for_each_present_cpu(cpu) {
struct raid5_percpu *percpu;
struct flex_array *scribble;
percpu = per_cpu_ptr(conf->percpu, cpu);
scribble = scribble_alloc(newsize, conf->chunk_sectors /
STRIPE_SECTORS, GFP_NOIO);
if (scribble) {
flex_array_free(percpu->scribble);
percpu->scribble = scribble;
} else {
err = -ENOMEM;
break;
}
}
put_online_cpus();
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
@ -2212,7 +2222,8 @@ static int resize_stripes(struct r5conf *conf, int newsize)
conf->slab_cache = sc;
conf->active_name = 1-conf->active_name;
conf->pool_size = newsize;
if (!err)
conf->pool_size = newsize;
return err;
}
@ -2434,7 +2445,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
}
rdev_dec_pending(rdev, conf->mddev);
if (sh->batch_head && !uptodate)
if (sh->batch_head && !uptodate && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
@ -3278,7 +3289,9 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
/* reconstruct-write isn't being forced */
return 0;
for (i = 0; i < s->failed; i++) {
if (!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
if (s->failed_num[i] != sh->pd_idx &&
s->failed_num[i] != sh->qd_idx &&
!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
!test_bit(R5_OVERWRITE, &fdev[i]->flags))
return 1;
}
@ -3298,6 +3311,7 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
*/
BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
BUG_ON(test_bit(R5_Wantread, &dev->flags));
BUG_ON(sh->batch_head);
if ((s->uptodate == disks - 1) &&
(s->failed && (disk_idx == s->failed_num[0] ||
disk_idx == s->failed_num[1]))) {
@ -3366,7 +3380,6 @@ static void handle_stripe_fill(struct stripe_head *sh,
{
int i;
BUG_ON(sh->batch_head);
/* look for blocks to read/compute, skip this if a compute
* is already in flight, or if the stripe contents are in the
* midst of changing due to a write
@ -4198,15 +4211,9 @@ static void check_break_stripe_batch_list(struct stripe_head *sh)
return;
head_sh = sh;
do {
sh = list_first_entry(&sh->batch_list,
struct stripe_head, batch_list);
BUG_ON(sh == head_sh);
} while (!test_bit(STRIPE_DEGRADED, &sh->state));
while (sh != head_sh) {
next = list_first_entry(&sh->batch_list,
struct stripe_head, batch_list);
list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
list_del_init(&sh->batch_list);
set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
@ -4226,8 +4233,6 @@ static void check_break_stripe_batch_list(struct stripe_head *sh)
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
sh = next;
}
}
@ -6221,8 +6226,11 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
percpu->spare_page = alloc_page(GFP_KERNEL);
if (!percpu->scribble)
percpu->scribble = scribble_alloc(max(conf->raid_disks,
conf->previous_raid_disks), conf->chunk_sectors /
STRIPE_SECTORS, GFP_KERNEL);
conf->previous_raid_disks),
max(conf->chunk_sectors,
conf->prev_chunk_sectors)
/ STRIPE_SECTORS,
GFP_KERNEL);
if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
free_scratch_buffer(conf, percpu);
@ -7198,6 +7206,15 @@ static int check_reshape(struct mddev *mddev)
if (!check_stripe_cache(mddev))
return -ENOSPC;
if (mddev->new_chunk_sectors > mddev->chunk_sectors ||
mddev->delta_disks > 0)
if (resize_chunks(conf,
conf->previous_raid_disks
+ max(0, mddev->delta_disks),
max(mddev->new_chunk_sectors,
mddev->chunk_sectors)
) < 0)
return -ENOMEM;
return resize_stripes(conf, (conf->previous_raid_disks
+ mddev->delta_disks));
}