Mirror of https://github.com/AuxXxilium/linux_dsm_epyc7002.git (synced 2025-01-18 17:06:26 +07:00)
Merge tag 'md-3.7-fixes' of git://neil.brown.name/md
commit 1d838d70fb

Pull md fixes from NeilBrown:
 "Several bug fixes for md in 3.7:
   - raid5 discard has problems
   - raid10 replacement devices have problems
   - bad block lock seqlock usage has problems
   - dm-raid doesn't free everything"

* tag 'md-3.7-fixes' of git://neil.brown.name/md:
  md/raid10: decrement correct pending counter when writing to replacement.
  md/raid10: close race that lose writes lost when replacement completes.
  md/raid5: Make sure we clear R5_Discard when discard is finished.
  md/raid5: move resolving of reconstruct_state earlier in stripe_handle.
  md/raid5: round discard alignment up to power of 2.
  md: make sure everything is freed when dm-raid stops an array.
  md: Avoid write invalid address if read_seqretry returned true.
  md: Reassigned the parameters if read_seqretry returned true in func md_is_badblock.
diff --git a/drivers/md/md.c b/drivers/md/md.c
@@ -1817,10 +1817,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 			memset(bbp, 0xff, PAGE_SIZE);
 
 			for (i = 0 ; i < bb->count ; i++) {
-				u64 internal_bb = *p++;
+				u64 internal_bb = p[i];
 				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
 						| BB_LEN(internal_bb));
-				*bbp++ = cpu_to_le64(store_bb);
+				bbp[i] = cpu_to_le64(store_bb);
 			}
 			bb->changed = 0;
 			if (read_seqretry(&bb->lock, seq))
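The two one-line changes above matter because super_1_sync() can loop back and redo this copy when read_seqretry() reports that the bad-block table changed mid-read; cursors that survive an attempt would then run past the page. A standalone userspace sketch of the difference (array sizes, names and the retry count are invented for illustration; this is not the kernel code):

#include <stdio.h>
#include <stdint.h>

#define COUNT 4

int main(void)
{
	uint64_t src[COUNT] = {1, 2, 3, 4};
	uint64_t dst[2 * COUNT] = {0};
	uint64_t *bbp = dst;
	int attempts = 2;	/* pretend read_seqretry() forced one retry */

	/* Old pattern: the cursor survives the retry, so the second pass
	 * lands in dst[4..7] instead of overwriting dst[0..3] (in the kernel
	 * it would walk off the end of the superblock page). */
	while (attempts--)
		for (int i = 0; i < COUNT; i++)
			*bbp++ = src[i];
	printf("pointer cursor ended %td slots past the start\n", bbp - dst);

	/* Fixed pattern: indexing from 0 on every attempt stays in bounds. */
	attempts = 2;
	while (attempts--)
		for (int i = 0; i < COUNT; i++)
			dst[i] = src[i];
	printf("indexed copy always writes dst[0..%d]\n", COUNT - 1);
	return 0;
}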
@@ -5294,7 +5294,7 @@ void md_stop_writes(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_stop_writes);
 
-void md_stop(struct mddev *mddev)
+static void __md_stop(struct mddev *mddev)
 {
 	mddev->ready = 0;
 	mddev->pers->stop(mddev);
@@ -5304,6 +5304,18 @@ void md_stop(struct mddev *mddev)
 	mddev->pers = NULL;
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 }
+
+void md_stop(struct mddev *mddev)
+{
+	/* stop the array and free an attached data structures.
+	 * This is called from dm-raid
+	 */
+	__md_stop(mddev);
+	bitmap_destroy(mddev);
+	if (mddev->bio_set)
+		bioset_free(mddev->bio_set);
+}
+
 EXPORT_SYMBOL_GPL(md_stop);
 
 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
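Taken together, the two hunks above split the teardown so that the exported entry point used by dm-raid also releases the bitmap and bio_set it previously leaked, while the internal callers keep using the bare helper. A simplified paraphrase of the resulting structure (bodies trimmed; the real code is in the diff above):

/* internal helper: personality teardown only */
static void __md_stop(struct mddev *mddev)
{
	mddev->ready = 0;
	mddev->pers->stop(mddev);
	/* ... */
	mddev->pers = NULL;
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}

/* exported entry point, the one dm-raid calls */
void md_stop(struct mddev *mddev)
{
	__md_stop(mddev);
	bitmap_destroy(mddev);		/* previously never freed on this path */
	if (mddev->bio_set)
		bioset_free(mddev->bio_set);
}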
@@ -5364,7 +5376,7 @@ static int do_md_stop(struct mddev * mddev, int mode,
 			set_disk_ro(disk, 0);
 
 		__md_stop_writes(mddev);
-		md_stop(mddev);
+		__md_stop(mddev);
 		mddev->queue->merge_bvec_fn = NULL;
 		mddev->queue->backing_dev_info.congested_fn = NULL;
 
@@ -7936,9 +7948,9 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
 		   sector_t *first_bad, int *bad_sectors)
 {
 	int hi;
-	int lo = 0;
+	int lo;
 	u64 *p = bb->page;
-	int rv = 0;
+	int rv;
 	sector_t target = s + sectors;
 	unsigned seq;
 
@@ -7953,7 +7965,8 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
 
 retry:
 	seq = read_seqbegin(&bb->lock);
-
+	lo = 0;
+	rv = 0;
 	hi = bb->count;
 
 	/* Binary search between lo and hi for 'target'
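Both md_is_badblock() changes exist because the search can run more than once under the bad-block seqlock: if a writer raced with the reader, read_seqretry() sends it back to retry, so lo and rv have to be reset on every pass rather than once at declaration. A self-contained sketch of that reader-retry idiom (the fake_read_seqbegin/fake_read_seqretry helpers and the table values are invented for the demo):

#include <stdio.h>

/* Toy stand-ins for read_seqbegin()/read_seqretry(): the first pass
 * pretends a writer raced with us, the second pass succeeds. */
static unsigned fake_read_seqbegin(void) { return 0; }
static int fake_read_seqretry(int *races_left) { return (*races_left)-- > 0; }

int main(void)
{
	int table[] = {3, 7, 12, 20, 31};
	int count = 5, target = 12;
	int races = 1;
	int lo, hi, rv;
	unsigned seq;

retry:
	seq = fake_read_seqbegin();
	(void)seq;
	lo = 0;			/* the fix: per-attempt state is reset here, */
	rv = 0;			/* not initialised once at declaration time   */
	hi = count;
	while (lo < hi) {	/* plain binary search over the snapshot */
		int mid = (lo + hi) / 2;
		if (table[mid] < target)
			lo = mid + 1;
		else
			hi = mid;
	}
	if (lo < count && table[lo] == target)
		rv = 1;
	if (fake_read_seqretry(&races))
		goto retry;	/* a writer updated the table: redo the search */

	printf("found=%d index=%d\n", rv, lo);
	return 0;
}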
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
@@ -499,7 +499,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	 */
 	one_write_done(r10_bio);
 	if (dec_rdev)
-		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+		rdev_dec_pending(rdev, conf->mddev);
 }
 
 /*
@@ -1334,18 +1334,21 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 			blocked_rdev = rrdev;
 			break;
 		}
+		if (rdev && (test_bit(Faulty, &rdev->flags)
+			     || test_bit(Unmerged, &rdev->flags)))
+			rdev = NULL;
 		if (rrdev && (test_bit(Faulty, &rrdev->flags)
 			      || test_bit(Unmerged, &rrdev->flags)))
 			rrdev = NULL;
 
 		r10_bio->devs[i].bio = NULL;
 		r10_bio->devs[i].repl_bio = NULL;
-		if (!rdev || test_bit(Faulty, &rdev->flags) ||
-		    test_bit(Unmerged, &rdev->flags)) {
+
+		if (!rdev && !rrdev) {
 			set_bit(R10BIO_Degraded, &r10_bio->state);
 			continue;
 		}
-		if (test_bit(WriteErrorSeen, &rdev->flags)) {
+		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
 			sector_t first_bad;
 			sector_t dev_sector = r10_bio->devs[i].addr;
 			int bad_sectors;
@@ -1387,8 +1390,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 				max_sectors = good_sectors;
 			}
 		}
-		r10_bio->devs[i].bio = bio;
-		atomic_inc(&rdev->nr_pending);
+		if (rdev) {
+			r10_bio->devs[i].bio = bio;
+			atomic_inc(&rdev->nr_pending);
+		}
 		if (rrdev) {
 			r10_bio->devs[i].repl_bio = bio;
 			atomic_inc(&rrdev->nr_pending);
@@ -1444,69 +1449,71 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	for (i = 0; i < conf->copies; i++) {
 		struct bio *mbio;
 		int d = r10_bio->devs[i].devnum;
-		if (!r10_bio->devs[i].bio)
-			continue;
+		if (r10_bio->devs[i].bio) {
+			struct md_rdev *rdev = conf->mirrors[d].rdev;
+			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+				    max_sectors);
+			r10_bio->devs[i].bio = mbio;
 
-		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-			    max_sectors);
-		r10_bio->devs[i].bio = mbio;
+			mbio->bi_sector	= (r10_bio->devs[i].addr+
+					   choose_data_offset(r10_bio,
+							      rdev));
+			mbio->bi_bdev = rdev->bdev;
+			mbio->bi_end_io	= raid10_end_write_request;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
+			mbio->bi_private = r10_bio;
 
-		mbio->bi_sector	= (r10_bio->devs[i].addr+
-				   choose_data_offset(r10_bio,
-						      conf->mirrors[d].rdev));
-		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
-		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
-		mbio->bi_private = r10_bio;
+			atomic_inc(&r10_bio->remaining);
 
-		atomic_inc(&r10_bio->remaining);
+			cb = blk_check_plugged(raid10_unplug, mddev,
+					       sizeof(*plug));
+			if (cb)
+				plug = container_of(cb, struct raid10_plug_cb,
+						    cb);
+			else
+				plug = NULL;
+			spin_lock_irqsave(&conf->device_lock, flags);
+			if (plug) {
+				bio_list_add(&plug->pending, mbio);
+				plug->pending_cnt++;
+			} else {
+				bio_list_add(&conf->pending_bio_list, mbio);
+				conf->pending_count++;
+			}
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			if (!plug)
+				md_wakeup_thread(mddev->thread);
+		}
 
-		cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
-		if (cb)
-			plug = container_of(cb, struct raid10_plug_cb, cb);
-		else
-			plug = NULL;
-		spin_lock_irqsave(&conf->device_lock, flags);
-		if (plug) {
-			bio_list_add(&plug->pending, mbio);
-			plug->pending_cnt++;
-		} else {
+		if (r10_bio->devs[i].repl_bio) {
+			struct md_rdev *rdev = conf->mirrors[d].replacement;
+			if (rdev == NULL) {
+				/* Replacement just got moved to main 'rdev' */
+				smp_mb();
+				rdev = conf->mirrors[d].rdev;
+			}
+			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+				    max_sectors);
+			r10_bio->devs[i].repl_bio = mbio;
+
+			mbio->bi_sector	= (r10_bio->devs[i].addr +
+					   choose_data_offset(
+						   r10_bio, rdev));
+			mbio->bi_bdev = rdev->bdev;
+			mbio->bi_end_io	= raid10_end_write_request;
+			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
+			mbio->bi_private = r10_bio;
+
+			atomic_inc(&r10_bio->remaining);
+			spin_lock_irqsave(&conf->device_lock, flags);
 			bio_list_add(&conf->pending_bio_list, mbio);
 			conf->pending_count++;
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			if (!mddev_check_plugged(mddev))
+				md_wakeup_thread(mddev->thread);
 		}
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!plug)
-			md_wakeup_thread(mddev->thread);
-
-		if (!r10_bio->devs[i].repl_bio)
-			continue;
-
-		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
-			    max_sectors);
-		r10_bio->devs[i].repl_bio = mbio;
-
-		/* We are actively writing to the original device
-		 * so it cannot disappear, so the replacement cannot
-		 * become NULL here
-		 */
-		mbio->bi_sector	= (r10_bio->devs[i].addr +
-				   choose_data_offset(
-					   r10_bio,
-					   conf->mirrors[d].replacement));
-		mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
-		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
-		mbio->bi_private = r10_bio;
-
-		atomic_inc(&r10_bio->remaining);
-		spin_lock_irqsave(&conf->device_lock, flags);
-		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!mddev_check_plugged(mddev))
-			md_wakeup_thread(mddev->thread);
 	}
 
 	/* Don't remove the bias on 'remaining' (one_write_done) until
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
@@ -2774,10 +2774,12 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) &&
 			    (test_bit(R5_UPTODATE, &dev->flags) ||
-			     test_and_clear_bit(R5_Discard, &dev->flags))) {
+			     test_bit(R5_Discard, &dev->flags))) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
 				pr_debug("Return write for disc %d\n", i);
+				if (test_and_clear_bit(R5_Discard, &dev->flags))
+					clear_bit(R5_UPTODATE, &dev->flags);
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_sector <
@@ -2795,7 +2797,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						 !test_bit(STRIPE_DEGRADED, &sh->state),
 							0);
 			}
-		}
+		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
+			clear_bit(R5_Discard, &sh->dev[i].flags);
 
 	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
 		if (atomic_dec_and_test(&conf->pending_full_writes))
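The raid5 change above replaces a test_and_clear_bit() that was buried in the if() condition with a plain test_bit(), and consumes R5_Discard (also dropping R5_UPTODATE) only where the finished discard is actually processed; the new 'else if' picks up blocks with no pending writes, such as the parity disk. A toy sketch of the two shapes (single-word flag helpers invented for the demo, not the kernel bitops API):

#include <stdio.h>

#define UPTODATE 0x1
#define DISCARD  0x2

static int test_bit_(unsigned f, unsigned *w)           { return (*w & f) != 0; }
static int test_and_clear_bit_(unsigned f, unsigned *w) { int r = (*w & f) != 0; *w &= ~f; return r; }

int main(void)
{
	unsigned flags = DISCARD;

	/* Old shape: merely evaluating the condition consumes DISCARD, nothing
	 * marks the block as no longer up to date, and had UPTODATE been set
	 * the short-circuit would have skipped the clear entirely. */
	if (test_bit_(UPTODATE, &flags) || test_and_clear_bit_(DISCARD, &flags))
		;	/* completion handling */
	printf("old: flags=%#x\n", flags);

	/* New shape: the condition only reads the flags; they are updated at
	 * the point the completed discard is actually handled. */
	flags = DISCARD | UPTODATE;
	if (test_bit_(UPTODATE, &flags) || test_bit_(DISCARD, &flags)) {
		if (test_and_clear_bit_(DISCARD, &flags))
			flags &= ~UPTODATE;	/* discarded data is not up to date */
		/* completion handling */
	}
	printf("new: flags=%#x\n", flags);
	return 0;
}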
@@ -3490,40 +3493,6 @@ static void handle_stripe(struct stripe_head *sh)
 			handle_failed_sync(conf, sh, &s);
 	}
 
-	/*
-	 * might be able to return some write requests if the parity blocks
-	 * are safe, or on a failed drive
-	 */
-	pdev = &sh->dev[sh->pd_idx];
-	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
-		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
-	qdev = &sh->dev[sh->qd_idx];
-	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
-		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
-		|| conf->level < 6;
-
-	if (s.written &&
-	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
-			     && !test_bit(R5_LOCKED, &pdev->flags)
-			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
-				 test_bit(R5_Discard, &pdev->flags))))) &&
-	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
-			     && !test_bit(R5_LOCKED, &qdev->flags)
-			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
-				 test_bit(R5_Discard, &qdev->flags))))))
-		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
-
-	/* Now we might consider reading some blocks, either to check/generate
-	 * parity, or to satisfy requests
-	 * or to load a block that is being partially written.
-	 */
-	if (s.to_read || s.non_overwrite
-	    || (conf->level == 6 && s.to_write && s.failed)
-	    || (s.syncing && (s.uptodate + s.compute < disks))
-	    || s.replacing
-	    || s.expanding)
-		handle_stripe_fill(sh, &s, disks);
-
 	/* Now we check to see if any write operations have recently
 	 * completed
 	 */
@@ -3561,6 +3530,40 @@ static void handle_stripe(struct stripe_head *sh)
 			s.dec_preread_active = 1;
 	}
 
+	/*
+	 * might be able to return some write requests if the parity blocks
+	 * are safe, or on a failed drive
+	 */
+	pdev = &sh->dev[sh->pd_idx];
+	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+	qdev = &sh->dev[sh->qd_idx];
+	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+		|| conf->level < 6;
+
+	if (s.written &&
+	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+			     && !test_bit(R5_LOCKED, &pdev->flags)
+			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
+				 test_bit(R5_Discard, &pdev->flags))))) &&
+	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+			     && !test_bit(R5_LOCKED, &qdev->flags)
+			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
+				 test_bit(R5_Discard, &qdev->flags))))))
+		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
+
+	/* Now we might consider reading some blocks, either to check/generate
+	 * parity, or to satisfy requests
+	 * or to load a block that is being partially written.
+	 */
+	if (s.to_read || s.non_overwrite
+	    || (conf->level == 6 && s.to_write && s.failed)
+	    || (s.syncing && (s.uptodate + s.compute < disks))
+	    || s.replacing
+	    || s.expanding)
+		handle_stripe_fill(sh, &s, disks);
+
 	/* Now to consider new write requests and what else, if anything
 	 * should be read. We do not handle new writes when:
 	 * 1/ A 'write' operation (copy+xor) is already in flight.
@@ -5529,6 +5532,10 @@ static int run(struct mddev *mddev)
 		 * discard data disk but write parity disk
 		 */
 		stripe = stripe * PAGE_SIZE;
+		/* Round up to power of 2, as discard handling
+		 * currently assumes that */
+		while ((stripe-1) & stripe)
+			stripe = (stripe | (stripe-1)) + 1;
 		mddev->queue->limits.discard_alignment = stripe;
 		mddev->queue->limits.discard_granularity = stripe;
 		/*
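The added loop rounds the discard alignment and granularity up to the next power of two: 'stripe | (stripe - 1)' fills in every bit below the highest set bit and the '+ 1' carries into the next power of two, while '(stripe - 1) & stripe' stays non-zero exactly until stripe already is a power of two. A standalone check with an arbitrary example value (the 3-data-disk/128 KiB figures are made up for illustration, not taken from the patch):

#include <stdio.h>

/* Same rounding as the hunk above: round 'stripe' up to a power of two. */
static unsigned long round_up_pow2(unsigned long stripe)
{
	while ((stripe - 1) & stripe)			/* non-zero unless already a power of two */
		stripe = (stripe | (stripe - 1)) + 1;	/* fill low bits, then carry */
	return stripe;
}

int main(void)
{
	/* e.g. a 3-data-disk array with 128 KiB chunks: 3 * 128 KiB */
	unsigned long stripe = 3UL * 128 * 1024;

	printf("%lu -> %lu\n", stripe, round_up_pow2(stripe));	/* 393216 -> 524288 */
	return 0;
}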