diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 403f05ac1f2a..90e951730a23 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -397,14 +397,17 @@ static void raid10_end_write_request(struct bio *bio, int error) int dec_rdev = 1; struct r10conf *conf = r10_bio->mddev->private; int slot, repl; - struct md_rdev *rdev; + struct md_rdev *rdev = NULL; dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); if (repl) rdev = conf->mirrors[dev].replacement; - else + if (!rdev) { + smp_rmb(); + repl = 0; rdev = conf->mirrors[dev].rdev; + } /* * this branch is our 'one mirror IO has finished' event handler: */ @@ -1089,6 +1092,8 @@ retry_write: struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); struct md_rdev *rrdev = rcu_dereference( conf->mirrors[d].replacement); + if (rdev == rrdev) + rrdev = NULL; if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { atomic_inc(&rdev->nr_pending); blocked_rdev = rdev; @@ -1170,9 +1175,15 @@ retry_write: rdev_dec_pending(conf->mirrors[d].rdev, mddev); } if (r10_bio->devs[j].repl_bio) { + struct md_rdev *rdev; d = r10_bio->devs[j].devnum; - rdev_dec_pending( - conf->mirrors[d].replacement, mddev); + rdev = conf->mirrors[d].replacement; + if (!rdev) { + /* Race with remove_disk */ + smp_mb(); + rdev = conf->mirrors[d].rdev; + } + rdev_dec_pending(rdev, mddev); } } allow_barrier(conf); @@ -1230,6 +1241,10 @@ retry_write: max_sectors); r10_bio->devs[i].repl_bio = mbio; + /* We are actively writing to the original device + * so it cannot disappear, so the replacement cannot + * become NULL here + */ mbio->bi_sector = (r10_bio->devs[i].addr+ conf->mirrors[d].replacement->data_offset); mbio->bi_bdev = conf->mirrors[d].replacement->bdev; @@ -1404,9 +1419,27 @@ static int raid10_spare_active(struct mddev *mddev) */ for (i = 0; i < conf->raid_disks; i++) { tmp = conf->mirrors + i; - if (tmp->rdev - && !test_bit(Faulty, &tmp->rdev->flags) - && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { + if (tmp->replacement 
+ && tmp->replacement->recovery_offset == MaxSector + && !test_bit(Faulty, &tmp->replacement->flags) + && !test_and_set_bit(In_sync, &tmp->replacement->flags)) { + /* Replacement has just become active */ + if (!tmp->rdev + || !test_and_clear_bit(In_sync, &tmp->rdev->flags)) + count++; + if (tmp->rdev) { + /* Replaced device not technically faulty, + * but we need to be sure it gets removed + * and never re-added. + */ + set_bit(Faulty, &tmp->rdev->flags); + sysfs_notify_dirent_safe( + tmp->rdev->sysfs_state); + } + sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); + } else if (tmp->rdev + && !test_bit(Faulty, &tmp->rdev->flags) + && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { count++; sysfs_notify_dirent(tmp->rdev->sysfs_state); } @@ -1506,6 +1539,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (!test_bit(Faulty, &rdev->flags) && mddev->recovery_disabled != p->recovery_disabled && + (!p->replacement || p->replacement == rdev) && enough(conf, -1)) { err = -EBUSY; goto abort; @@ -1517,7 +1551,21 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) err = -EBUSY; *rdevp = rdev; goto abort; - } + } else if (p->replacement) { + /* We must have just cleared 'rdev' */ + p->rdev = p->replacement; + clear_bit(Replacement, &p->replacement->flags); + smp_mb(); /* Make sure other CPUs may see both as identical + * but will never see neither -- if they are careful. 
+ */ + p->replacement = NULL; + clear_bit(WantReplacement, &rdev->flags); + } else + /* We might have just removed the Replacement as faulty + * Clear the flag just in case + */ + clear_bit(WantReplacement, &rdev->flags); + err = md_integrity_register(mddev); abort: @@ -1595,13 +1643,15 @@ static void end_sync_write(struct bio *bio, int error) int bad_sectors; int slot; int repl; - struct md_rdev *rdev; + struct md_rdev *rdev = NULL; d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); if (repl) rdev = conf->mirrors[d].replacement; - else + if (!rdev) { + smp_mb(); rdev = conf->mirrors[d].rdev; + } if (!uptodate) { if (repl) @@ -2368,7 +2418,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) } bio = r10_bio->devs[m].repl_bio; rdev = conf->mirrors[dev].replacement; - if (bio == IO_MADE_GOOD) { + if (rdev && bio == IO_MADE_GOOD) { rdev_clear_badblocks( rdev, r10_bio->devs[m].addr,