Pileus Git - ~andy/linux/blobdiff - drivers/md/md.c
Prevent DM RAID from loading bitmap twice.
[~andy/linux] / drivers / md / md.c
index 266e82ebaf110892f749c8d9c6ae22211426ae31..33aa06f12b87a5ba9a7f73ce15a697ac052c3b3a 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/hdreg.h>
 #include <linux/proc_fs.h>
 #include <linux/random.h>
+#include <linux/module.h>
 #include <linux/reboot.h>
 #include <linux/file.h>
 #include <linux/compat.h>
@@ -332,18 +333,17 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
-static int md_make_request(struct request_queue *q, struct bio *bio)
+static void md_make_request(struct request_queue *q, struct bio *bio)
 {
        const int rw = bio_data_dir(bio);
        struct mddev *mddev = q->queuedata;
-       int rv;
        int cpu;
        unsigned int sectors;
 
        if (mddev == NULL || mddev->pers == NULL
            || !mddev->ready) {
                bio_io_error(bio);
-               return 0;
+               return;
        }
        smp_rmb(); /* Ensure implications of  'active' are visible */
        rcu_read_lock();
@@ -368,7 +368,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
         * go away inside make_request
         */
        sectors = bio_sectors(bio);
-       rv = mddev->pers->make_request(mddev, bio);
+       mddev->pers->make_request(mddev, bio);
 
        cpu = part_stat_lock();
        part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -377,8 +377,6 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
 
        if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
                wake_up(&mddev->sb_wait);
-
-       return rv;
 }
 
 /* mddev_suspend makes sure no new requests are submitted
@@ -477,8 +475,7 @@ static void md_submit_flush_data(struct work_struct *ws)
                bio_endio(bio, 0);
        else {
                bio->bi_rw &= ~REQ_FLUSH;
-               if (mddev->pers->make_request(mddev, bio))
-                       generic_make_request(bio);
+               mddev->pers->make_request(mddev, bio);
        }
 
        mddev->flush_bio = NULL;
@@ -573,7 +570,7 @@ static void mddev_put(struct mddev *mddev)
            mddev->ctime == 0 && !mddev->hold_active) {
                /* Array is not configured at all, and not held active,
                 * so destroy it */
-               list_del(&mddev->all_mddevs);
+               list_del_init(&mddev->all_mddevs);
                bs = mddev->bio_set;
                mddev->bio_set = NULL;
                if (mddev->gendisk) {
@@ -1717,6 +1714,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
                }
                if (sb->devflags & WriteMostly1)
                        set_bit(WriteMostly, &rdev->flags);
+               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
+                       set_bit(Replacement, &rdev->flags);
        } else /* MULTIPATH are always insync */
                set_bit(In_sync, &rdev->flags);
 
@@ -1770,6 +1769,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
                sb->recovery_offset =
                        cpu_to_le64(rdev->recovery_offset);
        }
+       if (test_bit(Replacement, &rdev->flags))
+               sb->feature_map |=
+                       cpu_to_le32(MD_FEATURE_REPLACEMENT);
 
        if (mddev->reshape_position != MaxSector) {
                sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
@@ -2549,7 +2551,8 @@ state_show(struct md_rdev *rdev, char *page)
                sep = ",";
        }
        if (test_bit(Blocked, &rdev->flags) ||
-           rdev->badblocks.unacked_exist) {
+           (rdev->badblocks.unacked_exist
+            && !test_bit(Faulty, &rdev->flags))) {
                len += sprintf(page+len, "%sblocked", sep);
                sep = ",";
        }
@@ -2562,6 +2565,15 @@ state_show(struct md_rdev *rdev, char *page)
                len += sprintf(page+len, "%swrite_error", sep);
                sep = ",";
        }
+       if (test_bit(WantReplacement, &rdev->flags)) {
+               len += sprintf(page+len, "%swant_replacement", sep);
+               sep = ",";
+       }
+       if (test_bit(Replacement, &rdev->flags)) {
+               len += sprintf(page+len, "%sreplacement", sep);
+               sep = ",";
+       }
+
        return len+sprintf(page+len, "\n");
 }
 
@@ -2630,6 +2642,42 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
        } else if (cmd_match(buf, "-write_error")) {
                clear_bit(WriteErrorSeen, &rdev->flags);
                err = 0;
+       } else if (cmd_match(buf, "want_replacement")) {
+               /* Any non-spare device that is not a replacement can
+                * become want_replacement at any time, but we then need to
+                * check if recovery is needed.
+                */
+               if (rdev->raid_disk >= 0 &&
+                   !test_bit(Replacement, &rdev->flags))
+                       set_bit(WantReplacement, &rdev->flags);
+               set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+               md_wakeup_thread(rdev->mddev->thread);
+               err = 0;
+       } else if (cmd_match(buf, "-want_replacement")) {
+               /* Clearing 'want_replacement' is always allowed.
+                * Once replacement starts it is too late though.
+                */
+               err = 0;
+               clear_bit(WantReplacement, &rdev->flags);
+       } else if (cmd_match(buf, "replacement")) {
+               /* Can only set a device as a replacement when array has not
+                * yet been started.  Once running, replacement is automatic
+                * from spares, or by assigning 'slot'.
+                */
+               if (rdev->mddev->pers)
+                       err = -EBUSY;
+               else {
+                       set_bit(Replacement, &rdev->flags);
+                       err = 0;
+               }
+       } else if (cmd_match(buf, "-replacement")) {
+               /* Similarly, can only clear Replacement before start */
+               if (rdev->mddev->pers)
+                       err = -EBUSY;
+               else {
+                       clear_bit(Replacement, &rdev->flags);
+                       err = 0;
+               }
        }
        if (!err)
                sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -2691,7 +2739,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
                if (rdev->mddev->pers->hot_remove_disk == NULL)
                        return -EINVAL;
                err = rdev->mddev->pers->
-                       hot_remove_disk(rdev->mddev, rdev->raid_disk);
+                       hot_remove_disk(rdev->mddev, rdev);
                if (err)
                        return err;
                sysfs_unlink_rdev(rdev->mddev, rdev);
@@ -2699,7 +2747,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
                set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
                md_wakeup_thread(rdev->mddev->thread);
        } else if (rdev->mddev->pers) {
-               struct md_rdev *rdev2;
                /* Activating a spare .. or possibly reactivating
                 * if we ever get bitmaps working here.
                 */
@@ -2713,10 +2760,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
                if (rdev->mddev->pers->hot_add_disk == NULL)
                        return -EINVAL;
 
-               list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
-                       if (rdev2->raid_disk == slot)
-                               return -EEXIST;
-
                if (slot >= rdev->mddev->raid_disks &&
                    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
                        return -ENOSPC;
@@ -3791,6 +3834,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
        if (err)
                return err;
        else {
+               if (mddev->hold_active == UNTIL_IOCTL)
+                       mddev->hold_active = 0;
                sysfs_notify_dirent_safe(mddev->sysfs_state);
                return len;
        }
@@ -4490,11 +4535,20 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 
        if (!entry->show)
                return -EIO;
+       spin_lock(&all_mddevs_lock);
+       if (list_empty(&mddev->all_mddevs)) {
+               spin_unlock(&all_mddevs_lock);
+               return -EBUSY;
+       }
+       mddev_get(mddev);
+       spin_unlock(&all_mddevs_lock);
+
        rv = mddev_lock(mddev);
        if (!rv) {
                rv = entry->show(mddev, page);
                mddev_unlock(mddev);
        }
+       mddev_put(mddev);
        return rv;
 }
 
@@ -4510,13 +4564,19 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
                return -EIO;
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
+       spin_lock(&all_mddevs_lock);
+       if (list_empty(&mddev->all_mddevs)) {
+               spin_unlock(&all_mddevs_lock);
+               return -EBUSY;
+       }
+       mddev_get(mddev);
+       spin_unlock(&all_mddevs_lock);
        rv = mddev_lock(mddev);
-       if (mddev->hold_active == UNTIL_IOCTL)
-               mddev->hold_active = 0;
        if (!rv) {
                rv = entry->store(mddev, page, length);
                mddev_unlock(mddev);
        }
+       mddev_put(mddev);
        return rv;
 }
 
@@ -6039,8 +6099,15 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
        struct mddev *mddev = NULL;
        int ro;
 
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
+       switch (cmd) {
+       case RAID_VERSION:
+       case GET_ARRAY_INFO:
+       case GET_DISK_INFO:
+               break;
+       default:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EACCES;
+       }
 
        /*
         * Commands dealing with the RAID driver but not any
@@ -6700,8 +6767,11 @@ static int md_seq_show(struct seq_file *seq, void *v)
                        if (test_bit(Faulty, &rdev->flags)) {
                                seq_printf(seq, "(F)");
                                continue;
-                       } else if (rdev->raid_disk < 0)
+                       }
+                       if (rdev->raid_disk < 0)
                                seq_printf(seq, "(S)"); /* spare */
+                       if (test_bit(Replacement, &rdev->flags))
+                               seq_printf(seq, "(R)");
                        sectors += rdev->sectors;
                }
 
@@ -7313,6 +7383,7 @@ static int remove_and_add_spares(struct mddev *mddev)
 {
        struct md_rdev *rdev;
        int spares = 0;
+       int removed = 0;
 
        mddev->curr_resync_completed = 0;
 
@@ -7323,30 +7394,32 @@ static int remove_and_add_spares(struct mddev *mddev)
                     ! test_bit(In_sync, &rdev->flags)) &&
                    atomic_read(&rdev->nr_pending)==0) {
                        if (mddev->pers->hot_remove_disk(
-                                   mddev, rdev->raid_disk)==0) {
+                                   mddev, rdev) == 0) {
                                sysfs_unlink_rdev(mddev, rdev);
                                rdev->raid_disk = -1;
+                               removed++;
                        }
                }
+       if (removed)
+               sysfs_notify(&mddev->kobj, NULL,
+                            "degraded");
 
-       if (mddev->degraded) {
-               list_for_each_entry(rdev, &mddev->disks, same_set) {
-                       if (rdev->raid_disk >= 0 &&
-                           !test_bit(In_sync, &rdev->flags) &&
-                           !test_bit(Faulty, &rdev->flags))
+
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (rdev->raid_disk >= 0 &&
+                   !test_bit(In_sync, &rdev->flags) &&
+                   !test_bit(Faulty, &rdev->flags))
+                       spares++;
+               if (rdev->raid_disk < 0
+                   && !test_bit(Faulty, &rdev->flags)) {
+                       rdev->recovery_offset = 0;
+                       if (mddev->pers->
+                           hot_add_disk(mddev, rdev) == 0) {
+                               if (sysfs_link_rdev(mddev, rdev))
+                                       /* failure here is OK */;
                                spares++;
-                       if (rdev->raid_disk < 0
-                           && !test_bit(Faulty, &rdev->flags)) {
-                               rdev->recovery_offset = 0;
-                               if (mddev->pers->
-                                   hot_add_disk(mddev, rdev) == 0) {
-                                       if (sysfs_link_rdev(mddev, rdev))
-                                               /* failure here is OK */;
-                                       spares++;
-                                       md_new_event(mddev);
-                                       set_bit(MD_CHANGE_DEVS, &mddev->flags);
-                               } else
-                                       break;
+                               md_new_event(mddev);
+                               set_bit(MD_CHANGE_DEVS, &mddev->flags);
                        }
                }
        }
@@ -7461,7 +7534,7 @@ void md_check_recovery(struct mddev *mddev)
                                    test_bit(Faulty, &rdev->flags) &&
                                    atomic_read(&rdev->nr_pending)==0) {
                                        if (mddev->pers->hot_remove_disk(
-                                                   mddev, rdev->raid_disk)==0) {
+                                                   mddev, rdev) == 0) {
                                                sysfs_unlink_rdev(mddev, rdev);
                                                rdev->raid_disk = -1;
                                        }
@@ -7843,6 +7916,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                  s + rdev->data_offset, sectors, acknowledged);
        if (rv) {
                /* Make sure they get written out promptly */
+               sysfs_notify_dirent_safe(rdev->sysfs_state);
                set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
                md_wakeup_thread(rdev->mddev->thread);
        }