#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
+#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
* call has finished, the bio has been linked into some internal structure
* and so is visible to ->quiesce(), so we don't need the refcount any more.
*/
-static int md_make_request(struct request_queue *q, struct bio *bio)
+static void md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
struct mddev *mddev = q->queuedata;
- int rv;
int cpu;
unsigned int sectors;
if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) {
bio_io_error(bio);
- return 0;
+ return;
}
smp_rmb(); /* Ensure implications of 'active' are visible */
rcu_read_lock();
* go away inside make_request
*/
sectors = bio_sectors(bio);
- rv = mddev->pers->make_request(mddev, bio);
+ mddev->pers->make_request(mddev, bio);
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
wake_up(&mddev->sb_wait);
-
- return rv;
}
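/* Illustrative sketch, not part of this patch: example_make_request() is
 * hypothetical. With ->make_request returning void, a personality that
 * previously returned non-zero to have the caller resubmit the bio must
 * now call generic_make_request() itself:
 */
static void example_make_request(struct mddev *mddev, struct bio *bio)
{
	struct md_rdev *rdev = list_first_entry(&mddev->disks,
						struct md_rdev, same_set);

	bio->bi_bdev = rdev->bdev;	/* remap to a member device */
	generic_make_request(bio);	/* resubmit; nothing to return */
}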
/* mddev_suspend makes sure no new requests are submitted
bio_endio(bio, 0);
else {
bio->bi_rw &= ~REQ_FLUSH;
- if (mddev->pers->make_request(mddev, bio))
- generic_make_request(bio);
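+ /* ->make_request now consumes the bio itself, resubmitting internally
+ * if needed, so the old return-value check and explicit resubmit are
+ * gone.
+ */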
+ mddev->pers->make_request(mddev, bio);
}
mddev->flush_bio = NULL;
mddev->ctime == 0 && !mddev->hold_active) {
/* Array is not configured at all, and not held active,
* so destroy it */
- list_del(&mddev->all_mddevs);
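+ /* list_del_init() rather than list_del(): the sysfs show/store paths
+ * use list_empty(&mddev->all_mddevs) to detect an mddev that is being
+ * torn down.
+ */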
+ list_del_init(&mddev->all_mddevs);
bs = mddev->bio_set;
mddev->bio_set = NULL;
if (mddev->gendisk) {
}
if (sb->devflags & WriteMostly1)
set_bit(WriteMostly, &rdev->flags);
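+ /* MD_FEATURE_REPLACEMENT marks this device as a hot replacement that
+ * is being built up alongside the device it will take over from.
+ */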
+ if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
+ set_bit(Replacement, &rdev->flags);
} else /* MULTIPATH are always insync */
set_bit(In_sync, &rdev->flags);
sb->recovery_offset =
cpu_to_le64(rdev->recovery_offset);
}
+ if (test_bit(Replacement, &rdev->flags))
+ sb->feature_map |=
+ cpu_to_le32(MD_FEATURE_REPLACEMENT);
if (mddev->reshape_position != MaxSector) {
sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
sep = ",";
}
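+ /* A Faulty device is on its way out, so unacknowledged bad blocks on
+ * it should no longer report the device as "blocked".
+ */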
if (test_bit(Blocked, &rdev->flags) ||
- rdev->badblocks.unacked_exist) {
+ (rdev->badblocks.unacked_exist
+ && !test_bit(Faulty, &rdev->flags))) {
len += sprintf(page+len, "%sblocked", sep);
sep = ",";
}
len += sprintf(page+len, "%swrite_error", sep);
sep = ",";
}
+ if (test_bit(WantReplacement, &rdev->flags)) {
+ len += sprintf(page+len, "%swant_replacement", sep);
+ sep = ",";
+ }
+ if (test_bit(Replacement, &rdev->flags)) {
+ len += sprintf(page+len, "%sreplacement", sep);
+ sep = ",";
+ }
+
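+ /* Reading the state file can now yield, for example:
+ * "in_sync,want_replacement"
+ */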
return len+sprintf(page+len, "\n");
}
} else if (cmd_match(buf, "-write_error")) {
clear_bit(WriteErrorSeen, &rdev->flags);
err = 0;
+ } else if (cmd_match(buf, "want_replacement")) {
+ /* Any non-spare device that is not a replacement can
+ * become want_replacement at any time, but we then need to
+ * check if recovery is needed.
+ */
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Replacement, &rdev->flags))
+ set_bit(WantReplacement, &rdev->flags);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
+ err = 0;
+ } else if (cmd_match(buf, "-want_replacement")) {
+ /* Clearing 'want_replacement' is always allowed.
+ * Once replacement starts it is too late, though.

+ */
+ err = 0;
+ clear_bit(WantReplacement, &rdev->flags);
+ } else if (cmd_match(buf, "replacement")) {
+ /* Can only set a device as a replacement when the array has not
+ * yet been started. Once running, replacement is automatic
+ * from spares, or by assigning 'slot'.
+ */
+ if (rdev->mddev->pers)
+ err = -EBUSY;
+ else {
+ set_bit(Replacement, &rdev->flags);
+ err = 0;
+ }
+ } else if (cmd_match(buf, "-replacement")) {
+ /* Similarly, can only clear Replacement before start */
+ if (rdev->mddev->pers)
+ err = -EBUSY;
+ else {
+ clear_bit(Replacement, &rdev->flags);
+ err = 0;
+ }
}
if (!err)
sysfs_notify_dirent_safe(rdev->sysfs_state);
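/* Illustrative usage, device names hypothetical: userspace requests that
 * a device be replaced from a spare by writing the new keyword to its
 * state file, and can withdraw the request with the '-' form:
 *
 *	echo want_replacement > /sys/block/md0/md/dev-sdb1/state
 *	echo -want_replacement > /sys/block/md0/md/dev-sdb1/state
 */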
if (rdev->mddev->pers->hot_remove_disk == NULL)
return -EINVAL;
err = rdev->mddev->pers->
- hot_remove_disk(rdev->mddev, rdev->raid_disk);
+ hot_remove_disk(rdev->mddev, rdev);
if (err)
return err;
sysfs_unlink_rdev(rdev->mddev, rdev);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
md_wakeup_thread(rdev->mddev->thread);
} else if (rdev->mddev->pers) {
- struct md_rdev *rdev2;
/* Activating a spare ... or possibly reactivating
* if we ever get bitmaps working here.
*/
if (rdev->mddev->pers->hot_add_disk == NULL)
return -EINVAL;
- list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
- if (rdev2->raid_disk == slot)
- return -EEXIST;
-
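+ /* The old "slot already occupied" check is intentionally gone: with
+ * replacements, two devices may share a raid_disk slot, and genuine
+ * conflicts are left for the personality's hot_add_disk to reject.
+ */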
if (slot >= rdev->mddev->raid_disks &&
slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
return -ENOSPC;
if (err)
return err;
else {
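+ /* Presumably only a successful array_state write, rather than any
+ * attribute store, should release an UNTIL_IOCTL hold, so the
+ * hold_active clearing moves here from md_attr_store().
+ */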
+ if (mddev->hold_active == UNTIL_IOCTL)
+ mddev->hold_active = 0;
sysfs_notify_dirent_safe(mddev->sysfs_state);
return len;
}
if (!entry->show)
return -EIO;
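+ /* Pin the mddev while the attribute is handled: once it is off the
+ * all_mddevs list it is being deleted, so fail instead of touching
+ * soon-to-be-freed data.
+ */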
+ spin_lock(&all_mddevs_lock);
+ if (list_empty(&mddev->all_mddevs)) {
+ spin_unlock(&all_mddevs_lock);
+ return -EBUSY;
+ }
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
+
rv = mddev_lock(mddev);
if (!rv) {
rv = entry->show(mddev, page);
mddev_unlock(mddev);
}
+ mddev_put(mddev);
return rv;
}
return -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
+ spin_lock(&all_mddevs_lock);
+ if (list_empty(&mddev->all_mddevs)) {
+ spin_unlock(&all_mddevs_lock);
+ return -EBUSY;
+ }
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
rv = mddev_lock(mddev);
- if (mddev->hold_active == UNTIL_IOCTL)
- mddev->hold_active = 0;
if (!rv) {
rv = entry->store(mddev, page, length);
mddev_unlock(mddev);
}
+ mddev_put(mddev);
return rv;
}
struct mddev *mddev = NULL;
int ro;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
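+ /* The information-only ioctls are harmless, so allow them without
+ * privilege; everything else still needs CAP_SYS_ADMIN.
+ */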
+ switch (cmd) {
+ case RAID_VERSION:
+ case GET_ARRAY_INFO:
+ case GET_DISK_INFO:
+ break;
+ default:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ }
/*
* Commands dealing with the RAID driver but not any
if (test_bit(Faulty, &rdev->flags)) {
seq_printf(seq, "(F)");
continue;
- } else if (rdev->raid_disk < 0)
+ }
+ if (rdev->raid_disk < 0)
seq_printf(seq, "(S)"); /* spare */
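+ /* e.g. "sdc1[2](R)" in /proc/mdstat marks a replacement device */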
+ if (test_bit(Replacement, &rdev->flags))
+ seq_printf(seq, "(R)");
sectors += rdev->sectors;
}
{
struct md_rdev *rdev;
int spares = 0;
+ int removed = 0;
mddev->curr_resync_completed = 0;
!test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending) == 0) {
if (mddev->pers->hot_remove_disk(
- mddev, rdev->raid_disk)==0) {
+ mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
+ removed++;
}
}
+ if (removed)
+ sysfs_notify(&mddev->kobj, NULL,
+ "degraded");
- if (mddev->degraded) {
- list_for_each_entry(rdev, &mddev->disks, same_set) {
- if (rdev->raid_disk >= 0 &&
- !test_bit(In_sync, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags))
+
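+ /* Scan for devices to activate unconditionally, not only when the
+ * array is degraded: a replacement may need building even while the
+ * array is fully in_sync.
+ */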
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(In_sync, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags))
+ spares++;
+ if (rdev->raid_disk < 0
+ && !test_bit(Faulty, &rdev->flags)) {
+ rdev->recovery_offset = 0;
+ if (mddev->pers->
+ hot_add_disk(mddev, rdev) == 0) {
+ if (sysfs_link_rdev(mddev, rdev))
+ /* failure here is OK */;
+ spares++;
- if (rdev->raid_disk < 0
- && !test_bit(Faulty, &rdev->flags)) {
- rdev->recovery_offset = 0;
- if (mddev->pers->
- hot_add_disk(mddev, rdev) == 0) {
- if (sysfs_link_rdev(mddev, rdev))
- /* failure here is OK */;
- spares++;
- md_new_event(mddev);
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
- } else
- break;
+ md_new_event(mddev);
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
}
}
}
test_bit(Faulty, &rdev->flags) &&
atomic_read(&rdev->nr_pending) == 0) {
if (mddev->pers->hot_remove_disk(
- mddev, rdev->raid_disk)==0) {
+ mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
}
s + rdev->data_offset, sectors, acknowledged);
if (rv) {
/* Make sure they get written out promptly */
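+ /* Also wake anyone polling the per-device state file so userspace
+ * monitors learn of the new bad block promptly.
+ */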
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
md_wakeup_thread(rdev->mddev->thread);
}