static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
{
struct pool_info *pi = data;
- struct page *page;
struct r1bio *r1_bio;
struct bio *bio;
int i, j;
j = 1;
while(j--) {
bio = r1_bio->bios[j];
- for (i = 0; i < RESYNC_PAGES; i++) {
- page = alloc_page(gfp_flags);
- if (unlikely(!page))
- goto out_free_pages;
+ bio->bi_vcnt = RESYNC_PAGES;
- bio->bi_io_vec[i].bv_page = page;
- bio->bi_vcnt = i+1;
- }
+ if (bio_alloc_pages(bio, gfp_flags))
+ goto out_free_bio;
}
/* If not user-requests, copy the page pointers to all bios */
if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
return r1_bio;
-out_free_pages:
- for (j=0 ; j < pi->raid_disks; j++)
- for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++)
- put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
- j = -1;
out_free_bio:
while (++j < pi->raid_disks)
bio_put(r1_bio->bios[j]);
(bio_data_dir(bio) == WRITE) ? "write" : "read",
(unsigned long long) bio->bi_sector,
(unsigned long long) bio->bi_sector +
- (bio->bi_size >> 9) - 1);
+ bio_sectors(bio) - 1);
call_bio_endio(r1_bio);
}
r1_bio->bios[mirror] = NULL;
to_put = bio;
- set_bit(R1BIO_Uptodate, &r1_bio->state);
+ /*
+ * Do not set R1BIO_Uptodate if the current device is
+ * rebuilding or Faulty. This is because we cannot use
+ * such device for properly reading the data back (we could
+ * potentially use it, if the current write would have felt
+ * before rdev->recovery_offset, but for simplicity we don't
+ * check this here.
+ */
+ if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
+ !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
/* Maybe we can clear some bad blocks. */
if (is_badblock(conf->mirrors[mirror].rdev,
" %llu-%llu\n",
(unsigned long long) mbio->bi_sector,
(unsigned long long) mbio->bi_sector +
- (mbio->bi_size >> 9) - 1);
+ bio_sectors(mbio) - 1);
call_bio_endio(r1_bio);
}
}
wake_up(&conf->wait_barrier);
}
-static void freeze_array(struct r1conf *conf)
+static void freeze_array(struct r1conf *conf, int extra)
{
/* stop syncio and normal IO and wait for everything to
* go quite.
* We increment barrier and nr_waiting, and then
- * wait until nr_pending match nr_queued+1
+ * wait until nr_pending match nr_queued+extra
* This is called in the context of one normal IO request
* that has failed. Thus any sync request that might be pending
* will be blocked by nr_pending, and we need to wait for
* pending IO requests to complete or be queued for re-try.
- * Thus the number queued (nr_queued) plus this request (1)
+ * Thus the number queued (nr_queued) plus this request (extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier,
- conf->nr_pending == conf->nr_queued+1,
+ conf->nr_pending == conf->nr_queued+extra,
conf->resync_lock,
flush_pending_writes(conf));
spin_unlock_irq(&conf->resync_lock);
if (unlikely(!bvecs))
return;
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i) {
bvecs[i] = *bvec;
bvecs[i].bv_page = alloc_page(GFP_NOIO);
if (unlikely(!bvecs[i].bv_page))
md_write_start(mddev, bio); /* wait on superblock update early */
if (bio_data_dir(bio) == WRITE &&
- bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
+ bio_end_sector(bio) > mddev->suspend_lo &&
bio->bi_sector < mddev->suspend_hi) {
/* As the suspend_* range is controlled by
* userspace, we want an interruptible
flush_signals(current);
prepare_to_wait(&conf->wait_barrier,
&w, TASK_INTERRUPTIBLE);
- if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
+ if (bio_end_sector(bio) <= mddev->suspend_lo ||
bio->bi_sector >= mddev->suspend_hi)
break;
schedule();
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = bio->bi_size >> 9;
+ r1_bio->sectors = bio_sectors(bio);
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector;
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector + sectors_handled;
struct bio_vec *bvec;
int j;
- /* Yes, I really want the '__' version so that
- * we clear any unused pointer in the io_vec, rather
- * than leave them unchanged. This is important
- * because when we come to free the pages, we won't
- * know the original bi_idx, so we just free
- * them all
+ /*
+ * We trimmed the bio, so _all is legit
*/
- __bio_for_each_segment(bvec, mbio, j, 0)
+ bio_for_each_segment_all(bvec, mbio, j)
bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
/* Mustn't call r1_bio_write_done before this next test,
* as it could result in the bio being freed.
*/
- if (sectors_handled < (bio->bi_size >> 9)) {
+ if (sectors_handled < bio_sectors(bio)) {
r1_bio_write_done(r1_bio);
/* We need another r1_bio. It has already been counted
* in bio->bi_phys_segments
*/
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector + sectors_handled;
* we wait for all outstanding requests to complete.
*/
synchronize_sched();
- raise_barrier(conf);
- lower_barrier(conf);
+ freeze_array(conf, 0);
+ unfreeze_array(conf);
clear_bit(Unmerged, &rdev->flags);
}
md_integrity_add_rdev(rdev, mddev);
*/
struct md_rdev *repl =
conf->mirrors[conf->raid_disks + number].rdev;
- raise_barrier(conf);
+ freeze_array(conf, 0);
clear_bit(Replacement, &repl->flags);
p->rdev = repl;
conf->mirrors[conf->raid_disks + number].rdev = NULL;
- lower_barrier(conf);
+ unfreeze_array(conf);
clear_bit(WantReplacement, &rdev->flags);
} else
clear_bit(WantReplacement, &rdev->flags);
struct bio *sbio = r1_bio->bios[i];
int size;
- if (r1_bio->bios[i]->bi_end_io != end_sync_read)
+ if (sbio->bi_end_io != end_sync_read)
continue;
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
continue;
}
/* fixup the bio for reuse */
+ bio_reset(sbio);
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
- sbio->bi_idx = 0;
- sbio->bi_phys_segments = 0;
- sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
- sbio->bi_flags |= 1 << BIO_UPTODATE;
- sbio->bi_next = NULL;
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ sbio->bi_end_io = end_sync_read;
+ sbio->bi_private = r1_bio;
+
size = sbio->bi_size;
for (j = 0; j < vcnt ; j++) {
struct bio_vec *bi;
else
bi->bv_len = size;
size -= PAGE_SIZE;
- memcpy(page_address(bi->bv_page),
- page_address(pbio->bi_io_vec[j].bv_page),
- PAGE_SIZE);
}
+
+ bio_copy_data(sbio, pbio);
}
return 0;
}
wbio->bi_rw = WRITE;
wbio->bi_end_io = end_sync_write;
atomic_inc(&r1_bio->remaining);
- md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
+ md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
generic_make_request(wbio);
}
}
}
-static void bi_complete(struct bio *bio, int error)
-{
- complete((struct completion *)bio->bi_private);
-}
-
-static int submit_bio_wait(int rw, struct bio *bio)
-{
- struct completion event;
- rw |= REQ_SYNC;
-
- init_completion(&event);
- bio->bi_private = &event;
- bio->bi_end_io = bi_complete;
- submit_bio(rw, bio);
- wait_for_completion(&event);
-
- return test_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
static int narrow_write_error(struct r1bio *r1_bio, int i)
{
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
struct md_rdev *rdev = conf->mirrors[i].rdev;
- int vcnt, idx;
- struct bio_vec *vec;
/* bio has the data to be written to device 'i' where
* we just recently had a write error.
& ~(sector_t)(block_sectors - 1))
- sector;
- if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- vcnt = r1_bio->behind_page_count;
- vec = r1_bio->behind_bvecs;
- idx = 0;
- while (vec[idx].bv_page == NULL)
- idx++;
- } else {
- vcnt = r1_bio->master_bio->bi_vcnt;
- vec = r1_bio->master_bio->bi_io_vec;
- idx = r1_bio->master_bio->bi_idx;
- }
while (sect_to_write) {
struct bio *wbio;
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors'*/
- wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
- memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
- wbio->bi_sector = r1_bio->sector;
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+ unsigned vcnt = r1_bio->behind_page_count;
+ struct bio_vec *vec = r1_bio->behind_bvecs;
+
+ while (!vec->bv_page) {
+ vec++;
+ vcnt--;
+ }
+
+ wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+ memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+
+ wbio->bi_vcnt = vcnt;
+ } else {
+ wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+ }
+
wbio->bi_rw = WRITE;
- wbio->bi_vcnt = vcnt;
+ wbio->bi_sector = r1_bio->sector;
wbio->bi_size = r1_bio->sectors << 9;
- wbio->bi_idx = idx;
md_trim_bio(wbio, sector - r1_bio->sector, sectors);
wbio->bi_sector += rdev->data_offset;
* frozen
*/
if (mddev->ro == 0) {
- freeze_array(conf);
+ freeze_array(conf, 1);
fix_read_error(conf, r1_bio->read_disk,
r1_bio->sector, r1_bio->sectors);
unfreeze_array(conf);
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = mbio;
- r1_bio->sectors = (mbio->bi_size >> 9)
- - sectors_handled;
+ r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
r1_bio->state = 0;
set_bit(R1BIO_ReadError, &r1_bio->state);
r1_bio->mddev = mddev;
for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev;
bio = r1_bio->bios[i];
-
- /* take from bio_init */
- bio->bi_next = NULL;
- bio->bi_flags &= ~(BIO_POOL_MASK-1);
- bio->bi_flags |= 1 << BIO_UPTODATE;
- bio->bi_rw = READ;
- bio->bi_vcnt = 0;
- bio->bi_idx = 0;
- bio->bi_phys_segments = 0;
- bio->bi_size = 0;
- bio->bi_end_io = NULL;
- bio->bi_private = NULL;
+ bio_reset(bio);
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
return PTR_ERR(conf);
if (mddev->queue)
- blk_queue_max_write_same_sectors(mddev->queue,
- mddev->chunk_sectors);
+ blk_queue_max_write_same_sectors(mddev->queue, 0);
+
rdev_for_each(rdev, mddev) {
if (!mddev->gendisk)
continue;
return -ENOMEM;
}
- raise_barrier(conf);
+ freeze_array(conf, 0);
/* ok, everything is stopped */
oldpool = conf->r1bio_pool;
conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0;
- lower_barrier(conf);
+ unfreeze_array(conf);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);