Pileus Git - ~andy/linux/blobdiff - drivers/md/raid10.c
Merge branch 'for-3.14/core' of git://git.kernel.dk/linux-block
[~andy/linux] / drivers / md / raid10.c
index 8d39d63281b9b5441b3ec8e524955356c8690871..33fc408e5eacef0a1dce55fd5c0d578fc244b663 100644 (file)
@@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
        kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void __make_request(struct mddev *mddev, struct bio *bio)
 {
        struct r10conf *conf = mddev->private;
        struct r10bio *r10_bio;
        struct bio *read_bio;
        int i;
-       sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
-       int chunk_sects = chunk_mask + 1;
        const int rw = bio_data_dir(bio);
        const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
        const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,88 +1172,27 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        int max_sectors;
        int sectors;
 
-       if (unlikely(bio->bi_rw & REQ_FLUSH)) {
-               md_flush_request(mddev, bio);
-               return;
-       }
-
-       /* If this request crosses a chunk boundary, we need to
-        * split it.  This will only happen for 1 PAGE (or less) requests.
-        */
-       if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
-                    > chunk_sects
-                    && (conf->geo.near_copies < conf->geo.raid_disks
-                        || conf->prev.near_copies < conf->prev.raid_disks))) {
-               struct bio_pair *bp;
-               /* Sanity check -- queue functions should prevent this happening */
-               if (bio_segments(bio) > 1)
-                       goto bad_map;
-               /* This is a one page bio that upper layers
-                * refuse to split for us, so we need to split it.
-                */
-               bp = bio_split(bio,
-                              chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
-
-               /* Each of these 'make_request' calls will call 'wait_barrier'.
-                * If the first succeeds but the second blocks due to the resync
-                * thread raising the barrier, we will deadlock because the
-                * IO to the underlying device will be queued in generic_make_request
-                * and will never complete, so will never reduce nr_pending.
-                * So increment nr_waiting here so no new raise_barriers will
-                * succeed, and so the second wait_barrier cannot block.
-                */
-               spin_lock_irq(&conf->resync_lock);
-               conf->nr_waiting++;
-               spin_unlock_irq(&conf->resync_lock);
-
-               make_request(mddev, &bp->bio1);
-               make_request(mddev, &bp->bio2);
-
-               spin_lock_irq(&conf->resync_lock);
-               conf->nr_waiting--;
-               wake_up(&conf->wait_barrier);
-               spin_unlock_irq(&conf->resync_lock);
-
-               bio_pair_release(bp);
-               return;
-       bad_map:
-               printk("md/raid10:%s: make_request bug: can't convert block across chunks"
-                      " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
-                      (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
-
-               bio_io_error(bio);
-               return;
-       }
-
-       md_write_start(mddev, bio);
-
-       /*
-        * Register the new request and wait if the reconstruction
-        * thread has put up a bar for new requests.
-        * Continue immediately if no resync is active currently.
-        */
-       wait_barrier(conf);
-
        sectors = bio_sectors(bio);
        while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-           bio->bi_sector < conf->reshape_progress &&
-           bio->bi_sector + sectors > conf->reshape_progress) {
+           bio->bi_iter.bi_sector < conf->reshape_progress &&
+           bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
                /* IO spans the reshape position.  Need to wait for
                 * reshape to pass
                 */
                allow_barrier(conf);
                wait_event(conf->wait_barrier,
-                          conf->reshape_progress <= bio->bi_sector ||
-                          conf->reshape_progress >= bio->bi_sector + sectors);
+                          conf->reshape_progress <= bio->bi_iter.bi_sector ||
+                          conf->reshape_progress >= bio->bi_iter.bi_sector +
+                          sectors);
                wait_barrier(conf);
        }
        if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
            bio_data_dir(bio) == WRITE &&
            (mddev->reshape_backwards
-            ? (bio->bi_sector < conf->reshape_safe &&
-               bio->bi_sector + sectors > conf->reshape_progress)
-            : (bio->bi_sector + sectors > conf->reshape_safe &&
-               bio->bi_sector < conf->reshape_progress))) {
+            ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+               bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+            : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+               bio->bi_iter.bi_sector < conf->reshape_progress))) {
                /* Need to update reshape_position in metadata */
                mddev->reshape_position = conf->reshape_progress;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1273,7 +1210,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        r10_bio->sectors = sectors;
 
        r10_bio->mddev = mddev;
-       r10_bio->sector = bio->bi_sector;
+       r10_bio->sector = bio->bi_iter.bi_sector;
        r10_bio->state = 0;
 
        /* We might need to issue multiple reads to different
@@ -1302,13 +1239,13 @@ read_again:
                slot = r10_bio->read_slot;
 
                read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-               bio_trim(read_bio, r10_bio->sector - bio->bi_sector,
+               bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
                         max_sectors);
 
                r10_bio->devs[slot].bio = read_bio;
                r10_bio->devs[slot].rdev = rdev;
 
-               read_bio->bi_sector = r10_bio->devs[slot].addr +
+               read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
                        choose_data_offset(r10_bio, rdev);
                read_bio->bi_bdev = rdev->bdev;
                read_bio->bi_end_io = raid10_end_read_request;
@@ -1320,7 +1257,7 @@ read_again:
                         * need another r10_bio.
                         */
                        sectors_handled = (r10_bio->sector + max_sectors
-                                          - bio->bi_sector);
+                                          - bio->bi_iter.bi_sector);
                        r10_bio->sectors = max_sectors;
                        spin_lock_irq(&conf->device_lock);
                        if (bio->bi_phys_segments == 0)
@@ -1341,7 +1278,8 @@ read_again:
                        r10_bio->sectors = bio_sectors(bio) - sectors_handled;
                        r10_bio->state = 0;
                        r10_bio->mddev = mddev;
-                       r10_bio->sector = bio->bi_sector + sectors_handled;
+                       r10_bio->sector = bio->bi_iter.bi_sector +
+                               sectors_handled;
                        goto read_again;
                } else
                        generic_make_request(read_bio);
@@ -1499,7 +1437,8 @@ retry_write:
                        bio->bi_phys_segments++;
                spin_unlock_irq(&conf->device_lock);
        }
-       sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+       sectors_handled = r10_bio->sector + max_sectors -
+               bio->bi_iter.bi_sector;
 
        atomic_set(&r10_bio->remaining, 1);
        bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
@@ -1510,11 +1449,11 @@ retry_write:
                if (r10_bio->devs[i].bio) {
                        struct md_rdev *rdev = conf->mirrors[d].rdev;
                        mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-                       bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+                       bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
                                 max_sectors);
                        r10_bio->devs[i].bio = mbio;
 
-                       mbio->bi_sector = (r10_bio->devs[i].addr+
+                       mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
                                           choose_data_offset(r10_bio,
                                                              rdev));
                        mbio->bi_bdev = rdev->bdev;
@@ -1553,11 +1492,11 @@ retry_write:
                                rdev = conf->mirrors[d].rdev;
                        }
                        mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-                       bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+                       bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
                                 max_sectors);
                        r10_bio->devs[i].repl_bio = mbio;
 
-                       mbio->bi_sector = (r10_bio->devs[i].addr +
+                       mbio->bi_iter.bi_sector = (r10_bio->devs[i].addr +
                                           choose_data_offset(
                                                   r10_bio, rdev));
                        mbio->bi_bdev = rdev->bdev;
@@ -1591,11 +1530,57 @@ retry_write:
                r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 
                r10_bio->mddev = mddev;
-               r10_bio->sector = bio->bi_sector + sectors_handled;
+               r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
                r10_bio->state = 0;
                goto retry_write;
        }
        one_write_done(r10_bio);
+}
+
+static void make_request(struct mddev *mddev, struct bio *bio)
+{
+       struct r10conf *conf = mddev->private;
+       sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
+       int chunk_sects = chunk_mask + 1;
+
+       struct bio *split;
+
+       if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+               md_flush_request(mddev, bio);
+               return;
+       }
+
+       md_write_start(mddev, bio);
+
+       /*
+        * Register the new request and wait if the reconstruction
+        * thread has put up a bar for new requests.
+        * Continue immediately if no resync is active currently.
+        */
+       wait_barrier(conf);
+
+       do {
+
+               /*
+                * If this request crosses a chunk boundary, we need to split
+                * it.
+                */
+               if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
+                            bio_sectors(bio) > chunk_sects
+                            && (conf->geo.near_copies < conf->geo.raid_disks
+                                || conf->prev.near_copies <
+                                conf->prev.raid_disks))) {
+                       split = bio_split(bio, chunk_sects -
+                                         (bio->bi_iter.bi_sector &
+                                          (chunk_sects - 1)),
+                                         GFP_NOIO, fs_bio_set);
+                       bio_chain(split, bio);
+               } else {
+                       split = bio;
+               }
+
+               __make_request(mddev, split);
+       } while (split != bio);
 
        /* In case raid10d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
@@ -2124,10 +2109,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                bio_reset(tbio);
 
                tbio->bi_vcnt = vcnt;
-               tbio->bi_size = r10_bio->sectors << 9;
+               tbio->bi_iter.bi_size = r10_bio->sectors << 9;
                tbio->bi_rw = WRITE;
                tbio->bi_private = r10_bio;
-               tbio->bi_sector = r10_bio->devs[i].addr;
+               tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
 
                for (j=0; j < vcnt ; j++) {
                        tbio->bi_io_vec[j].bv_offset = 0;
@@ -2144,7 +2129,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                atomic_inc(&r10_bio->remaining);
                md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
 
-               tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
+               tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
                tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                generic_make_request(tbio);
        }
@@ -2614,8 +2599,8 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
                        sectors = sect_to_write;
                /* Write at 'sector' for 'sectors' */
                wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-               bio_trim(wbio, sector - bio->bi_sector, sectors);
-               wbio->bi_sector = (r10_bio->devs[i].addr+
+               bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
+               wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
                                   choose_data_offset(r10_bio, rdev) +
                                   (sector - r10_bio->sector));
                wbio->bi_bdev = rdev->bdev;
@@ -2687,10 +2672,10 @@ read_more:
                (unsigned long long)r10_bio->sector);
        bio = bio_clone_mddev(r10_bio->master_bio,
                              GFP_NOIO, mddev);
-       bio_trim(bio, r10_bio->sector - bio->bi_sector, max_sectors);
+       bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
        r10_bio->devs[slot].bio = bio;
        r10_bio->devs[slot].rdev = rdev;
-       bio->bi_sector = r10_bio->devs[slot].addr
+       bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
                + choose_data_offset(r10_bio, rdev);
        bio->bi_bdev = rdev->bdev;
        bio->bi_rw = READ | do_sync;
@@ -2701,7 +2686,7 @@ read_more:
                struct bio *mbio = r10_bio->master_bio;
                int sectors_handled =
                        r10_bio->sector + max_sectors
-                       - mbio->bi_sector;
+                       - mbio->bi_iter.bi_sector;
                r10_bio->sectors = max_sectors;
                spin_lock_irq(&conf->device_lock);
                if (mbio->bi_phys_segments == 0)
@@ -2719,7 +2704,7 @@ read_more:
                set_bit(R10BIO_ReadError,
                        &r10_bio->state);
                r10_bio->mddev = mddev;
-               r10_bio->sector = mbio->bi_sector
+               r10_bio->sector = mbio->bi_iter.bi_sector
                        + sectors_handled;
 
                goto read_more;
@@ -3157,7 +3142,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                bio->bi_end_io = end_sync_read;
                                bio->bi_rw = READ;
                                from_addr = r10_bio->devs[j].addr;
-                               bio->bi_sector = from_addr + rdev->data_offset;
+                               bio->bi_iter.bi_sector = from_addr +
+                                       rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
                                atomic_inc(&rdev->nr_pending);
                                /* and we write to 'i' (if not in_sync) */
@@ -3181,7 +3167,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                        bio->bi_private = r10_bio;
                                        bio->bi_end_io = end_sync_write;
                                        bio->bi_rw = WRITE;
-                                       bio->bi_sector = to_addr
+                                       bio->bi_iter.bi_sector = to_addr
                                                + rdev->data_offset;
                                        bio->bi_bdev = rdev->bdev;
                                        atomic_inc(&r10_bio->remaining);
@@ -3210,7 +3196,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = end_sync_write;
                                bio->bi_rw = WRITE;
-                               bio->bi_sector = to_addr + rdev->data_offset;
+                               bio->bi_iter.bi_sector = to_addr +
+                                       rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
                                atomic_inc(&r10_bio->remaining);
                                break;
@@ -3328,7 +3315,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                        bio->bi_private = r10_bio;
                        bio->bi_end_io = end_sync_read;
                        bio->bi_rw = READ;
-                       bio->bi_sector = sector +
+                       bio->bi_iter.bi_sector = sector +
                                conf->mirrors[d].rdev->data_offset;
                        bio->bi_bdev = conf->mirrors[d].rdev->bdev;
                        count++;
@@ -3350,7 +3337,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                        bio->bi_private = r10_bio;
                        bio->bi_end_io = end_sync_write;
                        bio->bi_rw = WRITE;
-                       bio->bi_sector = sector +
+                       bio->bi_iter.bi_sector = sector +
                                conf->mirrors[d].replacement->data_offset;
                        bio->bi_bdev = conf->mirrors[d].replacement->bdev;
                        count++;
@@ -3397,7 +3384,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                             bio2 = bio2->bi_next) {
                                /* remove last page from this bio */
                                bio2->bi_vcnt--;
-                               bio2->bi_size -= len;
+                               bio2->bi_iter.bi_size -= len;
                                bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
                        }
                        goto bio_full;
@@ -4418,7 +4405,7 @@ read_more:
        read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
 
        read_bio->bi_bdev = rdev->bdev;
-       read_bio->bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+       read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
                               + rdev->data_offset);
        read_bio->bi_private = r10_bio;
        read_bio->bi_end_io = end_sync_read;
@@ -4426,7 +4413,7 @@ read_more:
        read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
        read_bio->bi_flags |= 1 << BIO_UPTODATE;
        read_bio->bi_vcnt = 0;
-       read_bio->bi_size = 0;
+       read_bio->bi_iter.bi_size = 0;
        r10_bio->master_bio = read_bio;
        r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
 
@@ -4452,7 +4439,8 @@ read_more:
 
                bio_reset(b);
                b->bi_bdev = rdev2->bdev;
-               b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
+               b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
+                       rdev2->new_data_offset;
                b->bi_private = r10_bio;
                b->bi_end_io = end_reshape_write;
                b->bi_rw = WRITE;
@@ -4479,7 +4467,7 @@ read_more:
                             bio2 = bio2->bi_next) {
                                /* Remove last page from this bio */
                                bio2->bi_vcnt--;
-                               bio2->bi_size -= len;
+                               bio2->bi_iter.bi_size -= len;
                                bio2->bi_flags &= ~(1<<BIO_SEG_VALID);
                        }
                        goto bio_full;