Pileus Git - ~andy/linux/blobdiff - drivers/md/raid1.c
ipv4: fix wildcard search with inet_confirm_addr()
[~andy/linux] / drivers / md / raid1.c
index af6681b19776d2f695030452a9d6ab42821ed0df..1e5a540995e932852df5ff484a96bfcc8636a432 100644 (file)
@@ -66,7 +66,8 @@
  */
 static int max_queued_requests = 1024;
 
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector);
 static void lower_barrier(struct r1conf *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -84,10 +85,12 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 }
 
 #define RESYNC_BLOCK_SIZE (64*1024)
-//#define RESYNC_BLOCK_SIZE PAGE_SIZE
+#define RESYNC_DEPTH 32
 #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
-#define RESYNC_WINDOW (2048*1024)
+#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
+#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
 
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -225,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
        struct bio *bio = r1_bio->master_bio;
        int done;
        struct r1conf *conf = r1_bio->mddev->private;
+       sector_t start_next_window = r1_bio->start_next_window;
+       sector_t bi_sector = bio->bi_sector;
 
        if (bio->bi_phys_segments) {
                unsigned long flags;
@@ -232,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
                bio->bi_phys_segments--;
                done = (bio->bi_phys_segments == 0);
                spin_unlock_irqrestore(&conf->device_lock, flags);
+               /*
+                * make_request() might be waiting for
+                * bi_phys_segments to decrease
+                */
+               wake_up(&conf->wait_barrier);
        } else
                done = 1;
 
@@ -243,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
                 */
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bi_sector);
        }
 }
 
@@ -814,8 +824,6 @@ static void flush_pending_writes(struct r1conf *conf)
  *    there is no normal IO happeing.  It must arrange to call
  *    lower_barrier when the particular background IO completes.
  */
-#define RESYNC_DEPTH 32
-
 static void raise_barrier(struct r1conf *conf)
 {
        spin_lock_irq(&conf->resync_lock);
@@ -827,9 +835,19 @@ static void raise_barrier(struct r1conf *conf)
        /* block any new IO from starting */
        conf->barrier++;
 
-       /* Now wait for all pending IO to complete */
+       /* For these conditions we must wait:
+        * A: while the array is in frozen state
+        * B: while barrier >= RESYNC_DEPTH, meaning resync has reached
+        *    the maximum count allowed.
+        * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
+        *    the next resync would reach into the window in which
+        *    normal bios are being handled.
+        */
        wait_event_lock_irq(conf->wait_barrier,
-                           !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                           !conf->array_frozen &&
+                           conf->barrier < RESYNC_DEPTH &&
+                           (conf->start_next_window >=
+                            conf->next_resync + RESYNC_SECTORS),
                            conf->resync_lock);
 
        spin_unlock_irq(&conf->resync_lock);
@@ -845,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
        wake_up(&conf->wait_barrier);
 }
 
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
 {
+       bool wait = false;
+
+       if (conf->array_frozen || !bio)
+               wait = true;
+       else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+                       wait = true;
+               else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+                               >= bio_end_sector(bio)) ||
+                        (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                               <= bio->bi_sector))
+                       wait = false;
+               else
+                       wait = true;
+       }
+
+       return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
+{
+       sector_t sector = 0;
+
        spin_lock_irq(&conf->resync_lock);
-       if (conf->barrier) {
+       if (need_to_wait_for_sync(conf, bio)) {
                conf->nr_waiting++;
                /* Wait for the barrier to drop.
                 * However if there are already pending
@@ -860,22 +901,67 @@ static void wait_barrier(struct r1conf *conf)
                 * count down.
                 */
                wait_event_lock_irq(conf->wait_barrier,
-                                   !conf->barrier ||
-                                   (conf->nr_pending &&
+                                   !conf->array_frozen &&
+                                   (!conf->barrier ||
+                                   ((conf->start_next_window <
+                                     conf->next_resync + RESYNC_SECTORS) &&
                                     current->bio_list &&
-                                    !bio_list_empty(current->bio_list)),
+                                    !bio_list_empty(current->bio_list))),
                                    conf->resync_lock);
                conf->nr_waiting--;
        }
+
+       if (bio && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                   <= bio->bi_sector) {
+                       if (conf->start_next_window == MaxSector)
+                               conf->start_next_window =
+                                       conf->next_resync +
+                                       NEXT_NORMALIO_DISTANCE;
+
+                       if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+                           <= bio->bi_sector)
+                               conf->next_window_requests++;
+                       else
+                               conf->current_window_requests++;
+               }
+               if (bio->bi_sector >= conf->start_next_window)
+                       sector = conf->start_next_window;
+       }
+
        conf->nr_pending++;
        spin_unlock_irq(&conf->resync_lock);
+       return sector;
 }
 
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector)
 {
        unsigned long flags;
+
        spin_lock_irqsave(&conf->resync_lock, flags);
        conf->nr_pending--;
+       if (start_next_window) {
+               if (start_next_window == conf->start_next_window) {
+                       if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+                           <= bi_sector)
+                               conf->next_window_requests--;
+                       else
+                               conf->current_window_requests--;
+               } else
+                       conf->current_window_requests--;
+
+               if (!conf->current_window_requests) {
+                       if (conf->next_window_requests) {
+                               conf->current_window_requests =
+                                       conf->next_window_requests;
+                               conf->next_window_requests = 0;
+                               conf->start_next_window +=
+                                       NEXT_NORMALIO_DISTANCE;
+                       } else
+                               conf->start_next_window = MaxSector;
+               }
+       }
        spin_unlock_irqrestore(&conf->resync_lock, flags);
        wake_up(&conf->wait_barrier);
 }
@@ -884,8 +970,7 @@ static void freeze_array(struct r1conf *conf, int extra)
 {
        /* stop syncio and normal IO and wait for everything to
         * go quite.
-        * We increment barrier and nr_waiting, and then
-        * wait until nr_pending match nr_queued+extra
+        * We wait until nr_pending matches nr_queued+extra.
         * This is called in the context of one normal IO request
         * that has failed. Thus any sync request that might be pending
         * will be blocked by nr_pending, and we need to wait for
@@ -895,8 +980,7 @@ static void freeze_array(struct r1conf *conf, int extra)
         * we continue.
         */
        spin_lock_irq(&conf->resync_lock);
-       conf->barrier++;
-       conf->nr_waiting++;
+       conf->array_frozen = 1;
        wait_event_lock_irq_cmd(conf->wait_barrier,
                                conf->nr_pending == conf->nr_queued+extra,
                                conf->resync_lock,
@@ -907,8 +991,7 @@ static void unfreeze_array(struct r1conf *conf)
 {
        /* reverse the effect of the freeze */
        spin_lock_irq(&conf->resync_lock);
-       conf->barrier--;
-       conf->nr_waiting--;
+       conf->array_frozen = 0;
        wake_up(&conf->wait_barrier);
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -1013,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        int first_clone;
        int sectors_handled;
        int max_sectors;
+       sector_t start_next_window;
 
        /*
         * Register the new request and wait if the reconstruction
@@ -1042,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                finish_wait(&conf->wait_barrier, &w);
        }
 
-       wait_barrier(conf);
+       start_next_window = wait_barrier(conf, bio);
 
        bitmap = mddev->bitmap;
 
@@ -1163,6 +1247,7 @@ read_again:
 
        disks = conf->raid_disks * 2;
  retry_write:
+       r1_bio->start_next_window = start_next_window;
        blocked_rdev = NULL;
        rcu_read_lock();
        max_sectors = r1_bio->sectors;
@@ -1231,14 +1316,24 @@ read_again:
        if (unlikely(blocked_rdev)) {
                /* Wait for this device to become unblocked */
                int j;
+               sector_t old = start_next_window;
 
                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                r1_bio->state = 0;
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bio->bi_sector);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
+               start_next_window = wait_barrier(conf, bio);
+               /*
+                * We must make sure that all the r1bios of this bio
+                * use the same value of start_next_window
+                */
+               if (bio->bi_phys_segments && old &&
+                   old != start_next_window)
+                       /* Wait for the former r1bio(s) to complete */
+                       wait_event(conf->wait_barrier,
+                                  bio->bi_phys_segments == 1);
                goto retry_write;
        }
 
@@ -1438,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
 
 static void close_sync(struct r1conf *conf)
 {
-       wait_barrier(conf);
-       allow_barrier(conf);
+       wait_barrier(conf, NULL);
+       allow_barrier(conf, 0, 0);
 
        mempool_destroy(conf->r1buf_pool);
        conf->r1buf_pool = NULL;
+
+       conf->next_resync = 0;
+       conf->start_next_window = MaxSector;
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2714,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        conf->pending_count = 0;
        conf->recovery_disabled = mddev->recovery_disabled - 1;
 
+       conf->start_next_window = MaxSector;
+       conf->current_window_requests = conf->next_window_requests = 0;
+
        err = -EIO;
        for (i = 0; i < conf->raid_disks * 2; i++) {
 
@@ -2871,8 +2972,8 @@ static int stop(struct mddev *mddev)
                           atomic_read(&bitmap->behind_writes) == 0);
        }
 
-       raise_barrier(conf);
-       lower_barrier(conf);
+       freeze_array(conf, 0);
+       unfreeze_array(conf);
 
        md_unregister_thread(&mddev->thread);
        if (conf->r1bio_pool)
@@ -3031,10 +3132,10 @@ static void raid1_quiesce(struct mddev *mddev, int state)
                wake_up(&conf->wait_barrier);
                break;
        case 1:
-               raise_barrier(conf);
+               freeze_array(conf, 0);
                break;
        case 0:
-               lower_barrier(conf);
+               unfreeze_array(conf);
                break;
        }
 }
@@ -3051,7 +3152,8 @@ static void *raid1_takeover(struct mddev *mddev)
                mddev->new_chunk_sectors = 0;
                conf = setup_conf(mddev);
                if (!IS_ERR(conf))
-                       conf->barrier = 1;
+                       /* Array must appear to be quiesced */
+                       conf->array_frozen = 1;
                return conf;
        }
        return ERR_PTR(-EINVAL);