]> Pileus Git - ~andy/linux/blobdiff - drivers/block/drbd/drbd_bitmap.c
Merge branch 'drbd-8.4_ed6' into for-3.8-drivers-drbd-8.4_ed6
[~andy/linux] / drivers / block / drbd / drbd_bitmap.c
index 8d8069758042601f5a02c060a265e43e3d1401d8..1ab205a4bf69c1b3e1ea733ebf716bd1bd83c31a 100644 (file)
@@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
        if (!__ratelimit(&drbd_ratelimit_state))
                return;
        dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
-           current == mdev->receiver.task ? "receiver" :
-           current == mdev->asender.task  ? "asender"  :
-           current == mdev->worker.task   ? "worker"   : current->comm,
-           func, b->bm_why ?: "?",
-           b->bm_task == mdev->receiver.task ? "receiver" :
-           b->bm_task == mdev->asender.task  ? "asender"  :
-           b->bm_task == mdev->worker.task   ? "worker"   : "?");
+               drbd_task_to_thread_name(mdev->tconn, current),
+               func, b->bm_why ?: "?",
+               drbd_task_to_thread_name(mdev->tconn, b->bm_task));
 }
 
 void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
@@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
 
        if (trylock_failed) {
                dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
-                   current == mdev->receiver.task ? "receiver" :
-                   current == mdev->asender.task  ? "asender"  :
-                   current == mdev->worker.task   ? "worker"   : current->comm,
-                   why, b->bm_why ?: "?",
-                   b->bm_task == mdev->receiver.task ? "receiver" :
-                   b->bm_task == mdev->asender.task  ? "asender"  :
-                   b->bm_task == mdev->worker.task   ? "worker"   : "?");
+                        drbd_task_to_thread_name(mdev->tconn, current),
+                        why, b->bm_why ?: "?",
+                        drbd_task_to_thread_name(mdev->tconn, b->bm_task));
                mutex_lock(&b->bm_change);
        }
        if (BM_LOCKED_MASK & b->bm_flags)
@@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
 /* to mark for lazy writeout once syncer cleared all clearable bits,
  * we if bits have been cleared since last IO. */
 #define BM_PAGE_LAZY_WRITEOUT  28
+/* pages marked with this "HINT" will be considered for writeout
+ * on activity log transactions */
+#define BM_PAGE_HINT_WRITEOUT  27
 
 /* store_page_idx uses non-atomic assignment. It is only used directly after
  * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
@@ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
 {
        struct drbd_bitmap *b = mdev->bitmap;
        void *addr = &page_private(b->bm_pages[page_nr]);
-       clear_bit(BM_PAGE_IO_LOCK, addr);
-       smp_mb__after_clear_bit();
+       clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
        wake_up(&mdev->bitmap->bm_io_wait);
 }
 
@@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
        set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
 }
 
+/**
+ * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
+ * @mdev:      DRBD device.
+ * @page_nr:   the bitmap page to mark with the "hint" flag
+ *
+ * From within an activity log transaction, we mark a few pages with these
+ * hints, then call drbd_bm_write_hinted(), which will only write out changed
+ * pages which are flagged with this mark.
+ */
+void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
+{
+       struct page *page;
+       if (page_nr >= mdev->bitmap->bm_number_of_pages) {
+               dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
+                        page_nr, (int)mdev->bitmap->bm_number_of_pages);
+               return;
+       }
+       page = mdev->bitmap->bm_pages[page_nr];
+       set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
+}
+
 static int bm_test_page_unchanged(struct page *page)
 {
        volatile const unsigned long *addr = &page_private(page);
@@ -376,7 +391,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
         * GFP_NOIO, as this is called while drbd IO is "suspended",
         * and during resize or attach on diskless Primary,
         * we must not block on IO to ourselves.
-        * Context is receiver thread or cqueue thread/dmsetup.  */
+        * Context is receiver thread or dmsetup. */
        bytes = sizeof(struct page *)*want;
        new_pages = kzalloc(bytes, GFP_NOIO);
        if (!new_pages) {
@@ -441,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev)
 
 sector_t drbd_bm_capacity(struct drbd_conf *mdev)
 {
-       ERR_IF(!mdev->bitmap) return 0;
+       if (!expect(mdev->bitmap))
+               return 0;
        return mdev->bitmap->bm_dev_capacity;
 }
 
@@ -449,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev)
  */
 void drbd_bm_cleanup(struct drbd_conf *mdev)
 {
-       ERR_IF (!mdev->bitmap) return;
+       if (!expect(mdev->bitmap))
+               return;
        bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
        bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
        kfree(mdev->bitmap);
@@ -612,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
        int err = 0, growing;
        int opages_vmalloced;
 
-       ERR_IF(!b) return -ENOMEM;
+       if (!expect(b))
+               return -ENOMEM;
 
        drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
 
@@ -734,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
        unsigned long s;
        unsigned long flags;
 
-       ERR_IF(!b) return 0;
-       ERR_IF(!b->bm_pages) return 0;
+       if (!expect(b))
+               return 0;
+       if (!expect(b->bm_pages))
+               return 0;
 
        spin_lock_irqsave(&b->bm_lock, flags);
        s = b->bm_set;
@@ -758,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
 size_t drbd_bm_words(struct drbd_conf *mdev)
 {
        struct drbd_bitmap *b = mdev->bitmap;
-       ERR_IF(!b) return 0;
-       ERR_IF(!b->bm_pages) return 0;
+       if (!expect(b))
+               return 0;
+       if (!expect(b->bm_pages))
+               return 0;
 
        return b->bm_words;
 }
@@ -767,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
 unsigned long drbd_bm_bits(struct drbd_conf *mdev)
 {
        struct drbd_bitmap *b = mdev->bitmap;
-       ERR_IF(!b) return 0;
+       if (!expect(b))
+               return 0;
 
        return b->bm_bits;
 }
@@ -788,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
 
        end = offset + number;
 
-       ERR_IF(!b) return;
-       ERR_IF(!b->bm_pages) return;
+       if (!expect(b))
+               return;
+       if (!expect(b->bm_pages))
+               return;
        if (number == 0)
                return;
        WARN_ON(offset >= b->bm_words);
@@ -833,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
 
        end = offset + number;
 
-       ERR_IF(!b) return;
-       ERR_IF(!b->bm_pages) return;
+       if (!expect(b))
+               return;
+       if (!expect(b->bm_pages))
+               return;
 
        spin_lock_irq(&b->bm_lock);
        if ((offset >= b->bm_words) ||
@@ -862,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
 void drbd_bm_set_all(struct drbd_conf *mdev)
 {
        struct drbd_bitmap *b = mdev->bitmap;
-       ERR_IF(!b) return;
-       ERR_IF(!b->bm_pages) return;
+       if (!expect(b))
+               return;
+       if (!expect(b->bm_pages))
+               return;
 
        spin_lock_irq(&b->bm_lock);
        bm_memset(b, 0, 0xff, b->bm_words);
@@ -876,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
 void drbd_bm_clear_all(struct drbd_conf *mdev)
 {
        struct drbd_bitmap *b = mdev->bitmap;
-       ERR_IF(!b) return;
-       ERR_IF(!b->bm_pages) return;
+       if (!expect(b))
+               return;
+       if (!expect(b->bm_pages))
+               return;
 
        spin_lock_irq(&b->bm_lock);
        bm_memset(b, 0, 0, b->bm_words);
@@ -891,7 +922,8 @@ struct bm_aio_ctx {
        unsigned int done;
        unsigned flags;
 #define BM_AIO_COPY_PAGES      1
-#define BM_WRITE_ALL_PAGES     2
+#define BM_AIO_WRITE_HINTED    2
+#define BM_WRITE_ALL_PAGES     4
        int error;
        struct kref kref;
 };
@@ -1062,6 +1094,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
                if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
                        break;
                if (rw & WRITE) {
+                       if ((flags & BM_AIO_WRITE_HINTED) &&
+                           !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
+                                   &page_private(b->bm_pages[i])))
+                               continue;
+
                        if (!(flags & BM_WRITE_ALL_PAGES) &&
                            bm_test_page_unchanged(b->bm_pages[i])) {
                                dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
@@ -1094,9 +1131,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
        else
                kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
-       dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
-                       rw == WRITE ? "WRITE" : "READ",
-                       count, jiffies - now);
+       /* summary for global bitmap IO */
+       if (flags == 0)
+               dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
+                        rw == WRITE ? "WRITE" : "READ",
+                        count, jiffies - now);
 
        if (ctx->error) {
                dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
@@ -1117,8 +1156,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
        }
        now = b->bm_set;
 
-       dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
-            ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+       if (flags == 0)
+               dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+                    ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
        kref_put(&ctx->kref, &bm_aio_ctx_destroy);
        return err;
@@ -1181,9 +1221,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
        return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
 }
 
+/**
+ * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
+ * @mdev:      DRBD device.
+ */
+int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
+{
+       return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
+}
 
 /**
- * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
+ * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
  * @mdev:      DRBD device.
  * @idx:       bitmap page index
  *
@@ -1291,8 +1339,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
        struct drbd_bitmap *b = mdev->bitmap;
        unsigned long i = DRBD_END_OF_BITMAP;
 
-       ERR_IF(!b) return i;
-       ERR_IF(!b->bm_pages) return i;
+       if (!expect(b))
+               return i;
+       if (!expect(b->bm_pages))
+               return i;
 
        spin_lock_irq(&b->bm_lock);
        if (BM_DONT_TEST & b->bm_flags)
@@ -1393,8 +1443,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
        struct drbd_bitmap *b = mdev->bitmap;
        int c = 0;
 
-       ERR_IF(!b) return 1;
-       ERR_IF(!b->bm_pages) return 0;
+       if (!expect(b))
+               return 1;
+       if (!expect(b->bm_pages))
+               return 0;
 
        spin_lock_irqsave(&b->bm_lock, flags);
        if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
@@ -1425,13 +1477,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
 {
        int i;
        int bits;
+       int changed = 0;
        unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
        for (i = first_word; i < last_word; i++) {
                bits = hweight_long(paddr[i]);
                paddr[i] = ~0UL;
-               b->bm_set += BITS_PER_LONG - bits;
+               changed += BITS_PER_LONG - bits;
        }
        kunmap_atomic(paddr);
+       if (changed) {
+               /* We only need lazy writeout, the information is still in the
+                * remote bitmap as well, and is reconstructed during the next
+                * bitmap exchange, if lost locally due to a crash. */
+               bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
+               b->bm_set += changed;
+       }
 }
 
 /* Same thing as drbd_bm_set_bits,
@@ -1526,8 +1586,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
        unsigned long *p_addr;
        int i;
 
-       ERR_IF(!b) return 0;
-       ERR_IF(!b->bm_pages) return 0;
+       if (!expect(b))
+               return 0;
+       if (!expect(b->bm_pages))
+               return 0;
 
        spin_lock_irqsave(&b->bm_lock, flags);
        if (BM_DONT_TEST & b->bm_flags)
@@ -1561,8 +1623,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
         * robust in case we screwed up elsewhere, in that case pretend there
         * was one dirty bit in the requested area, so we won't try to do a
         * local read there (no bitmap probably implies no disk) */
-       ERR_IF(!b) return 1;
-       ERR_IF(!b->bm_pages) return 1;
+       if (!expect(b))
+               return 1;
+       if (!expect(b->bm_pages))
+               return 1;
 
        spin_lock_irqsave(&b->bm_lock, flags);
        if (BM_DONT_TEST & b->bm_flags)
@@ -1575,11 +1639,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
                                bm_unmap(p_addr);
                        p_addr = bm_map_pidx(b, idx);
                }
-               ERR_IF (bitnr >= b->bm_bits) {
-                       dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
-               } else {
+               if (expect(bitnr < b->bm_bits))
                        c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
-               }
+               else
+                       dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
        }
        if (p_addr)
                bm_unmap(p_addr);
@@ -1609,8 +1672,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
        unsigned long flags;
        unsigned long *p_addr, *bm;
 
-       ERR_IF(!b) return 0;
-       ERR_IF(!b->bm_pages) return 0;
+       if (!expect(b))
+               return 0;
+       if (!expect(b->bm_pages))
+               return 0;
 
        spin_lock_irqsave(&b->bm_lock, flags);
        if (BM_DONT_TEST & b->bm_flags)
@@ -1632,47 +1697,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
        spin_unlock_irqrestore(&b->bm_lock, flags);
        return count;
 }
-
-/* Set all bits covered by the AL-extent al_enr.
- * Returns number of bits changed. */
-unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
-{
-       struct drbd_bitmap *b = mdev->bitmap;
-       unsigned long *p_addr, *bm;
-       unsigned long weight;
-       unsigned long s, e;
-       int count, i, do_now;
-       ERR_IF(!b) return 0;
-       ERR_IF(!b->bm_pages) return 0;
-
-       spin_lock_irq(&b->bm_lock);
-       if (BM_DONT_SET & b->bm_flags)
-               bm_print_lock_info(mdev);
-       weight = b->bm_set;
-
-       s = al_enr * BM_WORDS_PER_AL_EXT;
-       e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
-       /* assert that s and e are on the same page */
-       D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
-             ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
-       count = 0;
-       if (s < b->bm_words) {
-               i = do_now = e-s;
-               p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
-               bm = p_addr + MLPP(s);
-               while (i--) {
-                       count += hweight_long(*bm);
-                       *bm = -1UL;
-                       bm++;
-               }
-               bm_unmap(p_addr);
-               b->bm_set += do_now*BITS_PER_LONG - count;
-               if (e == b->bm_words)
-                       b->bm_set -= bm_clear_surplus(b);
-       } else {
-               dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
-       }
-       weight = b->bm_set - weight;
-       spin_unlock_irq(&b->bm_lock);
-       return weight;
-}