Pileus Git - ~andy/linux/blobdiff - fs/btrfs/extent_io.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial
[~andy/linux] / fs / btrfs / extent_io.c
index 97f6703fd493ec26c524a918bc36121652cc71c6..deafe19c34b5cea729ed40d8ca61262782ea0541 100644 (file)
@@ -20,6 +20,7 @@
 #include "volumes.h"
 #include "check-integrity.h"
 #include "locking.h"
+#include "rcu-string.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -186,7 +187,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
                        return parent;
        }
 
-       entry = rb_entry(node, struct tree_entry, rb_node);
        rb_link_node(node, parent, p);
        rb_insert_color(node, root);
        return NULL;
@@ -413,7 +413,7 @@ static struct extent_state *next_state(struct extent_state *state)
 
 /*
  * utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1)
+ * it will optionally wake up any one waiting on this state (wake == 1).
  *
  * If no bits are set on the state struct after clearing things, the
  * struct is freed and removed from the tree
@@ -570,10 +570,8 @@ hit_next:
                if (err)
                        goto out;
                if (state->end <= end) {
-                       clear_state_bit(tree, state, &bits, wake);
-                       if (last_end == (u64)-1)
-                               goto out;
-                       start = last_end + 1;
+                       state = clear_state_bit(tree, state, &bits, wake);
+                       goto next;
                }
                goto search_again;
        }
@@ -781,7 +779,6 @@ hit_next:
         * Just lock what we found and keep going
         */
        if (state->start == start && state->end <= end) {
-               struct rb_node *next_node;
                if (state->state & exclusive_bits) {
                        *failed_start = state->start;
                        err = -EEXIST;
@@ -789,20 +786,15 @@ hit_next:
                }
 
                set_state_bits(tree, state, &bits);
-
                cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
-
                start = last_end + 1;
-               next_node = rb_next(&state->rb_node);
-               if (next_node && start < end && prealloc && !need_resched()) {
-                       state = rb_entry(next_node, struct extent_state,
-                                        rb_node);
-                       if (state->start == start)
-                               goto hit_next;
-               }
+               state = next_state(state);
+               if (start < end && state && state->start == start &&
+                   !need_resched())
+                       goto hit_next;
                goto search_again;
        }
 
@@ -845,6 +837,10 @@ hit_next:
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
+                       state = next_state(state);
+                       if (start < end && state && state->start == start &&
+                           !need_resched())
+                               goto hit_next;
                }
                goto search_again;
        }
@@ -995,21 +991,14 @@ hit_next:
         * Just lock what we found and keep going
         */
        if (state->start == start && state->end <= end) {
-               struct rb_node *next_node;
-
                set_state_bits(tree, state, &bits);
-               clear_state_bit(tree, state, &clear_bits, 0);
+               state = clear_state_bit(tree, state, &clear_bits, 0);
                if (last_end == (u64)-1)
                        goto out;
-
                start = last_end + 1;
-               next_node = rb_next(&state->rb_node);
-               if (next_node && start < end && prealloc && !need_resched()) {
-                       state = rb_entry(next_node, struct extent_state,
-                                        rb_node);
-                       if (state->start == start)
-                               goto hit_next;
-               }
+               if (start < end && state && state->start == start &&
+                   !need_resched())
+                       goto hit_next;
                goto search_again;
        }
 
@@ -1043,10 +1032,13 @@ hit_next:
                        goto out;
                if (state->end <= end) {
                        set_state_bits(tree, state, &bits);
-                       clear_state_bit(tree, state, &clear_bits, 0);
+                       state = clear_state_bit(tree, state, &clear_bits, 0);
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
+                       if (start < end && state && state->start == start &&
+                           !need_resched())
+                               goto hit_next;
                }
                goto search_again;
        }
@@ -1174,9 +1166,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                              cached_state, mask);
 }
 
-static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
-                                u64 end, struct extent_state **cached_state,
-                                gfp_t mask)
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+                         struct extent_state **cached_state, gfp_t mask)
 {
        return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
                                cached_state, mask);
@@ -1294,7 +1285,7 @@ out:
  * returned if we find something, and *start_ret and *end_ret are
  * set to reflect the state struct that was found.
  *
- * If nothing was found, 1 is returned, < 0 on error
+ * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
                          u64 *start_ret, u64 *end_ret, int bits)
@@ -1924,12 +1915,13 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
        if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
                /* try to remap that extent elsewhere? */
                bio_put(bio);
+               btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
                return -EIO;
        }
 
-       printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
-                       "sector %llu)\n", page->mapping->host->i_ino, start,
-                       dev->name, sector);
+       printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
+                     "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+                     start, rcu_str_deref(dev->name), sector);
 
        bio_put(bio);
        return 0;
@@ -2223,17 +2215,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
                        uptodate = 0;
        }
 
-       if (!uptodate && tree->ops &&
-           tree->ops->writepage_io_failed_hook) {
-               ret = tree->ops->writepage_io_failed_hook(NULL, page,
-                                                start, end, NULL);
-               /* Writeback already completed */
-               if (ret == 0)
-                       return 1;
-       }
-
        if (!uptodate) {
-               clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
                ClearPageUptodate(page);
                SetPageError(page);
        }
@@ -2348,10 +2330,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
                                                              state, mirror);
-                       if (ret)
+                       if (ret) {
+                               /* no IO indicated but software detected errors
+                                * in the block, either checksum errors or
+                                * issues with the contents */
+                               struct btrfs_root *root =
+                                       BTRFS_I(page->mapping->host)->root;
+                               struct btrfs_device *device;
+
                                uptodate = 0;
-                       else
+                               device = btrfs_find_device_for_logical(
+                                               root, start, mirror);
+                               if (device)
+                                       btrfs_dev_stat_inc_and_print(device,
+                                               BTRFS_DEV_STAT_CORRUPTION_ERRS);
+                       } else {
                                clean_io_failure(start, page);
+                       }
                }
 
                if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
@@ -3165,7 +3160,7 @@ static int write_one_eb(struct extent_buffer *eb,
        u64 offset = eb->start;
        unsigned long i, num_pages;
        int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
-       int ret;
+       int ret = 0;
 
        clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
        num_pages = num_extent_pages(eb->start, eb->len);
@@ -3330,6 +3325,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                             writepage_t writepage, void *data,
                             void (*flush_fn)(void *))
 {
+       struct inode *inode = mapping->host;
        int ret = 0;
        int done = 0;
        int nr_to_write_done = 0;
@@ -3340,6 +3336,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        int scanned = 0;
        int tag;
 
+       /*
+        * We have to hold onto the inode so that ordered extents can do their
+        * work when the IO finishes.  The alternative to this is failing to add
+        * an ordered extent if the igrab() fails there and that is a huge pain
+        * to deal with, so instead just hold onto the inode throughout the
+        * writepages operation.  If it fails here we are freeing up the inode
+        * anyway and we'd rather not waste our time writing out stuff that is
+        * going to be truncated anyway.
+        */
+       if (!igrab(inode))
+               return 0;
+
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
@@ -3434,6 +3442,7 @@ retry:
                index = 0;
                goto retry;
        }
+       btrfs_add_delayed_iput(inode);
        return ret;
 }
 
@@ -3931,6 +3940,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
        eb->start = start;
        eb->len = len;
        eb->tree = tree;
+       eb->bflags = 0;
        rwlock_init(&eb->lock);
        atomic_set(&eb->write_locks, 0);
        atomic_set(&eb->read_locks, 0);
@@ -3968,6 +3978,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
        return eb;
 }
 
+/*
+ * Make a private, tree-less copy of @src: same start/len, contents copied.
+ * The clone is built with a NULL extent_io_tree and is marked
+ * EXTENT_BUFFER_DUMMY, so the release path skips the radix-tree and
+ * page->mapping handling that only applies to mapped buffers.
+ * Returns the new buffer, or NULL if the extent_buffer itself cannot be
+ * allocated.
+ */
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+{
+       unsigned long i;
+       struct page *p;
+       struct extent_buffer *new;
+       unsigned long num_pages = num_extent_pages(src->start, src->len);
+
+       new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC);
+       if (new == NULL)
+               return NULL;
+
+       for (i = 0; i < num_pages; i++) {
+               p = alloc_page(GFP_ATOMIC);
+               /* NOTE(review): crashing on a failed GFP_ATOMIC page
+                * allocation is harsh — atomic allocations fail readily
+                * under memory pressure.  Consider unwinding the pages
+                * attached so far and returning NULL instead. */
+               BUG_ON(!p);
+               attach_extent_buffer_page(new, p);
+               WARN_ON(PageDirty(p));
+               SetPageUptodate(p);
+               new->pages[i] = p;
+       }
+
+       copy_extent_buffer(new, src, 0, 0, src->len);
+       set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
+       set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
+
+       return new;
+}
+
+/*
+ * Allocate a dummy extent buffer of @len bytes at logical offset @start,
+ * backed by freshly allocated pages.  The buffer belongs to no
+ * extent_io_tree (tree == NULL) and is marked EXTENT_BUFFER_DUMMY so the
+ * release path skips the radix-tree and page->mapping handling.
+ * Returns the new buffer, or NULL on allocation failure.
+ */
+struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
+{
+       struct extent_buffer *eb;
+       unsigned long num_pages = num_extent_pages(0, len);
+       unsigned long i;
+
+       eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
+       if (!eb)
+               return NULL;
+
+       for (i = 0; i < num_pages; i++) {
+               eb->pages[i] = alloc_page(GFP_ATOMIC);
+               if (!eb->pages[i])
+                       goto err;
+       }
+       set_extent_buffer_uptodate(eb);
+       btrfs_set_header_nritems(eb, 0);
+       set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+
+       return eb;
+err:
+       /*
+        * i indexes the first failed allocation, so pages [0, i) were
+        * allocated and must all be freed.  The previous loop
+        * "for (i--; i > 0; i--)" stopped before index 0 and leaked
+        * eb->pages[0].
+        */
+       for (; i > 0; i--)
+               __free_page(eb->pages[i - 1]);
+       __free_extent_buffer(eb);
+       return NULL;
+}
+
 static int extent_buffer_under_io(struct extent_buffer *eb)
 {
        return (atomic_read(&eb->io_pages) ||
@@ -3982,18 +4046,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
                                                unsigned long start_idx)
 {
        unsigned long index;
+       unsigned long num_pages;
        struct page *page;
+       int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
 
        BUG_ON(extent_buffer_under_io(eb));
 
-       index = num_extent_pages(eb->start, eb->len);
+       num_pages = num_extent_pages(eb->start, eb->len);
+       index = start_idx + num_pages;
        if (start_idx >= index)
                return;
 
        do {
                index--;
                page = extent_buffer_page(eb, index);
-               if (page) {
+               if (page && mapped) {
                        spin_lock(&page->mapping->private_lock);
                        /*
                         * We do this since we'll remove the pages after we've
@@ -4018,6 +4085,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
                        }
                        spin_unlock(&page->mapping->private_lock);
 
+               }
+               if (page) {
                        /* One for when we alloced the page */
                        page_cache_release(page);
                }
@@ -4236,14 +4305,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
 {
        WARN_ON(atomic_read(&eb->refs) == 0);
        if (atomic_dec_and_test(&eb->refs)) {
-               struct extent_io_tree *tree = eb->tree;
+               if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
+                       spin_unlock(&eb->refs_lock);
+               } else {
+                       struct extent_io_tree *tree = eb->tree;
 
-               spin_unlock(&eb->refs_lock);
+                       spin_unlock(&eb->refs_lock);
 
-               spin_lock(&tree->buffer_lock);
-               radix_tree_delete(&tree->buffer,
-                                 eb->start >> PAGE_CACHE_SHIFT);
-               spin_unlock(&tree->buffer_lock);
+                       spin_lock(&tree->buffer_lock);
+                       radix_tree_delete(&tree->buffer,
+                                         eb->start >> PAGE_CACHE_SHIFT);
+                       spin_unlock(&tree->buffer_lock);
+               }
 
                /* Should be safe to release our pages at this point */
                btrfs_release_extent_buffer_page(eb, 0);
@@ -4260,6 +4333,10 @@ void free_extent_buffer(struct extent_buffer *eb)
                return;
 
        spin_lock(&eb->refs_lock);
+       if (atomic_read(&eb->refs) == 2 &&
+           test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
+               atomic_dec(&eb->refs);
+
        if (atomic_read(&eb->refs) == 2 &&
            test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
            !extent_buffer_under_io(eb) &&