Btrfs: allow compressed extents to be merged during defragment

[~andy/linux] / fs / btrfs / inode.c
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 6d1b93c8aafb8a4d7b832cab8585ebf1ac1ced42..66571dce88d1ceb4917ea4c5498223fe7fc1958c 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -484,18 +484,20 @@ cont:
                                                     compress_type, pages);
                 }
                 if (ret <= 0) {
+                       unsigned long clear_flags = EXTENT_DELALLOC |
+                               EXTENT_DEFRAG;
+                       clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
+
                         /*
                          * inline extent creation worked or returned error,
                          * we don't need to create any more async work items.
                          * Unlock and free up our temp pages.
                          */
-                       extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, NULL,
-                            EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
-
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                                    clear_flags, PAGE_UNLOCK |
+                                                    PAGE_CLEAR_DIRTY |
+                                                    PAGE_SET_WRITEBACK |
+                                                    PAGE_END_WRITEBACK);
                         btrfs_end_transaction(trans, root);
                         goto free_pages_out;
                 }
@@ -592,13 +594,11 @@ free_pages_out:
         goto out;
  
  cleanup_and_out:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                    EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                    EXTENT_DEFRAG, PAGE_UNLOCK |
+                                    PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+                                    PAGE_END_WRITEBACK);
         if (!trans || IS_ERR(trans))
                 btrfs_error(root->fs_info, ret, "Failed to join transaction");
         else
@@ -770,16 +770,12 @@ retry:
                 /*
                  * clear dirty, set writeback and unlock the pages.
                  */
-               extent_clear_unlock_delalloc(inode,
-                               &BTRFS_I(inode)->io_tree,
-                               async_extent->start,
+               extent_clear_unlock_delalloc(inode, async_extent->start,
                                 async_extent->start +
                                 async_extent->ram_size - 1,
-                               NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK |
-                               EXTENT_CLEAR_DELALLOC |
-                               EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
-
+                               NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
+                               PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                               PAGE_SET_WRITEBACK);
                 ret = btrfs_submit_compressed_write(inode,
                                     async_extent->start,
                                     async_extent->ram_size,
@@ -798,16 +794,13 @@ out:
  out_free_reserve:
         btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
  out_free:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    async_extent->start,
+       extent_clear_unlock_delalloc(inode, async_extent->start,
                                      async_extent->start +
                                      async_extent->ram_size - 1,
-                                    NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                                    NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
         kfree(async_extent);
         goto again;
  }
@@ -892,15 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                 ret = cow_file_range_inline(trans, root, inode,
                                             start, end, 0, 0, NULL);
                 if (ret == 0) {
-                       extent_clear_unlock_delalloc(inode,
-                                    &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                    EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG, PAGE_UNLOCK |
+                                    PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+                                    PAGE_END_WRITEBACK);
  
                         *nr_written = *nr_written +
                              (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
@@ -990,13 +979,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                  * Do set the Private2 bit so we know this page was properly
                  * setup for writepage
                  */
-               op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
-               op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                       EXTENT_SET_PRIVATE2;
+               op = unlock ? PAGE_UNLOCK : 0;
+               op |= PAGE_SET_PRIVATE2;
  
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                            start, start + ram_size - 1,
-                                            locked_page, op);
+               extent_clear_unlock_delalloc(inode, start,
+                                            start + ram_size - 1, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC,
+                                            op);
                 disk_num_bytes -= cur_alloc_size;
                 num_bytes -= cur_alloc_size;
                 alloc_hint = ins.objectid + ins.offset;
@@ -1008,16 +997,11 @@ out:
  out_reserve:
         btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
  out_unlock:
-       extent_clear_unlock_delalloc(inode,
-                    &BTRFS_I(inode)->io_tree,
-                    start, end, locked_page,
-                    EXTENT_CLEAR_UNLOCK_PAGE |
-                    EXTENT_CLEAR_UNLOCK |
-                    EXTENT_CLEAR_DELALLOC |
-                    EXTENT_CLEAR_DIRTY |
-                    EXTENT_SET_WRITEBACK |
-                    EXTENT_END_WRITEBACK);
-
+       extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                    EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                    EXTENT_DELALLOC | EXTENT_DEFRAG,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
         goto out;
  }
  
@@ -1033,15 +1017,13 @@ static noinline int cow_file_range(struct inode *inode,
  
         trans = btrfs_join_transaction(root);
         if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                 return PTR_ERR(trans);
         }
         trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1221,15 +1203,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
  
         path = btrfs_alloc_path();
         if (!path) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                 return -ENOMEM;
         }
  
@@ -1241,15 +1221,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
                 trans = btrfs_join_transaction(root);
  
         if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                 btrfs_free_path(path);
                 return PTR_ERR(trans);
         }
@@ -1428,11 +1406,11 @@ out_check:
                         }
                 }
  
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                               cur_offset, cur_offset + num_bytes - 1,
-                               locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                               EXTENT_SET_PRIVATE2);
+               extent_clear_unlock_delalloc(inode, cur_offset,
+                                            cur_offset + num_bytes - 1,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC, PAGE_UNLOCK |
+                                            PAGE_SET_PRIVATE2);
                 cur_offset = extent_end;
                 if (cur_offset > end)
                         break;
@@ -1460,16 +1438,13 @@ error:
                 ret = err;
  
         if (ret && cur_offset < end)
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            cur_offset, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
-
+               extent_clear_unlock_delalloc(inode, cur_offset, end,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC | EXTENT_DEFRAG |
+                                            EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
         btrfs_free_path(path);
         return ret;
  }
@@ -2132,6 +2107,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
                 WARN_ON(1);
                 return ret;
         }
+       ret = 0;
  
         while (1) {
                 cond_resched();
@@ -2166,16 +2142,21 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
                 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
                         continue;
  
-               extent_offset = btrfs_file_extent_offset(leaf, extent);
-               if (key.offset - extent_offset != offset)
+               /*
+                * 'offset' refers to the exact key.offset,
+                * NOT the 'offset' field in btrfs_extent_data_ref, i.e.
+                * (key.offset - extent_offset).
+                */
+               if (key.offset != offset)
                         continue;
  
+               extent_offset = btrfs_file_extent_offset(leaf, extent);
                 num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
                 if (extent_offset >= old->extent_offset + old->offset +
                     old->len || extent_offset + num_bytes <=
                     old->extent_offset + old->offset)
                         continue;
-
                 break;
         }
  
@@ -2187,7 +2168,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
  
         backref->root_id = root_id;
         backref->inum = inum;
-       backref->file_pos = offset + extent_offset;
+       backref->file_pos = offset;
         backref->num_bytes = num_bytes;
         backref->extent_offset = extent_offset;
         backref->generation = btrfs_file_extent_generation(leaf, extent);
@@ -2210,7 +2191,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
         new->path = path;
  
         list_for_each_entry_safe(old, tmp, &new->head, list) {
-               ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+               ret = iterate_inodes_from_logical(old->bytenr +
+                                                 old->extent_offset, fs_info,
                                                   path, record_one_backref,
                                                   old);
                 BUG_ON(ret < 0 && ret != -ENOENT);
@@ -2230,16 +2212,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
  
  static int relink_is_mergable(struct extent_buffer *leaf,
                               struct btrfs_file_extent_item *fi,
-                             u64 disk_bytenr)
+                             struct new_sa_defrag_extent *new)
  {
-       if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
+       if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
                 return 0;
  
         if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
                 return 0;
  
-       if (btrfs_file_extent_compression(leaf, fi) ||
-           btrfs_file_extent_encryption(leaf, fi) ||
+       if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
+               return 0;
+
+       if (btrfs_file_extent_encryption(leaf, fi) ||
             btrfs_file_extent_other_encoding(leaf, fi))
                 return 0;
  
@@ -2383,8 +2367,8 @@ again:
                                     struct btrfs_file_extent_item);
                 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
  
-               if (relink_is_mergable(leaf, fi, new->bytenr) &&
-                   extent_len + found_key.offset == start) {
+               if (extent_len + found_key.offset == start &&
+                   relink_is_mergable(leaf, fi, new)) {
                         btrfs_set_file_extent_num_bytes(leaf, fi,
                                                         extent_len + len);
                         btrfs_mark_buffer_dirty(leaf);
@@ -2819,16 +2803,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
   * if there's a match, we allow the bio to finish.  If not, the code in
   * extent_io.c will try to find good copies for us.
   */
-static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-                              struct extent_state *state, int mirror)
+static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+                                     u64 phy_offset, struct page *page,
+                                     u64 start, u64 end, int mirror)
  {
         size_t offset = start - page_offset(page);
         struct inode *inode = page->mapping->host;
         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
         char *kaddr;
-       u64 private = ~(u32)0;
-       int ret;
         struct btrfs_root *root = BTRFS_I(inode)->root;
+       u32 csum_expected;
         u32 csum = ~(u32)0;
         static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                       DEFAULT_RATELIMIT_BURST);
@@ -2848,19 +2832,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                 return 0;
         }
  
-       if (state && state->start == start) {
-               private = state->private;
-               ret = 0;
-       } else {
-               ret = get_state_private(io_tree, start, &private);
-       }
-       kaddr = kmap_atomic(page);
-       if (ret)
-               goto zeroit;
+       phy_offset >>= inode->i_sb->s_blocksize_bits;
+       csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
  
+       kaddr = kmap_atomic(page);
         csum = btrfs_csum_data(kaddr + offset, csum,  end - start + 1);
         btrfs_csum_final(csum, (char *)&csum);
-       if (csum != private)
+       if (csum != csum_expected)
                 goto zeroit;
  
         kunmap_atomic(kaddr);
@@ -2869,14 +2847,13 @@ good:
  
  zeroit:
         if (__ratelimit(&_rs))
-               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
+               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
                         (unsigned long long)btrfs_ino(page->mapping->host),
-                       (unsigned long long)start, csum,
-                       (unsigned long long)private);
+                       (unsigned long long)start, csum, csum_expected);
         memset(kaddr + offset, 1, end - start + 1);
         flush_dcache_page(page);
         kunmap_atomic(kaddr);
-       if (private == 0)
+       if (csum_expected == 0)
                 return 0;
         return -EIO;
  }
@@ -4391,9 +4368,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
         int mask = attr->ia_valid;
         int ret;
  
-       if (newsize == oldsize)
-               return 0;
-
         /*
          * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
          * special case where we need to update the times despite not having
@@ -5165,14 +5139,31 @@ next:
         }
  
         /* Reached end of directory/root. Bump pos past the last item. */
-       if (key_type == BTRFS_DIR_INDEX_KEY)
-               /*
-                * 32-bit glibc will use getdents64, but then strtol -
-                * so the last number we can serve is this.
-                */
-               ctx->pos = 0x7fffffff;
-       else
-               ctx->pos++;
+       ctx->pos++;
+
+       /*
+        * Stop new entries from being returned after we return the last
+        * entry.
+        *
+        * New directory entries are assigned a strictly increasing
+        * offset.  This means that new entries created during readdir
+        * are *guaranteed* to be seen in the future by that readdir.
+        * This has broken buggy programs which operate on names as
+        * they're returned by readdir.  Until we re-use freed offsets
+        * we have this hack to stop new entries from being returned
+        * under the assumption that they'll never reach this huge
+        * offset.
+        *
+        * This is being careful not to overflow 32bit loff_t unless the
+        * last entry requires it because doing so has broken 32bit apps
+        * in the past.
+        */
+       if (key_type == BTRFS_DIR_INDEX_KEY) {
+               if (ctx->pos >= INT_MAX)
+                       ctx->pos = LLONG_MAX;
+               else
+                       ctx->pos = INT_MAX;
+       }
  nopos:
         ret = 0;
  err:
@@ -6791,26 +6782,6 @@ unlock_err:
         return ret;
  }
  
-struct btrfs_dio_private {
-       struct inode *inode;
-       u64 logical_offset;
-       u64 disk_bytenr;
-       u64 bytes;
-       void *private;
-
-       /* number of bios pending for this dio */
-       atomic_t pending_bios;
-
-       /* IO errors */
-       int errors;
-
-       /* orig_bio is our btrfs_io_bio */
-       struct bio *orig_bio;
-
-       /* dio_bio came from fs/direct-io.c */
-       struct bio *dio_bio;
-};
-
  static void btrfs_endio_direct_read(struct bio *bio, int err)
  {
         struct btrfs_dio_private *dip = bio->bi_private;
@@ -6819,6 +6790,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
         struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct bio *dio_bio;
+       u32 *csums = (u32 *)dip->csum;
+       int index = 0;
         u64 start;
  
         start = dip->logical_offset;
@@ -6827,12 +6800,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         struct page *page = bvec->bv_page;
                         char *kaddr;
                         u32 csum = ~(u32)0;
-                       u64 private = ~(u32)0;
                         unsigned long flags;
  
-                       if (get_state_private(&BTRFS_I(inode)->io_tree,
-                                             start, &private))
-                               goto failed;
                         local_irq_save(flags);
                         kaddr = kmap_atomic(page);
                         csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@ -6842,18 +6811,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         local_irq_restore(flags);
  
                         flush_dcache_page(bvec->bv_page);
-                       if (csum != private) {
-failed:
-                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
-                                       (unsigned long long)btrfs_ino(inode),
-                                       (unsigned long long)start,
-                                       csum, (unsigned)private);
+                       if (csum != csums[index]) {
+                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+                                         (unsigned long long)btrfs_ino(inode),
+                                         (unsigned long long)start,
+                                         csum, csums[index]);
                                 err = -EIO;
                         }
                 }
  
                 start += bvec->bv_len;
                 bvec++;
+               index++;
         } while (bvec <= bvec_end);
  
         unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6970,6 +6939,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                          int rw, u64 file_offset, int skip_sum,
                                          int async_submit)
  {
+       struct btrfs_dio_private *dip = bio->bi_private;
         int write = rw & REQ_WRITE;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         int ret;
@@ -7004,7 +6974,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                 if (ret)
                         goto err;
         } else if (!skip_sum) {
-               ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
+               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
+                                               file_offset);
                 if (ret)
                         goto err;
         }
@@ -7039,6 +7010,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                 bio_put(orig_bio);
                 return -EIO;
         }
+
         if (map_length >= orig_bio->bi_size) {
                 bio = orig_bio;
                 goto submit;
@@ -7134,19 +7106,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
         struct btrfs_dio_private *dip;
         struct bio *io_bio;
         int skip_sum;
+       int sum_len;
         int write = rw & REQ_WRITE;
         int ret = 0;
+       u16 csum_size;
  
         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
  
         io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
-
         if (!io_bio) {
                 ret = -ENOMEM;
                 goto free_ordered;
         }
  
-       dip = kmalloc(sizeof(*dip), GFP_NOFS);
+       if (!skip_sum && !write) {
+               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+               sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+               sum_len *= csum_size;
+       } else {
+               sum_len = 0;
+       }
+
+       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
         if (!dip) {
                 ret = -ENOMEM;
                 goto free_io_bio;