]> Pileus Git - ~andy/linux/blobdiff - fs/btrfs/inode.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[~andy/linux] / fs / btrfs / inode.c
index d546d8c3038baa4451aa2f338a0c24592a3ea48f..5c4ab9c18940cc7827a75df6e02a84370cae3edd 100644 (file)
 #include "inode-map.h"
 #include "backref.h"
 #include "hash.h"
+#include "props.h"
 
 struct btrfs_iget_args {
-       u64 ino;
+       struct btrfs_key *location;
        struct btrfs_root *root;
 };
 
@@ -125,13 +126,12 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
  * no overlapping inline items exist in the btree
  */
 static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+                               struct btrfs_path *path, int extent_inserted,
                                struct btrfs_root *root, struct inode *inode,
                                u64 start, size_t size, size_t compressed_size,
                                int compress_type,
                                struct page **compressed_pages)
 {
-       struct btrfs_key key;
-       struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct page *page = NULL;
        char *kaddr;
@@ -140,29 +140,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
        int err = 0;
        int ret;
        size_t cur_size = size;
-       size_t datasize;
        unsigned long offset;
 
        if (compressed_size && compressed_pages)
                cur_size = compressed_size;
 
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
+       inode_add_bytes(inode, size);
 
-       path->leave_spinning = 1;
+       if (!extent_inserted) {
+               struct btrfs_key key;
+               size_t datasize;
 
-       key.objectid = btrfs_ino(inode);
-       key.offset = start;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
-       datasize = btrfs_file_extent_calc_inline_size(cur_size);
+               key.objectid = btrfs_ino(inode);
+               key.offset = start;
+               btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
 
-       inode_add_bytes(inode, size);
-       ret = btrfs_insert_empty_item(trans, root, path, &key,
-                                     datasize);
-       if (ret) {
-               err = ret;
-               goto fail;
+               datasize = btrfs_file_extent_calc_inline_size(cur_size);
+               path->leave_spinning = 1;
+               ret = btrfs_insert_empty_item(trans, root, path, &key,
+                                             datasize);
+               if (ret) {
+                       err = ret;
+                       goto fail;
+               }
        }
        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -203,7 +203,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
                page_cache_release(page);
        }
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_free_path(path);
+       btrfs_release_path(path);
 
        /*
         * we're an inline extent, so nobody can
@@ -219,7 +219,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 
        return ret;
 fail:
-       btrfs_free_path(path);
        return err;
 }
 
@@ -242,6 +241,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
        u64 aligned_end = ALIGN(end, root->sectorsize);
        u64 data_len = inline_len;
        int ret;
+       struct btrfs_path *path;
+       int extent_inserted = 0;
+       u32 extent_item_size;
 
        if (compressed_size)
                data_len = compressed_size;
@@ -256,12 +258,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
                return 1;
        }
 
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
        trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
+       if (IS_ERR(trans)) {
+               btrfs_free_path(path);
                return PTR_ERR(trans);
+       }
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
-       ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
+       if (compressed_size && compressed_pages)
+               extent_item_size = btrfs_file_extent_calc_inline_size(
+                  compressed_size);
+       else
+               extent_item_size = btrfs_file_extent_calc_inline_size(
+                   inline_len);
+
+       ret = __btrfs_drop_extents(trans, root, inode, path,
+                                  start, aligned_end, NULL,
+                                  1, 1, extent_item_size, &extent_inserted);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto out;
@@ -269,7 +286,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
 
        if (isize > actual_end)
                inline_len = min_t(u64, isize, actual_end);
-       ret = insert_inline_extent(trans, root, inode, start,
+       ret = insert_inline_extent(trans, path, extent_inserted,
+                                  root, inode, start,
                                   inline_len, compressed_size,
                                   compress_type, compressed_pages);
        if (ret && ret != -ENOSPC) {
@@ -284,6 +302,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
        btrfs_delalloc_release_metadata(inode, end + 1 - start);
        btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 out:
+       btrfs_free_path(path);
        btrfs_end_transaction(trans, root);
        return ret;
 }
@@ -1262,7 +1281,8 @@ next_slot:
                        nocow = 1;
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                        extent_end = found_key.offset +
-                               btrfs_file_extent_inline_len(leaf, fi);
+                               btrfs_file_extent_inline_len(leaf,
+                                                    path->slots[0], fi);
                        extent_end = ALIGN(extent_end, root->sectorsize);
                } else {
                        BUG_ON(1);
@@ -1841,14 +1861,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key ins;
+       int extent_inserted = 0;
        int ret;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       path->leave_spinning = 1;
-
        /*
         * we may be replacing one extent in the tree with another.
         * The new extent is pinned in the extent map, and we don't want
@@ -1858,17 +1877,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
         * the caller is expected to unpin it and allow it to be merged
         * with the others.
         */
-       ret = btrfs_drop_extents(trans, root, inode, file_pos,
-                                file_pos + num_bytes, 0);
+       ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
+                                  file_pos + num_bytes, NULL, 0,
+                                  1, sizeof(*fi), &extent_inserted);
        if (ret)
                goto out;
 
-       ins.objectid = btrfs_ino(inode);
-       ins.offset = file_pos;
-       ins.type = BTRFS_EXTENT_DATA_KEY;
-       ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
-       if (ret)
-               goto out;
+       if (!extent_inserted) {
+               ins.objectid = btrfs_ino(inode);
+               ins.offset = file_pos;
+               ins.type = BTRFS_EXTENT_DATA_KEY;
+
+               path->leave_spinning = 1;
+               ret = btrfs_insert_empty_item(trans, root, path, &ins,
+                                             sizeof(*fi));
+               if (ret)
+                       goto out;
+       }
        leaf = path->nodes[0];
        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);
@@ -2290,7 +2315,7 @@ again:
                u64 extent_len;
                struct btrfs_key found_key;
 
-               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+               ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
                if (ret < 0)
                        goto out_free_path;
 
@@ -2543,12 +2568,6 @@ out_kfree:
        return NULL;
 }
 
-/*
- * helper function for btrfs_finish_ordered_io, this
- * just reads in some of the csum leaves to prime them into ram
- * before we start the transaction.  It limits the amount of btree
- * reads required while inside the transaction.
- */
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -3248,7 +3267,8 @@ out:
  * slot is the slot the inode is in, objectid is the objectid of the inode
  */
 static noinline int acls_after_inode_item(struct extent_buffer *leaf,
-                                         int slot, u64 objectid)
+                                         int slot, u64 objectid,
+                                         int *first_xattr_slot)
 {
        u32 nritems = btrfs_header_nritems(leaf);
        struct btrfs_key found_key;
@@ -3264,6 +3284,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
        }
 
        slot++;
+       *first_xattr_slot = -1;
        while (slot < nritems) {
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -3273,6 +3294,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 
                /* we found an xattr, assume we've got an acl */
                if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
+                       if (*first_xattr_slot == -1)
+                               *first_xattr_slot = slot;
                        if (found_key.offset == xattr_access ||
                            found_key.offset == xattr_default)
                                return 1;
@@ -3301,6 +3324,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
         * something larger than an xattr.  We have to assume the inode
         * has acls
         */
+       if (*first_xattr_slot == -1)
+               *first_xattr_slot = slot;
        return 1;
 }
 
@@ -3315,10 +3340,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
        struct btrfs_timespec *tspec;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key location;
+       unsigned long ptr;
        int maybe_acls;
        u32 rdev;
        int ret;
        bool filled = false;
+       int first_xattr_slot;
 
        ret = btrfs_fill_inode(inode, &rdev);
        if (!ret)
@@ -3328,7 +3355,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
        if (!path)
                goto make_bad;
 
-       path->leave_spinning = 1;
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -3338,7 +3364,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
        leaf = path->nodes[0];
 
        if (filled)
-               goto cache_acl;
+               goto cache_index;
 
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
@@ -3381,18 +3407,51 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
        BTRFS_I(inode)->index_cnt = (u64)-1;
        BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
+       path->slots[0]++;
+       if (inode->i_nlink != 1 ||
+           path->slots[0] >= btrfs_header_nritems(leaf))
+               goto cache_acl;
+
+       btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
+       if (location.objectid != btrfs_ino(inode))
+               goto cache_acl;
+
+       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       if (location.type == BTRFS_INODE_REF_KEY) {
+               struct btrfs_inode_ref *ref;
+
+               ref = (struct btrfs_inode_ref *)ptr;
+               BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
+       } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
+               struct btrfs_inode_extref *extref;
+
+               extref = (struct btrfs_inode_extref *)ptr;
+               BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
+                                                                    extref);
+       }
 cache_acl:
        /*
         * try to precache a NULL acl entry for files that don't have
         * any xattrs or acls
         */
        maybe_acls = acls_after_inode_item(leaf, path->slots[0],
-                                          btrfs_ino(inode));
+                                          btrfs_ino(inode), &first_xattr_slot);
+       if (first_xattr_slot != -1) {
+               path->slots[0] = first_xattr_slot;
+               ret = btrfs_load_inode_props(inode, path);
+               if (ret)
+                       btrfs_err(root->fs_info,
+                                 "error loading props for ino %llu (root %llu): %d\n",
+                                 btrfs_ino(inode),
+                                 root->root_key.objectid, ret);
+       }
+       btrfs_free_path(path);
+
        if (!maybe_acls)
                cache_no_acl(inode);
 
-       btrfs_free_path(path);
-
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_mapping->a_ops = &btrfs_aops;
@@ -3496,7 +3555,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
                goto failed;
        }
 
-       btrfs_unlock_up_safe(path, 1);
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
@@ -3593,6 +3651,24 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                goto err;
        btrfs_release_path(path);
 
+       /*
+        * If we don't have dir index, we have to get it by looking up
+        * the inode ref, since we get the inode ref, remove it directly,
+        * it is unnecessary to do delayed deletion.
+        *
+        * But if we have dir index, needn't search inode ref to get it.
+        * Since the inode ref is close to the inode item, it is better
+        * that we delay to delete it, and just do this deletion when
+        * we update the inode item.
+        */
+       if (BTRFS_I(inode)->dir_index) {
+               ret = btrfs_delayed_delete_inode_ref(inode);
+               if (!ret) {
+                       index = BTRFS_I(inode)->dir_index;
+                       goto skip_backref;
+               }
+       }
+
        ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
                                  dir_ino, &index);
        if (ret) {
@@ -3602,7 +3678,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                btrfs_abort_transaction(trans, root, ret);
                goto err;
        }
-
+skip_backref:
        ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
@@ -3948,7 +4024,7 @@ search_again:
                                    btrfs_file_extent_num_bytes(leaf, fi);
                        } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                                item_end += btrfs_file_extent_inline_len(leaf,
-                                                                        fi);
+                                                        path->slots[0], fi);
                        }
                        item_end--;
                }
@@ -4018,6 +4094,12 @@ search_again:
                                        inode_sub_bytes(inode, item_end + 1 -
                                                        new_size);
                                }
+
+                               /*
+                                * update the ram bytes to properly reflect
+                                * the new size of our item
+                                */
+                               btrfs_set_file_extent_ram_bytes(leaf, fi, size);
                                size =
                                    btrfs_file_extent_calc_inline_size(size);
                                btrfs_truncate_item(root, path, size, 1);
@@ -4203,6 +4285,49 @@ out:
        return ret;
 }
 
+static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
+                            u64 offset, u64 len)
+{
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       /*
+        * Still need to make sure the inode looks like it's been updated so
+        * that any holes get logged if we fsync.
+        */
+       if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
+               BTRFS_I(inode)->last_trans = root->fs_info->generation;
+               BTRFS_I(inode)->last_sub_trans = root->log_transid;
+               BTRFS_I(inode)->last_log_commit = root->last_log_commit;
+               return 0;
+       }
+
+       /*
+        * 1 - for the one we're dropping
+        * 1 - for the one we're adding
+        * 1 - for updating the inode.
+        */
+       trans = btrfs_start_transaction(root, 3);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               btrfs_end_transaction(trans, root);
+               return ret;
+       }
+
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+                                      0, 0, len, 0, len, 0, 0, 0);
+       if (ret)
+               btrfs_abort_transaction(trans, root, ret);
+       else
+               btrfs_update_inode(trans, root, inode);
+       btrfs_end_transaction(trans, root);
+       return ret;
+}
+
 /*
  * This function puts in dummy file extents for the area we're creating a hole
  * for.  So if we are truncating this file to a larger size we need to insert
@@ -4211,7 +4336,6 @@ out:
  */
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_map *em = NULL;
@@ -4266,31 +4390,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                        struct extent_map *hole_em;
                        hole_size = last_byte - cur_offset;
 
-                       trans = btrfs_start_transaction(root, 3);
-                       if (IS_ERR(trans)) {
-                               err = PTR_ERR(trans);
-                               break;
-                       }
-
-                       err = btrfs_drop_extents(trans, root, inode,
-                                                cur_offset,
-                                                cur_offset + hole_size, 1);
-                       if (err) {
-                               btrfs_abort_transaction(trans, root, err);
-                               btrfs_end_transaction(trans, root);
-                               break;
-                       }
-
-                       err = btrfs_insert_file_extent(trans, root,
-                                       btrfs_ino(inode), cur_offset, 0,
-                                       0, hole_size, 0, hole_size,
-                                       0, 0, 0);
-                       if (err) {
-                               btrfs_abort_transaction(trans, root, err);
-                               btrfs_end_transaction(trans, root);
+                       err = maybe_insert_hole(root, inode, cur_offset,
+                                               hole_size);
+                       if (err)
                                break;
-                       }
-
                        btrfs_drop_extent_cache(inode, cur_offset,
                                                cur_offset + hole_size - 1, 0);
                        hole_em = alloc_extent_map();
@@ -4309,7 +4412,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                        hole_em->ram_bytes = hole_size;
                        hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
                        hole_em->compress_type = BTRFS_COMPRESS_NONE;
-                       hole_em->generation = trans->transid;
+                       hole_em->generation = root->fs_info->generation;
 
                        while (1) {
                                write_lock(&em_tree->lock);
@@ -4322,17 +4425,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                                        hole_size - 1, 0);
                        }
                        free_extent_map(hole_em);
-next:
-                       btrfs_update_inode(trans, root, inode);
-                       btrfs_end_transaction(trans, root);
                }
+next:
                free_extent_map(em);
                em = NULL;
                cur_offset = last_byte;
                if (cur_offset >= block_end)
                        break;
        }
-
        free_extent_map(em);
        unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
                             GFP_NOFS);
@@ -4474,6 +4574,64 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
        return err;
 }
 
+/*
+ * While truncating the inode pages during eviction, we get the VFS calling
+ * btrfs_invalidatepage() against each page of the inode. This is slow because
+ * the calls to btrfs_invalidatepage() result in a huge amount of calls to
+ * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
+ * extent_state structures over and over, wasting lots of time.
+ *
+ * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
+ * those expensive operations on a per page basis and do only the ordered io
+ * finishing, while we release here the extent_map and extent_state structures,
+ * without the excessive merging and splitting.
+ */
+static void evict_inode_truncate_pages(struct inode *inode)
+{
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
+       struct rb_node *node;
+
+       ASSERT(inode->i_state & I_FREEING);
+       truncate_inode_pages(&inode->i_data, 0);
+
+       write_lock(&map_tree->lock);
+       while (!RB_EMPTY_ROOT(&map_tree->map)) {
+               struct extent_map *em;
+
+               node = rb_first(&map_tree->map);
+               em = rb_entry(node, struct extent_map, rb_node);
+               clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+               clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
+               remove_extent_mapping(map_tree, em);
+               free_extent_map(em);
+       }
+       write_unlock(&map_tree->lock);
+
+       spin_lock(&io_tree->lock);
+       while (!RB_EMPTY_ROOT(&io_tree->state)) {
+               struct extent_state *state;
+               struct extent_state *cached_state = NULL;
+
+               node = rb_first(&io_tree->state);
+               state = rb_entry(node, struct extent_state, rb_node);
+               atomic_inc(&state->refs);
+               spin_unlock(&io_tree->lock);
+
+               lock_extent_bits(io_tree, state->start, state->end,
+                                0, &cached_state);
+               clear_extent_bit(io_tree, state->start, state->end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+               free_extent_state(state);
+
+               spin_lock(&io_tree->lock);
+       }
+       spin_unlock(&io_tree->lock);
+}
+
 void btrfs_evict_inode(struct inode *inode)
 {
        struct btrfs_trans_handle *trans;
@@ -4484,7 +4642,8 @@ void btrfs_evict_inode(struct inode *inode)
 
        trace_btrfs_inode_evict(inode);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       evict_inode_truncate_pages(inode);
+
        if (inode->i_nlink &&
            ((btrfs_root_refs(&root->root_item) != 0 &&
              root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@ -4659,9 +4818,9 @@ static int fixup_tree_root_location(struct btrfs_root *root,
        }
 
        err = -ENOENT;
-       ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
-                                 BTRFS_I(dir)->root->root_key.objectid,
-                                 location->objectid);
+       ret = btrfs_find_item(root->fs_info->tree_root, path,
+                               BTRFS_I(dir)->root->root_key.objectid,
+                               location->objectid, BTRFS_ROOT_REF_KEY, NULL);
        if (ret) {
                if (ret < 0)
                        err = ret;
@@ -4822,7 +4981,9 @@ again:
 static int btrfs_init_locked_inode(struct inode *inode, void *p)
 {
        struct btrfs_iget_args *args = p;
-       inode->i_ino = args->ino;
+       inode->i_ino = args->location->objectid;
+       memcpy(&BTRFS_I(inode)->location, args->location,
+              sizeof(*args->location));
        BTRFS_I(inode)->root = args->root;
        return 0;
 }
@@ -4830,19 +4991,19 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
 static int btrfs_find_actor(struct inode *inode, void *opaque)
 {
        struct btrfs_iget_args *args = opaque;
-       return args->ino == btrfs_ino(inode) &&
+       return args->location->objectid == BTRFS_I(inode)->location.objectid &&
                args->root == BTRFS_I(inode)->root;
 }
 
 static struct inode *btrfs_iget_locked(struct super_block *s,
-                                      u64 objectid,
+                                      struct btrfs_key *location,
                                       struct btrfs_root *root)
 {
        struct inode *inode;
        struct btrfs_iget_args args;
-       unsigned long hashval = btrfs_inode_hash(objectid, root);
+       unsigned long hashval = btrfs_inode_hash(location->objectid, root);
 
-       args.ino = objectid;
+       args.location = location;
        args.root = root;
 
        inode = iget5_locked(s, hashval, btrfs_find_actor,
@@ -4859,13 +5020,11 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 {
        struct inode *inode;
 
-       inode = btrfs_iget_locked(s, location->objectid, root);
+       inode = btrfs_iget_locked(s, location, root);
        if (!inode)
                return ERR_PTR(-ENOMEM);
 
        if (inode->i_state & I_NEW) {
-               BTRFS_I(inode)->root = root;
-               memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
                btrfs_read_locked_inode(inode);
                if (!is_bad_inode(inode)) {
                        inode_tree_add(inode);
@@ -4921,7 +5080,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
                return ERR_PTR(ret);
 
        if (location.objectid == 0)
-               return NULL;
+               return ERR_PTR(-ENOENT);
 
        if (location.type == BTRFS_INODE_ITEM_KEY) {
                inode = btrfs_iget(dir->i_sb, &location, root, NULL);
@@ -4985,10 +5144,17 @@ static void btrfs_dentry_release(struct dentry *dentry)
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   unsigned int flags)
 {
-       struct dentry *ret;
+       struct inode *inode;
 
-       ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
-       return ret;
+       inode = btrfs_lookup_dentry(dir, dentry);
+       if (IS_ERR(inode)) {
+               if (PTR_ERR(inode) == -ENOENT)
+                       inode = NULL;
+               else
+                       return ERR_CAST(inode);
+       }
+
+       return d_splice_alias(inode, dentry);
 }
 
 unsigned char btrfs_filetype_table[] = {
@@ -5358,7 +5524,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        u32 sizes[2];
        unsigned long ptr;
        int ret;
-       int owner;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -5392,6 +5557,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
         * number
         */
        BTRFS_I(inode)->index_cnt = 2;
+       BTRFS_I(inode)->dir_index = *index;
        BTRFS_I(inode)->root = root;
        BTRFS_I(inode)->generation = trans->transid;
        inode->i_generation = BTRFS_I(inode)->generation;
@@ -5404,11 +5570,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
         */
        set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
 
-       if (S_ISDIR(mode))
-               owner = 0;
-       else
-               owner = 1;
-
        key[0].objectid = objectid;
        btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
        key[0].offset = 0;
@@ -5473,6 +5634,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
        btrfs_update_root_times(trans, root);
 
+       ret = btrfs_inode_inherit_props(trans, inode, dir);
+       if (ret)
+               btrfs_err(root->fs_info,
+                         "error inheriting props for ino %llu (root %llu): %d",
+                         btrfs_ino(inode), root->root_key.objectid, ret);
+
        return inode;
 fail:
        if (dir)
@@ -5741,6 +5908,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                goto fail;
        }
 
+       /* There are several dir indexes for this inode, clear the cache. */
+       BTRFS_I(inode)->dir_index = 0ULL;
        inc_nlink(inode);
        inode_inc_iversion(inode);
        inode->i_ctime = CURRENT_TIME;
@@ -6004,7 +6173,7 @@ again:
                       btrfs_file_extent_num_bytes(leaf, item);
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                size_t size;
-               size = btrfs_file_extent_inline_len(leaf, item);
+               size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
                extent_end = ALIGN(extent_start + size, root->sectorsize);
        }
 next:
@@ -6073,7 +6242,7 @@ next:
                        goto out;
                }
 
-               size = btrfs_file_extent_inline_len(leaf, item);
+               size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
                extent_offset = page_offset(page) + pg_offset - extent_start;
                copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
                                size - extent_offset);
@@ -6390,6 +6559,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
        int slot;
        int found_type;
        bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -6433,6 +6603,10 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
        if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
                goto out;
 
+       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+       if (extent_end <= offset)
+               goto out;
+
        disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
        if (disk_bytenr == 0)
                goto out;
@@ -6450,8 +6624,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
        }
 
-       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
-
        if (btrfs_extent_readonly(root, disk_bytenr))
                goto out;
        btrfs_release_path(path);
@@ -6895,8 +7067,8 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
        struct btrfs_dio_private *dip = bio->bi_private;
 
        if (err) {
-               printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
-                     "sector %#Lx len %u err no %d\n",
+               btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
+                         "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
                      btrfs_ino(dip->inode), bio->bi_rw,
                      (unsigned long long)bio->bi_iter.bi_sector,
                      bio->bi_iter.bi_size, err);
@@ -7370,6 +7542,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
        struct extent_state *cached_state = NULL;
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       int inode_evicting = inode->i_state & I_FREEING;
 
        /*
         * we have the page locked, so new writeback can't start,
@@ -7385,17 +7558,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                btrfs_releasepage(page, GFP_NOFS);
                return;
        }
-       lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
+
+       if (!inode_evicting)
+               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+       ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
                /*
                 * IO on this page will never be started, so we need
                 * to account for any ordered extents now
                 */
-               clear_extent_bit(tree, page_start, page_end,
-                                EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
-                                EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
+               if (!inode_evicting)
+                       clear_extent_bit(tree, page_start, page_end,
+                                        EXTENT_DIRTY | EXTENT_DELALLOC |
+                                        EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                        EXTENT_DEFRAG, 1, 0, &cached_state,
+                                        GFP_NOFS);
                /*
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
@@ -7419,14 +7596,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                                btrfs_finish_ordered_io(ordered);
                }
                btrfs_put_ordered_extent(ordered);
-               cached_state = NULL;
-               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+               if (!inode_evicting) {
+                       cached_state = NULL;
+                       lock_extent_bits(tree, page_start, page_end, 0,
+                                        &cached_state);
+               }
+       }
+
+       if (!inode_evicting) {
+               clear_extent_bit(tree, page_start, page_end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+
+               __btrfs_releasepage(page, GFP_NOFS);
        }
-       clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
-                &cached_state, GFP_NOFS);
-       __btrfs_releasepage(page, GFP_NOFS);
 
        ClearPageChecked(page);
        if (PagePrivate(page)) {
@@ -7736,7 +7921,9 @@ out:
  * create a new subvolume directory/inode (helper for the ioctl).
  */
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, u64 new_dirid)
+                            struct btrfs_root *new_root,
+                            struct btrfs_root *parent_root,
+                            u64 new_dirid)
 {
        struct inode *inode;
        int err;
@@ -7754,6 +7941,12 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
        set_nlink(inode, 1);
        btrfs_i_size_write(inode, 0);
 
+       err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
+       if (err)
+               btrfs_err(new_root->fs_info,
+                         "error inheriting subvolume %llu properties: %d\n",
+                         new_root->root_key.objectid, err);
+
        err = btrfs_update_inode(trans, new_root, inode);
 
        iput(inode);
@@ -7779,6 +7972,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->flags = 0;
        ei->csum_bytes = 0;
        ei->index_cnt = (u64)-1;
+       ei->dir_index = 0;
        ei->last_unlink_trans = 0;
        ei->last_log_commit = 0;
 
@@ -8066,6 +8260,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (ret)
                goto out_fail;
 
+       BTRFS_I(old_inode)->dir_index = 0ULL;
        if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                /* force full log commit if subvolume involved. */
                root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -8154,6 +8349,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                goto out_fail;
        }
 
+       if (old_inode->i_nlink == 1)
+               BTRFS_I(old_inode)->dir_index = index;
+
        if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
                struct dentry *parent = new_dentry->d_parent;
                btrfs_log_new_name(trans, old_inode, old_dir, parent);
@@ -8289,7 +8487,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 {
        int ret;
 
-       if (root->fs_info->sb->s_flags & MS_RDONLY)
+       if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
                return -EROFS;
 
        ret = __start_delalloc_inodes(root, delay_iput);
@@ -8315,7 +8513,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
        struct list_head splice;
        int ret;
 
-       if (fs_info->sb->s_flags & MS_RDONLY)
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                return -EROFS;
 
        INIT_LIST_HEAD(&splice);