Pileus Git - ~andy/linux/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 31 Jan 2014 04:08:20 +0000 (20:08 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 31 Jan 2014 04:08:20 +0000 (20:08 -0800)
Pull btrfs updates from Chris Mason:
 "This is a pretty big pull, and most of these changes have been
  floating in btrfs-next for a long time.  Filipe's properties work is a
  cool building block for inheriting attributes like compression down on
  a per inode basis.

  Jeff Mahoney kicked in code to export filesystem info into sysfs.

  Otherwise, lots of performance improvements, cleanups and bug fixes.

  Looks like there are still a few other small pending incrementals, but
  I wanted to get the bulk of this in first"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (149 commits)
  Btrfs: fix spin_unlock in check_ref_cleanup
  Btrfs: setup inode location during btrfs_init_inode_locked
  Btrfs: don't use ram_bytes for uncompressed inline items
  Btrfs: fix btrfs_search_slot_for_read backwards iteration
  Btrfs: do not export ulist functions
  Btrfs: rework ulist with list+rb_tree
  Btrfs: fix memory leaks on walking backrefs failure
  Btrfs: fix send file hole detection leading to data corruption
  Btrfs: add a reschedule point in btrfs_find_all_roots()
  Btrfs: make send's file extent item search more efficient
  Btrfs: fix to catch all errors when resolving indirect ref
  Btrfs: fix protection between walking backrefs and root deletion
  btrfs: fix warning while merging two adjacent extents
  Btrfs: fix infinite path build loops in incremental send
  btrfs: undo sysfs when open_ctree() fails
  Btrfs: fix snprintf usage by send's gen_unique_name
  btrfs: fix defrag 32-bit integer overflow
  btrfs: sysfs: list the NO_HOLES feature
  btrfs: sysfs: don't show reserved incompat feature
  btrfs: call permission checks earlier in ioctls and return EPERM
  ...

12 files changed:
1  2 
fs/btrfs/check-integrity.c
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/file-item.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
lib/kobject.c

diff --combined fs/btrfs/check-integrity.c
index cb05e1c842c5b8b84dee98d1a3f452eaa179417e,160fb509d7209ca310b0b18f4068dbd2bb442dd5..49a62b4dda3b0184ccd30880b8449b967eda5f27
@@@ -1456,10 -1456,14 +1456,14 @@@ static int btrfsic_handle_extent_data
        btrfsic_read_from_block_data(block_ctx, &file_extent_item,
                                     file_extent_item_offset,
                                     sizeof(struct btrfs_file_extent_item));
-       next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
-                     btrfs_stack_file_extent_offset(&file_extent_item);
-       generation = btrfs_stack_file_extent_generation(&file_extent_item);
-       num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+       next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
+       if (btrfs_stack_file_extent_compression(&file_extent_item) ==
+           BTRFS_COMPRESS_NONE) {
+               next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
+               num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+       } else {
+               num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
+       }
        generation = btrfs_stack_file_extent_generation(&file_extent_item);
  
        if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
@@@ -1695,7 -1699,7 +1699,7 @@@ static int btrfsic_read_block(struct bt
                        return -1;
                }
                bio->bi_bdev = block_ctx->dev->bdev;
 -              bio->bi_sector = dev_bytenr >> 9;
 +              bio->bi_iter.bi_sector = dev_bytenr >> 9;
  
                for (j = i; j < num_pages; j++) {
                        ret = bio_add_page(bio, block_ctx->pagev[j],
@@@ -3013,7 -3017,7 +3017,7 @@@ static void __btrfsic_submit_bio(int rw
                int bio_is_patched;
                char **mapped_datav;
  
 -              dev_bytenr = 512 * bio->bi_sector;
 +              dev_bytenr = 512 * bio->bi_iter.bi_sector;
                bio_is_patched = 0;
                if (dev_state->state->print_mask &
                    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
                               "submit_bio(rw=0x%x, bi_vcnt=%u,"
                               " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
                               rw, bio->bi_vcnt,
 -                             (unsigned long long)bio->bi_sector, dev_bytenr,
 -                             bio->bi_bdev);
 +                             (unsigned long long)bio->bi_iter.bi_sector,
 +                             dev_bytenr, bio->bi_bdev);
  
                mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
                                       GFP_NOFS);
diff --combined fs/btrfs/compression.c
index f5cdeb4b553824744429cff1f4d8b57c17a27909,af815eb8f970183664394215d8cc743ff3898ab0..e2600cdb6c257e366b873445c396a917249e76f3
@@@ -128,11 -128,10 +128,10 @@@ static int check_compressed_csum(struc
                kunmap_atomic(kaddr);
  
                if (csum != *cb_sum) {
-                       printk(KERN_INFO "btrfs csum failed ino %llu "
-                              "extent %llu csum %u "
-                              "wanted %u mirror %d\n",
-                              btrfs_ino(inode), disk_start, csum, *cb_sum,
-                              cb->mirror_num);
+                       btrfs_info(BTRFS_I(inode)->root->fs_info,
+                          "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
+                          btrfs_ino(inode), disk_start, csum, *cb_sum,
+                          cb->mirror_num);
                        ret = -EIO;
                        goto fail;
                }
@@@ -172,8 -171,7 +171,8 @@@ static void end_compressed_bio_read(str
                goto out;
  
        inode = cb->inode;
 -      ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
 +      ret = check_compressed_csum(inode, cb,
 +                                  (u64)bio->bi_iter.bi_sector << 9);
        if (ret)
                goto csum_failed;
  
@@@ -202,16 -200,18 +201,16 @@@ csum_failed
        if (cb->errors) {
                bio_io_error(cb->orig_bio);
        } else {
 -              int bio_index = 0;
 -              struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
 +              int i;
 +              struct bio_vec *bvec;
  
                /*
                 * we have verified the checksum already, set page
                 * checked so the end_io handlers know about it
                 */
 -              while (bio_index < cb->orig_bio->bi_vcnt) {
 +              bio_for_each_segment_all(bvec, cb->orig_bio, i)
                        SetPageChecked(bvec->bv_page);
 -                      bvec++;
 -                      bio_index++;
 -              }
 +
                bio_endio(cb->orig_bio, 0);
        }
  
@@@ -371,7 -371,7 +370,7 @@@ int btrfs_submit_compressed_write(struc
        for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
                page = compressed_pages[pg_index];
                page->mapping = inode->i_mapping;
 -              if (bio->bi_size)
 +              if (bio->bi_iter.bi_size)
                        ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
                                                           PAGE_CACHE_SIZE,
                                                           bio, 0);
                        bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
                }
                if (bytes_left < PAGE_CACHE_SIZE) {
-                       printk("bytes left %lu compress len %lu nr %lu\n",
+                       btrfs_info(BTRFS_I(inode)->root->fs_info,
+                                       "bytes left %lu compress len %lu nr %lu",
                               bytes_left, cb->compressed_len, cb->nr_pages);
                }
                bytes_left -= PAGE_CACHE_SIZE;
@@@ -505,7 -506,7 +505,7 @@@ static noinline int add_ra_bio_pages(st
  
                if (!em || last_offset < em->start ||
                    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
 -                  (em->block_start >> 9) != cb->orig_bio->bi_sector) {
 +                  (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
                        free_extent_map(em);
                        unlock_extent(tree, last_offset, end);
                        unlock_page(page);
@@@ -551,7 -552,7 +551,7 @@@ next
   * in it.  We don't actually do IO on those pages but allocate new ones
   * to hold the compressed pages on disk.
   *
 - * bio->bi_sector points to the compressed extent on disk
 + * bio->bi_iter.bi_sector points to the compressed extent on disk
   * bio->bi_io_vec points to all of the inode pages
   * bio->bi_vcnt is a count of pages
   *
@@@ -572,7 -573,7 +572,7 @@@ int btrfs_submit_compressed_read(struc
        struct page *page;
        struct block_device *bdev;
        struct bio *comp_bio;
 -      u64 cur_disk_byte = (u64)bio->bi_sector << 9;
 +      u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
        u64 em_len;
        u64 em_start;
        struct extent_map *em;
                page->mapping = inode->i_mapping;
                page->index = em_start >> PAGE_CACHE_SHIFT;
  
 -              if (comp_bio->bi_size)
 +              if (comp_bio->bi_iter.bi_size)
                        ret = tree->ops->merge_bio_hook(READ, page, 0,
                                                        PAGE_CACHE_SIZE,
                                                        comp_bio, 0);
                                                        comp_bio, sums);
                                BUG_ON(ret); /* -ENOMEM */
                        }
 -                      sums += (comp_bio->bi_size + root->sectorsize - 1) /
 -                              root->sectorsize;
 +                      sums += (comp_bio->bi_iter.bi_size +
 +                               root->sectorsize - 1) / root->sectorsize;
  
                        ret = btrfs_map_bio(root, READ, comp_bio,
                                            mirror_num, 0);
diff --combined fs/btrfs/ctree.h
index 7506825211a29d44772cdcd93732875258281daf,fceddbdfdd3d0377cb6d5d4c92eadf381f978740..2c1a42ca519f43a8dd85ce95a24fc6ed0a22d07d
@@@ -521,9 -521,15 +521,15 @@@ struct btrfs_super_block 
  #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF  (1ULL << 6)
  #define BTRFS_FEATURE_INCOMPAT_RAID56         (1ULL << 7)
  #define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA        (1ULL << 8)
+ #define BTRFS_FEATURE_INCOMPAT_NO_HOLES               (1ULL << 9)
  
  #define BTRFS_FEATURE_COMPAT_SUPP             0ULL
+ #define BTRFS_FEATURE_COMPAT_SAFE_SET         0ULL
+ #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR               0ULL
  #define BTRFS_FEATURE_COMPAT_RO_SUPP          0ULL
+ #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET      0ULL
+ #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR    0ULL
  #define BTRFS_FEATURE_INCOMPAT_SUPP                   \
        (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
         BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
         BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |          \
         BTRFS_FEATURE_INCOMPAT_RAID56 |                \
         BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |         \
-        BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+        BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |       \
+        BTRFS_FEATURE_INCOMPAT_NO_HOLES)
+ #define BTRFS_FEATURE_INCOMPAT_SAFE_SET                       \
+       (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+ #define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR             0ULL
  
  /*
   * A leaf is full of items. offset and size tell us where to find
@@@ -1094,7 -1105,7 +1105,7 @@@ struct btrfs_qgroup_limit_item 
  } __attribute__ ((__packed__));
  
  struct btrfs_space_info {
-       u64 flags;
+       spinlock_t lock;
  
        u64 total_bytes;        /* total bytes in the space,
                                   this doesn't take mirrors into account */
                                   transaction finishes */
        u64 bytes_reserved;     /* total bytes the allocator has reserved for
                                   current allocations */
-       u64 bytes_readonly;     /* total bytes that are read only */
        u64 bytes_may_use;      /* number of bytes that may be used for
                                   delalloc/allocations */
+       u64 bytes_readonly;     /* total bytes that are read only */
+       unsigned int full:1;    /* indicates that we cannot allocate any more
+                                  chunks for this space */
+       unsigned int chunk_alloc:1;     /* set if we are allocating a chunk */
+       unsigned int flush:1;           /* set if we are trying to make space */
+       unsigned int force_alloc;       /* set if we need to force a chunk
+                                          alloc for this space */
        u64 disk_used;          /* total bytes used on disk */
        u64 disk_total;         /* total bytes on disk, takes mirrors into
                                   account */
  
+       u64 flags;
        /*
         * bytes_pinned is kept in line with what is actually pinned, as in
         * we've called update_block_group and dropped the bytes_used counter
         */
        struct percpu_counter total_bytes_pinned;
  
-       unsigned int full:1;    /* indicates that we cannot allocate any more
-                                  chunks for this space */
-       unsigned int chunk_alloc:1;     /* set if we are allocating a chunk */
-       unsigned int flush:1;           /* set if we are trying to make space */
-       unsigned int force_alloc;       /* set if we need to force a chunk
-                                          alloc for this space */
        struct list_head list;
  
+       struct rw_semaphore groups_sem;
        /* for block groups in our same type */
        struct list_head block_groups[BTRFS_NR_RAID_TYPES];
-       spinlock_t lock;
-       struct rw_semaphore groups_sem;
        wait_queue_head_t wait;
+       struct kobject kobj;
+       struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
  };
  
  #define       BTRFS_BLOCK_RSV_GLOBAL          1
@@@ -1346,6 -1361,7 +1361,7 @@@ struct btrfs_fs_info 
  
        u64 generation;
        u64 last_trans_committed;
+       u64 avg_delayed_ref_runtime;
  
        /*
         * this is updated to the current trans every time a full commit
        spinlock_t tree_mod_seq_lock;
        atomic64_t tree_mod_seq;
        struct list_head tree_mod_seq_list;
-       struct seq_list tree_mod_seq_elem;
  
        /* this protects tree_mod_log */
        rwlock_t tree_mod_log_lock;
        int thread_pool_size;
  
        struct kobject super_kobj;
+       struct kobject *space_info_kobj;
+       struct kobject *device_dir_kobj;
        struct completion kobj_unregister;
        int do_barriers;
        int closing;
        spinlock_t reada_lock;
        struct radix_tree_root reada_tree;
  
+       /* Extent buffer radix tree */
+       spinlock_t buffer_lock;
+       struct radix_tree_root buffer_radix;
        /* next backup root to be overwritten */
        int backup_root_index;
  
@@@ -1795,6 -1816,12 +1816,12 @@@ struct btrfs_root 
        struct list_head ordered_extents;
        struct list_head ordered_root;
        u64 nr_ordered_extents;
+       /*
+        * Number of currently running SEND ioctls to prevent
+        * manipulation with the read-only status via SUBVOL_SETFLAGS
+        */
+       int send_in_progress;
  };
  
  struct btrfs_ioctl_defrag_range_args {
  #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
  #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR      (1 << 22)
  #define BTRFS_MOUNT_RESCAN_UUID_TREE  (1 << 23)
+ #define       BTRFS_MOUNT_CHANGE_INODE_CACHE  (1 << 24)
  
  #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
  
@@@ -2925,6 -2953,10 +2953,10 @@@ BTRFS_SETGET_STACK_FUNCS(stack_file_ext
                         struct btrfs_file_extent_item, generation, 64);
  BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
                         struct btrfs_file_extent_item, num_bytes, 64);
+ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes,
+                        struct btrfs_file_extent_item, disk_num_bytes, 64);
+ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
+                        struct btrfs_file_extent_item, compression, 8);
  
  static inline unsigned long
  btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@@ -2958,15 -2990,6 +2990,6 @@@ BTRFS_SETGET_FUNCS(file_extent_encrypti
  BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
                   other_encoding, 16);
  
- /* this returns the number of file bytes represented by the inline item.
-  * If an item is compressed, this is the uncompressed size
-  */
- static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
-                                              struct btrfs_file_extent_item *e)
- {
-       return btrfs_file_extent_ram_bytes(eb, e);
- }
  /*
   * this returns the number of bytes used by the item on disk, minus the
   * size of any extent headers.  If a file is compressed on disk, this is
@@@ -2980,6 -3003,32 +3003,32 @@@ static inline u32 btrfs_file_extent_inl
        return btrfs_item_size(eb, e) - offset;
  }
  
+ /* this returns the number of file bytes represented by the inline item.
+  * If an item is compressed, this is the uncompressed size
+  */
+ static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+                                              int slot,
+                                              struct btrfs_file_extent_item *fi)
+ {
+       struct btrfs_map_token token;
+       btrfs_init_map_token(&token);
+       /*
+        * return the space used on disk if this item isn't
+        * compressed or encoded
+        */
+       if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
+           btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
+           btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
+               return btrfs_file_extent_inline_item_len(eb,
+                                                        btrfs_item_nr(slot));
+       }
+       /* otherwise use the ram bytes field */
+       return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
+ }
  /* btrfs_dev_stats_item */
  static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
                                        struct btrfs_dev_stats_item *ptr,
@@@ -3143,6 -3192,8 +3192,8 @@@ static inline u64 btrfs_calc_trunc_meta
  
  int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root);
+ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root);
  void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
  int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
@@@ -3163,6 -3214,7 +3214,7 @@@ struct btrfs_block_group_cache *btrfs_l
                                                 struct btrfs_fs_info *info,
                                                 u64 bytenr);
  void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
+ int get_block_group_index(struct btrfs_block_group_cache *cache);
  struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root, u32 blocksize,
                                        u64 parent, u64 root_objectid,
@@@ -3301,6 -3353,8 +3353,8 @@@ int btrfs_comp_cpu_keys(struct btrfs_ke
  int btrfs_previous_item(struct btrfs_root *root,
                        struct btrfs_path *path, u64 min_objectid,
                        int type);
+ int btrfs_previous_extent_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid);
  void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
                             struct btrfs_key *new_key);
  struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
@@@ -3350,6 -3404,8 +3404,8 @@@ int btrfs_duplicate_item(struct btrfs_t
                         struct btrfs_root *root,
                         struct btrfs_path *path,
                         struct btrfs_key *new_key);
+ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
+               u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
  int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_key *key, struct btrfs_path *p, int
                      ins_len, int cow);
@@@ -3399,6 -3455,7 +3455,7 @@@ static inline int btrfs_insert_empty_it
  }
  
  int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
  int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
                        u64 time_seq);
  static inline int btrfs_next_old_item(struct btrfs_root *root,
@@@ -3563,12 -3620,6 +3620,6 @@@ int btrfs_del_inode_ref(struct btrfs_tr
                           struct btrfs_root *root,
                           const char *name, int name_len,
                           u64 inode_objectid, u64 ref_objectid, u64 *index);
- int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
-                             struct btrfs_root *root,
-                             struct btrfs_path *path,
-                             const char *name, int name_len,
-                             u64 inode_objectid, u64 ref_objectid, int mod,
-                             u64 *ret_index);
  int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_path *path, u64 objectid);
@@@ -3676,7 -3727,9 +3727,9 @@@ int btrfs_start_delalloc_roots(struct b
  int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              struct extent_state **cached_state);
  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, u64 new_dirid);
+                            struct btrfs_root *new_root,
+                            struct btrfs_root *parent_root,
+                            u64 new_dirid);
  int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
                         size_t size, struct bio *bio,
                         unsigned long bio_flags);
@@@ -3745,7 -3798,10 +3798,10 @@@ extern const struct file_operations btr
  int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root, struct inode *inode,
                         struct btrfs_path *path, u64 start, u64 end,
-                        u64 *drop_end, int drop_cache);
+                        u64 *drop_end, int drop_cache,
+                        int replace_extent,
+                        u32 extent_item_size,
+                        int *key_inserted);
  int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode, u64 start,
                       u64 end, int drop_cache);
@@@ -3764,6 -3820,8 +3820,8 @@@ int btrfs_defrag_leaves(struct btrfs_tr
  /* sysfs.c */
  int btrfs_init_sysfs(void);
  void btrfs_exit_sysfs(void);
+ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info);
+ void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info);
  
  /* xattr.c */
  ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@@ -3796,14 -3854,20 +3854,20 @@@ void btrfs_printk(const struct btrfs_fs
        btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
  #define btrfs_info(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_INFO fmt, ##args)
+ #ifdef DEBUG
  #define btrfs_debug(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+ #else
+ #define btrfs_debug(fs_info, fmt, args...) \
+     no_printk(KERN_DEBUG fmt, ##args)
+ #endif
  
  #ifdef CONFIG_BTRFS_ASSERT
  
  static inline void assfail(char *expr, char *file, int line)
  {
-       printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d",
+       pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
               expr, file, line);
        BUG();
  }
@@@ -3841,7 -3905,7 +3905,7 @@@ static inline void __btrfs_set_fs_incom
                if (!(features & flag)) {
                        features |= flag;
                        btrfs_set_super_incompat_flags(disk_super, features);
-                       printk(KERN_INFO "btrfs: setting %llu feature flag\n",
+                       btrfs_info(fs_info, "setting %llu feature flag",
                                         flag);
                }
                spin_unlock(&fs_info->super_lock);
@@@ -3899,17 -3963,20 +3963,17 @@@ do {                                                                 
  /* acl.c */
  #ifdef CONFIG_BTRFS_FS_POSIX_ACL
  struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
 +int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
  int btrfs_init_acl(struct btrfs_trans_handle *trans,
                   struct inode *inode, struct inode *dir);
 -int btrfs_acl_chmod(struct inode *inode);
  #else
  #define btrfs_get_acl NULL
 +#define btrfs_set_acl NULL
  static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
                                 struct inode *inode, struct inode *dir)
  {
        return 0;
  }
 -static inline int btrfs_acl_chmod(struct inode *inode)
 -{
 -      return 0;
 -}
  #endif
  
  /* relocation.c */
diff --combined fs/btrfs/disk-io.c
index e71039ea66cf9d4bf8a56bdd984bc7e26597aa74,7619147da382e398bfe8302fc16e1a1e7ede9377..0e69295d0031e558eb3ebb257ff2d4bb2f79772f
@@@ -48,6 -48,7 +48,7 @@@
  #include "rcu-string.h"
  #include "dev-replace.h"
  #include "raid56.h"
+ #include "sysfs.h"
  
  #ifdef CONFIG_X86
  #include <asm/cpufeature.h>
@@@ -299,11 -300,11 +300,11 @@@ static int csum_tree_block(struct btrfs
                        memcpy(&found, result, csum_size);
  
                        read_extent_buffer(buf, &val, 0, csum_size);
-                       printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
-                                      "failed on %llu wanted %X found %X "
-                                      "level %d\n",
-                                      root->fs_info->sb->s_id, buf->start,
-                                      val, found, btrfs_header_level(buf));
+                       printk_ratelimited(KERN_INFO
+                               "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
+                               "level %d\n",
+                               root->fs_info->sb->s_id, buf->start,
+                               val, found, btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
                                kfree(result);
                        return 1;
@@@ -382,13 -383,14 +383,14 @@@ static int btrfs_check_super_csum(char 
                        ret = 1;
  
                if (ret && btrfs_super_generation(disk_sb) < 10) {
-                       printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+                       printk(KERN_WARNING
+                               "BTRFS: super block crcs don't match, older mkfs detected\n");
                        ret = 0;
                }
        }
  
        if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
-               printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+               printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n",
                                csum_type);
                ret = 1;
        }
@@@ -464,13 -466,10 +466,10 @@@ static int btree_read_extent_buffer_pag
  
  static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
  {
-       struct extent_io_tree *tree;
        u64 start = page_offset(page);
        u64 found_start;
        struct extent_buffer *eb;
  
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
        eb = (struct extent_buffer *)page->private;
        if (page != eb->pages[0])
                return 0;
@@@ -500,8 -499,8 +499,8 @@@ static int check_tree_block_fsid(struc
  }
  
  #define CORRUPT(reason, eb, root, slot)                               \
-       printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
-              "root=%llu, slot=%d\n", reason,                  \
+       btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu,"       \
+                  "root=%llu, slot=%d", reason,                        \
               btrfs_header_bytenr(eb), root->objectid, slot)
  
  static noinline int check_leaf(struct btrfs_root *root,
@@@ -569,7 -568,6 +568,6 @@@ static int btree_readpage_end_io_hook(s
                                      u64 phy_offset, struct page *page,
                                      u64 start, u64 end, int mirror)
  {
-       struct extent_io_tree *tree;
        u64 found_start;
        int found_level;
        struct extent_buffer *eb;
        if (!page->private)
                goto out;
  
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
        eb = (struct extent_buffer *)page->private;
  
        /* the pending IO might have been the only thing that kept this buffer
  
        found_start = btrfs_header_bytenr(eb);
        if (found_start != eb->start) {
-               printk_ratelimited(KERN_INFO "btrfs bad tree block start "
+               printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
                               "%llu %llu\n",
                               found_start, eb->start);
                ret = -EIO;
                goto err;
        }
        if (check_tree_block_fsid(root, eb)) {
-               printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
+               printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
                               eb->start);
                ret = -EIO;
                goto err;
        }
        found_level = btrfs_header_level(eb);
        if (found_level >= BTRFS_MAX_LEVEL) {
-               btrfs_info(root->fs_info, "bad tree block level %d\n",
+               btrfs_info(root->fs_info, "bad tree block level %d",
                           (int)btrfs_header_level(eb));
                ret = -EIO;
                goto err;
@@@ -842,17 -839,20 +839,17 @@@ int btrfs_wq_submit_bio(struct btrfs_fs
  
  static int btree_csum_one_bio(struct bio *bio)
  {
 -      struct bio_vec *bvec = bio->bi_io_vec;
 -      int bio_index = 0;
 +      struct bio_vec *bvec;
        struct btrfs_root *root;
 -      int ret = 0;
 +      int i, ret = 0;
  
 -      WARN_ON(bio->bi_vcnt <= 0);
 -      while (bio_index < bio->bi_vcnt) {
 +      bio_for_each_segment_all(bvec, bio, i) {
                root = BTRFS_I(bvec->bv_page->mapping->host)->root;
                ret = csum_dirty_buffer(root, bvec->bv_page);
                if (ret)
                        break;
 -              bio_index++;
 -              bvec++;
        }
 +
        return ret;
  }
  
@@@ -964,11 -964,9 +961,9 @@@ static int btree_migratepage(struct add
  static int btree_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
  {
-       struct extent_io_tree *tree;
        struct btrfs_fs_info *fs_info;
        int ret;
  
-       tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
  
                if (wbc->for_kupdate)
@@@ -1007,8 -1005,9 +1002,9 @@@ static void btree_invalidatepage(struc
        extent_invalidatepage(tree, page, offset);
        btree_releasepage(page, GFP_NOFS);
        if (PagePrivate(page)) {
-               printk(KERN_WARNING "btrfs warning page private not zero "
-                      "on page %llu\n", (unsigned long long)page_offset(page));
+               btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
+                          "page private not zero on page %llu",
+                          (unsigned long long)page_offset(page));
                ClearPagePrivate(page);
                set_page_private(page, 0);
                page_cache_release(page);
@@@ -1092,21 -1091,13 +1088,13 @@@ int reada_tree_block_flagged(struct btr
  struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize)
  {
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_buffer *eb;
-       eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
-       return eb;
+       return find_extent_buffer(root->fs_info, bytenr);
  }
  
  struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                 u64 bytenr, u32 blocksize)
  {
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_buffer *eb;
-       eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
-                                bytenr, blocksize);
-       return eb;
+       return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
  }
  
  
@@@ -1270,7 -1261,6 +1258,6 @@@ struct btrfs_root *btrfs_create_tree(st
        struct btrfs_root *root;
        struct btrfs_key key;
        int ret = 0;
-       u64 bytenr;
        uuid_le uuid;
  
        root = btrfs_alloc_root(fs_info);
                goto fail;
        }
  
-       bytenr = leaf->start;
        memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_bytenr(leaf, leaf->start);
        btrfs_set_header_generation(leaf, trans->transid);
@@@ -1613,7 -1602,8 +1599,8 @@@ again
        if (ret)
                goto fail;
  
-       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+       ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
+                       location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
        if (ret < 0)
                goto fail;
        if (ret == 0)
@@@ -1681,18 -1671,16 +1668,16 @@@ static void end_workqueue_fn(struct btr
  {
        struct bio *bio;
        struct end_io_wq *end_io_wq;
-       struct btrfs_fs_info *fs_info;
        int error;
  
        end_io_wq = container_of(work, struct end_io_wq, work);
        bio = end_io_wq->bio;
-       fs_info = end_io_wq->info;
  
        error = end_io_wq->error;
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
        kfree(end_io_wq);
 -      bio_endio(bio, error);
 +      bio_endio_nodec(bio, error);
  }
  
  static int cleaner_kthread(void *arg)
@@@ -2077,6 -2065,12 +2062,12 @@@ static void del_fs_roots(struct btrfs_f
                for (i = 0; i < ret; i++)
                        btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+               btrfs_free_log_root_tree(NULL, fs_info);
+               btrfs_destroy_pinned_extent(fs_info->tree_root,
+                                           fs_info->pinned_extents);
+       }
  }
  
  int open_ctree(struct super_block *sb,
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
  
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
        spin_lock_init(&fs_info->free_chunk_lock);
        spin_lock_init(&fs_info->tree_mod_seq_lock);
        spin_lock_init(&fs_info->super_lock);
+       spin_lock_init(&fs_info->buffer_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->reloc_mutex);
        seqlock_init(&fs_info->profiles_lock);
        fs_info->free_chunk_space = 0;
        fs_info->tree_mod_log = RB_ROOT;
        fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
+       fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
        /* readahead state */
        INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
        spin_lock_init(&fs_info->reada_lock);
         * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
         */
        if (btrfs_check_super_csum(bh->b_data)) {
-               printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+               printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
                err = -EINVAL;
                goto fail_alloc;
        }
  
        ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
        if (ret) {
-               printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+               printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
                err = -EINVAL;
                goto fail_alloc;
        }
                features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
  
        if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
-               printk(KERN_ERR "btrfs: has skinny extents\n");
+               printk(KERN_ERR "BTRFS: has skinny extents\n");
  
        /*
         * flag our filesystem as having big metadata blocks if
         */
        if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
                if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
-                       printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
+                       printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
                features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
        }
  
         */
        if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
            (sectorsize != leafsize)) {
-               printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
+               printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
                                "are not allowed for mixed block groups on %s\n",
                                sb->s_id);
                goto fail_alloc;
        sb->s_blocksize_bits = blksize_bits(sectorsize);
  
        if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-               printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
+               printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
  
        if (sectorsize != PAGE_SIZE) {
-               printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+               printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
                       "found on %s\n", (unsigned long)sectorsize, sb->s_id);
                goto fail_sb_buffer;
        }
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to read the system "
+               printk(KERN_WARNING "BTRFS: failed to read the system "
                       "array on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
                                           blocksize, generation);
        if (!chunk_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
-               printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
  
        ret = btrfs_read_chunk_tree(chunk_root);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
        btrfs_close_extra_devices(fs_info, fs_devices, 0);
  
        if (!fs_devices->latest_bdev) {
-               printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+               printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
@@@ -2650,7 -2646,7 +2643,7 @@@ retry_root_backup
                                          blocksize, generation);
        if (!tree_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
-               printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
  
                goto recovery_tree_root;
  
        ret = btrfs_recover_balance(fs_info);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to recover balance\n");
+               printk(KERN_WARNING "BTRFS: failed to recover balance\n");
                goto fail_block_groups;
        }
  
        ret = btrfs_init_dev_stats(fs_info);
        if (ret) {
-               printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
+               printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
                       ret);
                goto fail_block_groups;
        }
  
        ret = btrfs_init_dev_replace(fs_info);
        if (ret) {
-               pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+               pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
                goto fail_block_groups;
        }
  
        btrfs_close_extra_devices(fs_info, fs_devices, 1);
  
-       ret = btrfs_init_space_info(fs_info);
+       ret = btrfs_sysfs_add_one(fs_info);
        if (ret) {
-               printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+               pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
                goto fail_block_groups;
        }
  
+       ret = btrfs_init_space_info(fs_info);
+       if (ret) {
+               printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
+               goto fail_sysfs;
+       }
        ret = btrfs_read_block_groups(extent_root);
        if (ret) {
-               printk(KERN_ERR "Failed to read block groups: %d\n", ret);
-               goto fail_block_groups;
+               printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
+               goto fail_sysfs;
        }
        fs_info->num_tolerated_disk_barrier_failures =
                btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
        if (fs_info->fs_devices->missing_devices >
             fs_info->num_tolerated_disk_barrier_failures &&
            !(sb->s_flags & MS_RDONLY)) {
-               printk(KERN_WARNING
-                      "Btrfs: too many missing devices, writeable mount is not allowed\n");
-               goto fail_block_groups;
+               printk(KERN_WARNING "BTRFS: "
+                       "too many missing devices, writeable mount is not allowed\n");
+               goto fail_sysfs;
        }
  
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (IS_ERR(fs_info->cleaner_kthread))
-               goto fail_block_groups;
+               goto fail_sysfs;
  
        fs_info->transaction_kthread = kthread_run(transaction_kthread,
                                                   tree_root,
        if (!btrfs_test_opt(tree_root, SSD) &&
            !btrfs_test_opt(tree_root, NOSSD) &&
            !fs_info->fs_devices->rotating) {
-               printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
+               printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
                       "mode\n");
                btrfs_set_opt(fs_info->mount_opt, SSD);
        }
  
+       /* Set the real inode map cache flag */
+       if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE))
+               btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE);
  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
                ret = btrfsic_mount(tree_root, fs_devices,
                                    1 : 0,
                                    fs_info->check_integrity_print_mask);
                if (ret)
-                       printk(KERN_WARNING "btrfs: failed to initialize"
+                       printk(KERN_WARNING "BTRFS: failed to initialize"
                               " integrity check module %s\n", sb->s_id);
        }
  #endif
                u64 bytenr = btrfs_super_log_root(disk_super);
  
                if (fs_devices->rw_devices == 0) {
-                       printk(KERN_WARNING "Btrfs log replay required "
+                       printk(KERN_WARNING "BTRFS: log replay required "
                               "on RO media\n");
                        err = -EIO;
                        goto fail_qgroup;
                                                      generation + 1);
                if (!log_tree_root->node ||
                    !extent_buffer_uptodate(log_tree_root->node)) {
-                       printk(KERN_ERR "btrfs: failed to read log tree\n");
+                       printk(KERN_ERR "BTRFS: failed to read log tree\n");
                        free_extent_buffer(log_tree_root->node);
                        kfree(log_tree_root);
                        goto fail_trans_kthread;
                ret = btrfs_recover_relocation(tree_root);
                if (ret < 0) {
                        printk(KERN_WARNING
-                              "btrfs: failed to recover relocation\n");
+                              "BTRFS: failed to recover relocation\n");
                        err = -EINVAL;
                        goto fail_qgroup;
                }
  
        ret = btrfs_resume_balance_async(fs_info);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to resume balance\n");
+               printk(KERN_WARNING "BTRFS: failed to resume balance\n");
                close_ctree(tree_root);
                return ret;
        }
  
        ret = btrfs_resume_dev_replace_async(fs_info);
        if (ret) {
-               pr_warn("btrfs: failed to resume dev_replace\n");
+               pr_warn("BTRFS: failed to resume dev_replace\n");
                close_ctree(tree_root);
                return ret;
        }
        btrfs_qgroup_rescan_resume(fs_info);
  
        if (create_uuid_tree) {
-               pr_info("btrfs: creating UUID tree\n");
+               pr_info("BTRFS: creating UUID tree\n");
                ret = btrfs_create_uuid_tree(fs_info);
                if (ret) {
-                       pr_warn("btrfs: failed to create the UUID tree %d\n",
+                       pr_warn("BTRFS: failed to create the UUID tree %d\n",
                                ret);
                        close_ctree(tree_root);
                        return ret;
                }
        } else if (check_uuid_tree ||
                   btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
-               pr_info("btrfs: checking UUID tree\n");
+               pr_info("BTRFS: checking UUID tree\n");
                ret = btrfs_check_uuid_tree(fs_info);
                if (ret) {
-                       pr_warn("btrfs: failed to check the UUID tree %d\n",
+                       pr_warn("BTRFS: failed to check the UUID tree %d\n",
                                ret);
                        close_ctree(tree_root);
                        return ret;
@@@ -2942,6 -2948,9 +2945,9 @@@ fail_cleaner
         */
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
  
+ fail_sysfs:
+       btrfs_sysfs_remove_one(fs_info);
  fail_block_groups:
        btrfs_put_block_group_cache(fs_info);
        btrfs_free_block_groups(fs_info);
@@@ -2997,7 -3006,7 +3003,7 @@@ static void btrfs_end_buffer_write_sync
                struct btrfs_device *device = (struct btrfs_device *)
                        bh->b_private;
  
-               printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
+               printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
                                          "I/O error on %s\n",
                                          rcu_str_deref(device->name));
                /* note, we dont' set_buffer_write_io_error because we have
@@@ -3116,7 -3125,7 +3122,7 @@@ static int write_dev_supers(struct btrf
                        bh = __getblk(device->bdev, bytenr / 4096,
                                      BTRFS_SUPER_INFO_SIZE);
                        if (!bh) {
-                               printk(KERN_ERR "btrfs: couldn't get super "
+                               printk(KERN_ERR "BTRFS: couldn't get super "
                                       "buffer head for bytenr %Lu\n", bytenr);
                                errors++;
                                continue;
                 * we fua the first super.  The others we allow
                 * to go down lazy.
                 */
-               ret = btrfsic_submit_bh(WRITE_FUA, bh);
+               if (i == 0)
+                       ret = btrfsic_submit_bh(WRITE_FUA, bh);
+               else
+                       ret = btrfsic_submit_bh(WRITE_SYNC, bh);
                if (ret)
                        errors++;
        }
@@@ -3183,7 -3195,7 +3192,7 @@@ static int write_dev_flush(struct btrfs
                wait_for_completion(&device->flush_wait);
  
                if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-                       printk_in_rcu("btrfs: disabling barriers on dev %s\n",
+                       printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
                                      rcu_str_deref(device->name));
                        device->nobarriers = 1;
                } else if (!bio_flagged(bio, BIO_UPTODATE)) {
@@@ -3404,7 -3416,7 +3413,7 @@@ static int write_all_supers(struct btrf
                        total_errors++;
        }
        if (total_errors > max_errors) {
-               printk(KERN_ERR "btrfs: %d errors while writing supers\n",
+               btrfs_err(root->fs_info, "%d errors while writing supers",
                       total_errors);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
  
@@@ -3452,10 -3464,8 +3461,8 @@@ void btrfs_drop_and_free_fs_root(struc
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
  
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_free_log(NULL, root);
-               btrfs_free_log_root_tree(NULL, fs_info);
-       }
  
        __btrfs_remove_free_space_cache(root->free_ino_pinned);
        __btrfs_remove_free_space_cache(root->free_ino_ctl);
@@@ -3560,14 -3570,12 +3567,12 @@@ int close_ctree(struct btrfs_root *root
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret = btrfs_commit_super(root);
                if (ret)
-                       printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+                       btrfs_err(root->fs_info, "commit super ret %d", ret);
        }
  
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_error_commit_super(root);
  
-       btrfs_put_block_group_cache(fs_info);
        kthread_stop(fs_info->transaction_kthread);
        kthread_stop(fs_info->cleaner_kthread);
  
        btrfs_free_qgroup_config(root->fs_info);
  
        if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
-               printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
+               btrfs_info(root->fs_info, "at unmount delalloc count %lld",
                       percpu_counter_sum(&fs_info->delalloc_bytes));
        }
  
+       btrfs_sysfs_remove_one(fs_info);
        del_fs_roots(fs_info);
  
+       btrfs_put_block_group_cache(fs_info);
        btrfs_free_block_groups(fs_info);
  
        btrfs_stop_all_workers(fs_info);
@@@ -3800,55 -3812,55 +3809,55 @@@ static int btrfs_destroy_delayed_refs(s
        delayed_refs = &trans->delayed_refs;
  
        spin_lock(&delayed_refs->lock);
-       if (delayed_refs->num_entries == 0) {
+       if (atomic_read(&delayed_refs->num_entries) == 0) {
                spin_unlock(&delayed_refs->lock);
-               printk(KERN_INFO "delayed_refs has NO entry\n");
+               btrfs_info(root->fs_info, "delayed_refs has NO entry");
                return ret;
        }
  
-       while ((node = rb_first(&delayed_refs->root)) != NULL) {
-               struct btrfs_delayed_ref_head *head = NULL;
+       while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+               struct btrfs_delayed_ref_head *head;
                bool pin_bytes = false;
  
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               atomic_set(&ref->refs, 1);
-               if (btrfs_delayed_ref_is_head(ref)) {
-                       head = btrfs_delayed_node_to_head(ref);
-                       if (!mutex_trylock(&head->mutex)) {
-                               atomic_inc(&ref->refs);
-                               spin_unlock(&delayed_refs->lock);
-                               /* Need to wait for the delayed ref to run */
-                               mutex_lock(&head->mutex);
-                               mutex_unlock(&head->mutex);
-                               btrfs_put_delayed_ref(ref);
-                               spin_lock(&delayed_refs->lock);
-                               continue;
-                       }
-                       if (head->must_insert_reserved)
-                               pin_bytes = true;
-                       btrfs_free_delayed_extent_op(head->extent_op);
-                       delayed_refs->num_heads--;
-                       if (list_empty(&head->cluster))
-                               delayed_refs->num_heads_ready--;
-                       list_del_init(&head->cluster);
-               }
+               head = rb_entry(node, struct btrfs_delayed_ref_head,
+                               href_node);
+               if (!mutex_trylock(&head->mutex)) {
+                       atomic_inc(&head->node.refs);
+                       spin_unlock(&delayed_refs->lock);
  
-               ref->in_tree = 0;
-               rb_erase(&ref->rb_node, &delayed_refs->root);
-               delayed_refs->num_entries--;
-               spin_unlock(&delayed_refs->lock);
-               if (head) {
-                       if (pin_bytes)
-                               btrfs_pin_extent(root, ref->bytenr,
-                                                ref->num_bytes, 1);
+                       mutex_lock(&head->mutex);
                        mutex_unlock(&head->mutex);
+                       btrfs_put_delayed_ref(&head->node);
+                       spin_lock(&delayed_refs->lock);
+                       continue;
+               }
+               spin_lock(&head->lock);
+               while ((node = rb_first(&head->ref_root)) != NULL) {
+                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
+                                      rb_node);
+                       ref->in_tree = 0;
+                       rb_erase(&ref->rb_node, &head->ref_root);
+                       atomic_dec(&delayed_refs->num_entries);
+                       btrfs_put_delayed_ref(ref);
+                       cond_resched_lock(&head->lock);
                }
-               btrfs_put_delayed_ref(ref);
+               if (head->must_insert_reserved)
+                       pin_bytes = true;
+               btrfs_free_delayed_extent_op(head->extent_op);
+               delayed_refs->num_heads--;
+               if (head->processing == 0)
+                       delayed_refs->num_heads_ready--;
+               atomic_dec(&delayed_refs->num_entries);
+               head->node.in_tree = 0;
+               rb_erase(&head->href_node, &delayed_refs->href_root);
+               spin_unlock(&head->lock);
+               spin_unlock(&delayed_refs->lock);
+               mutex_unlock(&head->mutex);
  
+               if (pin_bytes)
+                       btrfs_pin_extent(root, head->node.bytenr,
+                                        head->node.num_bytes, 1);
+               btrfs_put_delayed_ref(&head->node);
                cond_resched();
                spin_lock(&delayed_refs->lock);
        }
diff --combined fs/btrfs/extent_io.c
index bcb6f1b780d64512868303c04a7939060612e3e3,fbe501d3bd014804ca2bf20958f147c11c975c62..85bbd01f1271379de6b3bcf41f4a42bd9d30320a
@@@ -59,7 -59,7 +59,7 @@@ void btrfs_leak_debug_check(void
  
        while (!list_empty(&states)) {
                state = list_entry(states.next, struct extent_state, leak_list);
-               printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+               printk(KERN_ERR "BTRFS: state leak: start %llu end %llu "
                       "state %lu in tree %p refs %d\n",
                       state->start, state->end, state->state, state->tree,
                       atomic_read(&state->refs));
@@@ -69,7 -69,7 +69,7 @@@
  
        while (!list_empty(&buffers)) {
                eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-               printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+               printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
                       "refs %d\n",
                       eb->start, eb->len, atomic_read(&eb->refs));
                list_del(&eb->leak_list);
        }
  }
  
- #define btrfs_debug_check_extent_io_range(inode, start, end)          \
-       __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end))
+ #define btrfs_debug_check_extent_io_range(tree, start, end)           \
+       __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
  static inline void __btrfs_debug_check_extent_io_range(const char *caller,
-               struct inode *inode, u64 start, u64 end)
+               struct extent_io_tree *tree, u64 start, u64 end)
  {
-       u64 isize = i_size_read(inode);
+       struct inode *inode;
+       u64 isize;
+       if (!tree->mapping)
+               return;
  
+       inode = tree->mapping->host;
+       isize = i_size_read(inode);
        if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
                printk_ratelimited(KERN_DEBUG
-                   "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
+                   "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
                                caller, btrfs_ino(inode), isize, start, end);
        }
  }
@@@ -124,6 -130,8 +130,8 @@@ static noinline void flush_write_bio(vo
  static inline struct btrfs_fs_info *
  tree_fs_info(struct extent_io_tree *tree)
  {
+       if (!tree->mapping)
+               return NULL;
        return btrfs_sb(tree->mapping->host->i_sb);
  }
  
@@@ -186,11 -194,9 +194,9 @@@ void extent_io_tree_init(struct extent_
                         struct address_space *mapping)
  {
        tree->state = RB_ROOT;
-       INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
        tree->ops = NULL;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
-       spin_lock_init(&tree->buffer_lock);
        tree->mapping = mapping;
  }
  
@@@ -224,12 -230,20 +230,20 @@@ void free_extent_state(struct extent_st
  }
  
  static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
-                                  struct rb_node *node)
+                                  struct rb_node *node,
+                                  struct rb_node ***p_in,
+                                  struct rb_node **parent_in)
  {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct tree_entry *entry;
  
+       if (p_in && parent_in) {
+               p = *p_in;
+               parent = *parent_in;
+               goto do_insert;
+       }
        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct tree_entry, rb_node);
                        return parent;
        }
  
+ do_insert:
        rb_link_node(node, parent, p);
        rb_insert_color(node, root);
        return NULL;
  }
  
  static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
-                                    struct rb_node **prev_ret,
-                                    struct rb_node **next_ret)
+                                     struct rb_node **prev_ret,
+                                     struct rb_node **next_ret,
+                                     struct rb_node ***p_ret,
+                                     struct rb_node **parent_ret)
  {
        struct rb_root *root = &tree->state;
-       struct rb_node *n = root->rb_node;
+       struct rb_node **n = &root->rb_node;
        struct rb_node *prev = NULL;
        struct rb_node *orig_prev = NULL;
        struct tree_entry *entry;
        struct tree_entry *prev_entry = NULL;
  
-       while (n) {
-               entry = rb_entry(n, struct tree_entry, rb_node);
-               prev = n;
+       while (*n) {
+               prev = *n;
+               entry = rb_entry(prev, struct tree_entry, rb_node);
                prev_entry = entry;
  
                if (offset < entry->start)
-                       n = n->rb_left;
+                       n = &(*n)->rb_left;
                else if (offset > entry->end)
-                       n = n->rb_right;
+                       n = &(*n)->rb_right;
                else
-                       return n;
+                       return *n;
        }
  
+       if (p_ret)
+               *p_ret = n;
+       if (parent_ret)
+               *parent_ret = prev;
        if (prev_ret) {
                orig_prev = prev;
                while (prev && offset > prev_entry->end) {
        return NULL;
  }
  
- static inline struct rb_node *tree_search(struct extent_io_tree *tree,
-                                         u64 offset)
+ static inline struct rb_node *
+ tree_search_for_insert(struct extent_io_tree *tree,
+                      u64 offset,
+                      struct rb_node ***p_ret,
+                      struct rb_node **parent_ret)
  {
        struct rb_node *prev = NULL;
        struct rb_node *ret;
  
-       ret = __etree_search(tree, offset, &prev, NULL);
+       ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
        if (!ret)
                return prev;
        return ret;
  }
  
+ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
+                                         u64 offset)
+ {
+       return tree_search_for_insert(tree, offset, NULL, NULL);
+ }
  static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
                     struct extent_state *other)
  {
@@@ -385,23 -416,25 +416,25 @@@ static void set_state_bits(struct exten
   */
  static int insert_state(struct extent_io_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
+                       struct rb_node ***p,
+                       struct rb_node **parent,
                        unsigned long *bits)
  {
        struct rb_node *node;
  
        if (end < start)
-               WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
+               WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
                       end, start);
        state->start = start;
        state->end = end;
  
        set_state_bits(tree, state, bits);
  
-       node = tree_insert(&tree->state, end, &state->rb_node);
+       node = tree_insert(&tree->state, end, &state->rb_node, p, parent);
        if (node) {
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
-               printk(KERN_ERR "btrfs found node %llu %llu on insert of "
+               printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
                       "%llu %llu\n",
                       found->start, found->end, start, end);
                return -EEXIST;
@@@ -444,7 -477,8 +477,8 @@@ static int split_state(struct extent_io
        prealloc->state = orig->state;
        orig->start = split;
  
-       node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+       node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node,
+                          NULL, NULL);
        if (node) {
                free_extent_state(prealloc);
                return -EEXIST;
@@@ -542,7 -576,7 +576,7 @@@ int clear_extent_bit(struct extent_io_t
        int err;
        int clear = 0;
  
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
  
        if (bits & EXTENT_DELALLOC)
                bits |= EXTENT_NORESERVE;
@@@ -702,7 -736,7 +736,7 @@@ static void wait_extent_bit(struct exte
        struct extent_state *state;
        struct rb_node *node;
  
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
  
        spin_lock(&tree->lock);
  again:
@@@ -783,11 -817,13 +817,13 @@@ __set_extent_bit(struct extent_io_tree 
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
+       struct rb_node **p;
+       struct rb_node *parent;
        int err = 0;
        u64 last_start;
        u64 last_end;
  
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
  
        bits |= EXTENT_FIRST_DELALLOC;
  again:
         * this search will find all the extents that end after
         * our range starts.
         */
-       node = tree_search(tree, start);
+       node = tree_search_for_insert(tree, start, &p, &parent);
        if (!node) {
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
-               err = insert_state(tree, prealloc, start, end, &bits);
+               err = insert_state(tree, prealloc, start, end,
+                                  &p, &parent, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
  
+               cache_state(prealloc, cached_state);
                prealloc = NULL;
                goto out;
        }
@@@ -919,7 -957,7 +957,7 @@@ hit_next
                 * the later extent.
                 */
                err = insert_state(tree, prealloc, start, this_end,
-                                  &bits);
+                                  NULL, NULL, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
  
@@@ -1005,11 -1043,13 +1043,13 @@@ int convert_extent_bit(struct extent_io
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
+       struct rb_node **p;
+       struct rb_node *parent;
        int err = 0;
        u64 last_start;
        u64 last_end;
  
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
  
  again:
        if (!prealloc && (mask & __GFP_WAIT)) {
         * this search will find all the extents that end after
         * our range starts.
         */
-       node = tree_search(tree, start);
+       node = tree_search_for_insert(tree, start, &p, &parent);
        if (!node) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc) {
                        err = -ENOMEM;
                        goto out;
                }
-               err = insert_state(tree, prealloc, start, end, &bits);
-               prealloc = NULL;
+               err = insert_state(tree, prealloc, start, end,
+                                  &p, &parent, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
+               cache_state(prealloc, cached_state);
+               prealloc = NULL;
                goto out;
        }
        state = rb_entry(node, struct extent_state, rb_node);
@@@ -1135,7 -1177,7 +1177,7 @@@ hit_next
                 * the later extent.
                 */
                err = insert_state(tree, prealloc, start, this_end,
-                                  &bits);
+                                  NULL, NULL, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
                cache_state(prealloc, cached_state);
@@@ -1984,7 -2026,7 +2026,7 @@@ int repair_io_failure(struct btrfs_fs_i
        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio)
                return -EIO;
 -      bio->bi_size = 0;
 +      bio->bi_iter.bi_size = 0;
        map_length = length;
  
        ret = btrfs_map_block(fs_info, WRITE, logical,
        }
        BUG_ON(mirror_num != bbio->mirror_num);
        sector = bbio->stripes[mirror_num-1].physical >> 9;
 -      bio->bi_sector = sector;
 +      bio->bi_iter.bi_sector = sector;
        dev = bbio->stripes[mirror_num-1].dev;
        kfree(bbio);
        if (!dev || !dev->bdev || !dev->writeable) {
                return -EIO;
        }
  
-       printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
-                     "(dev %s sector %llu)\n", page->mapping->host->i_ino,
-                     start, rcu_str_deref(dev->name), sector);
+       printk_ratelimited_in_rcu(KERN_INFO
+                       "BTRFS: read error corrected: ino %lu off %llu "
+                   "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+                   start, rcu_str_deref(dev->name), sector);
  
        bio_put(bio);
        return 0;
@@@ -2156,7 -2199,7 +2199,7 @@@ static int bio_readpage_error(struct bi
                        return -EIO;
                }
  
-               if (em->start > start || em->start + em->len < start) {
+               if (em->start > start || em->start + em->len <= start) {
                        free_extent_map(em);
                        em = NULL;
                }
                return -EIO;
        }
        bio->bi_end_io = failed_bio->bi_end_io;
 -      bio->bi_sector = failrec->logical >> 9;
 +      bio->bi_iter.bi_sector = failrec->logical >> 9;
        bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 -      bio->bi_size = 0;
 +      bio->bi_iter.bi_size = 0;
  
        btrfs_failed_bio = btrfs_io_bio(failed_bio);
        if (btrfs_failed_bio->csum) {
@@@ -2332,35 -2375,41 +2375,39 @@@ int end_extent_writepage(struct page *p
   */
  static void end_bio_extent_writepage(struct bio *bio, int err)
  {
 -      struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 +      struct bio_vec *bvec;
-       struct extent_io_tree *tree;
        u64 start;
        u64 end;
 +      int i;
  
 -      do {
 +      bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
-               tree = &BTRFS_I(page->mapping->host)->io_tree;
  
                /* We always issue full-page reads, but if some block
                 * in a page fails to read, blk_update_request() will
                 * advance bv_offset and adjust bv_len to compensate.
                 * Print a warning for nonzero offsets, and an error
                 * if they don't add up to a full page.  */
-               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
-                       printk("%s page write in btrfs with offset %u and length %u\n",
-                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
-                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
-                              bvec->bv_offset, bvec->bv_len);
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+                       if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+                               btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "partial page write in btrfs with offset %u and length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+                       else
+                               btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "incomplete page write in btrfs with offset %u and "
+                                  "length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+               }
  
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
  
 -              if (--bvec >= bio->bi_io_vec)
 -                      prefetchw(&bvec->bv_page->flags);
 -
                if (end_extent_writepage(page, err, start, end))
                        continue;
  
                end_page_writeback(page);
 -      } while (bvec >= bio->bi_io_vec);
 +      }
  
        bio_put(bio);
  }
@@@ -2390,8 -2439,9 +2437,8 @@@ endio_readpage_release_extent(struct ex
   */
  static void end_bio_extent_readpage(struct bio *bio, int err)
  {
 +      struct bio_vec *bvec;
        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 -      struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
 -      struct bio_vec *bvec = bio->bi_io_vec;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        struct extent_io_tree *tree;
        u64 offset = 0;
        u64 extent_len = 0;
        int mirror;
        int ret;
 +      int i;
  
        if (err)
                uptodate = 0;
  
 -      do {
 +      bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
                struct inode *inode = page->mapping->host;
  
                pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
 -                       "mirror=%lu\n", (u64)bio->bi_sector, err,
 +                       "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err,
                         io_bio->mirror_num);
                tree = &BTRFS_I(inode)->io_tree;
  
                 * advance bv_offset and adjust bv_len to compensate.
                 * Print a warning for nonzero offsets, and an error
                 * if they don't add up to a full page.  */
-               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
-                       printk("%s page read in btrfs with offset %u and length %u\n",
-                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
-                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
-                              bvec->bv_offset, bvec->bv_len);
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+                       if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+                               btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "partial page read in btrfs with offset %u and length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+                       else
+                               btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "incomplete page read in btrfs with offset %u and "
+                                  "length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+               }
  
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
                len = bvec->bv_len;
  
 -              if (++bvec <= bvec_end)
 -                      prefetchw(&bvec->bv_page->flags);
 -
                mirror = io_bio->mirror_num;
                if (likely(uptodate && tree->ops &&
                           tree->ops->readpage_end_io_hook)) {
@@@ -2511,7 -2569,7 +2564,7 @@@ readpage_ok
                        extent_start = start;
                        extent_len = end + 1 - start;
                }
 -      } while (bvec <= bvec_end);
 +      }
  
        if (extent_len)
                endio_readpage_release_extent(tree, extent_start, extent_len,
@@@ -2542,8 -2600,9 +2595,8 @@@ btrfs_bio_alloc(struct block_device *bd
        }
  
        if (bio) {
 -              bio->bi_size = 0;
                bio->bi_bdev = bdev;
 -              bio->bi_sector = first_sector;
 +              bio->bi_iter.bi_sector = first_sector;
                btrfs_bio = btrfs_io_bio(bio);
                btrfs_bio->csum = NULL;
                btrfs_bio->csum_allocated = NULL;
@@@ -2637,7 -2696,7 +2690,7 @@@ static int submit_extent_page(int rw, s
        if (bio_ret && *bio_ret) {
                bio = *bio_ret;
                if (old_compressed)
 -                      contig = bio->bi_sector == sector;
 +                      contig = bio->bi_iter.bi_sector == sector;
                else
                        contig = bio_end_sector(bio) == sector;
  
@@@ -3281,8 -3340,8 +3334,8 @@@ static int __extent_writepage(struct pa
  
                        set_range_writeback(tree, cur, cur + iosize - 1);
                        if (!PageWriteback(page)) {
-                               printk(KERN_ERR "btrfs warning page %lu not "
-                                      "writeback, cur %llu end %llu\n",
+                               btrfs_err(BTRFS_I(inode)->root->fs_info,
+                                          "page %lu not writeback, cur %llu end %llu",
                                       page->index, cur, end);
                        }
  
@@@ -3404,18 -3463,20 +3457,18 @@@ static void end_extent_buffer_writeback
  
  static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
  {
 -      int uptodate = err == 0;
 -      struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 +      struct bio_vec *bvec;
        struct extent_buffer *eb;
 -      int done;
 +      int i, done;
  
 -      do {
 +      bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
  
 -              bvec--;
                eb = (struct extent_buffer *)page->private;
                BUG_ON(!eb);
                done = atomic_dec_and_test(&eb->io_pages);
  
 -              if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
 +              if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
                        set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
                        ClearPageUptodate(page);
                        SetPageError(page);
                        continue;
  
                end_extent_buffer_writeback(eb);
 -      } while (bvec >= bio->bi_io_vec);
 +      }
  
        bio_put(bio);
 -
  }
  
  static int write_one_eb(struct extent_buffer *eb,
                        struct extent_page_data *epd)
  {
        struct block_device *bdev = fs_info->fs_devices->latest_bdev;
+       struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
        u64 offset = eb->start;
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
  
                clear_page_dirty_for_io(p);
                set_page_writeback(p);
-               ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
+               ret = submit_extent_page(rw, tree, p, offset >> 9,
                                         PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
                                         -1, end_bio_extent_buffer_writepage,
                                         0, epd->bio_flags, bio_flags);
@@@ -4073,12 -4136,10 +4127,10 @@@ int extent_fiemap(struct inode *inode, 
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
-       struct btrfs_file_extent_item *item;
        int end = 0;
        u64 em_start = 0;
        u64 em_len = 0;
        u64 em_end = 0;
-       unsigned long emflags;
  
        if (len == 0)
                return -EINVAL;
        }
        WARN_ON(!ret);
        path->slots[0]--;
-       item = btrfs_item_ptr(path->nodes[0], path->slots[0],
-                             struct btrfs_file_extent_item);
        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
  
                        offset_in_extent = em_start - em->start;
                em_end = extent_map_end(em);
                em_len = em_end - em_start;
-               emflags = em->flags;
                disko = 0;
                flags = 0;
  
@@@ -4324,10 -4382,9 +4373,9 @@@ static inline void btrfs_release_extent
        __free_extent_buffer(eb);
  }
  
- static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
-                                                  u64 start,
-                                                  unsigned long len,
-                                                  gfp_t mask)
+ static struct extent_buffer *
+ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
+                     unsigned long len, gfp_t mask)
  {
        struct extent_buffer *eb = NULL;
  
                return NULL;
        eb->start = start;
        eb->len = len;
-       eb->tree = tree;
+       eb->fs_info = fs_info;
        eb->bflags = 0;
        rwlock_init(&eb->lock);
        atomic_set(&eb->write_locks, 0);
@@@ -4468,13 -4525,14 +4516,14 @@@ static void mark_extent_buffer_accessed
        }
  }
  
- struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
-                                                       u64 start)
+ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
+                                        u64 start)
  {
        struct extent_buffer *eb;
  
        rcu_read_lock();
-       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+       eb = radix_tree_lookup(&fs_info->buffer_radix,
+                              start >> PAGE_CACHE_SHIFT);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
                mark_extent_buffer_accessed(eb);
        return NULL;
  }
  
- struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                          u64 start, unsigned long len)
  {
        unsigned long num_pages = num_extent_pages(start, len);
        struct extent_buffer *eb;
        struct extent_buffer *exists = NULL;
        struct page *p;
-       struct address_space *mapping = tree->mapping;
+       struct address_space *mapping = fs_info->btree_inode->i_mapping;
        int uptodate = 1;
        int ret;
  
-       eb = find_extent_buffer(tree, start);
+       eb = find_extent_buffer(fs_info, start);
        if (eb)
                return eb;
  
-       eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
+       eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
        if (!eb)
                return NULL;
  
@@@ -4558,12 -4615,13 +4606,13 @@@ again
        if (ret)
                goto free_eb;
  
-       spin_lock(&tree->buffer_lock);
-       ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
-       spin_unlock(&tree->buffer_lock);
+       spin_lock(&fs_info->buffer_lock);
+       ret = radix_tree_insert(&fs_info->buffer_radix,
+                               start >> PAGE_CACHE_SHIFT, eb);
+       spin_unlock(&fs_info->buffer_lock);
        radix_tree_preload_end();
        if (ret == -EEXIST) {
-               exists = find_extent_buffer(tree, start);
+               exists = find_extent_buffer(fs_info, start);
                if (exists)
                        goto free_eb;
                else
        }
        /* add one reference for the tree */
        check_buffer_tree_ref(eb);
+       set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
  
        /*
         * there is a race where release page may have
@@@ -4614,17 -4673,17 +4664,17 @@@ static int release_extent_buffer(struc
  {
        WARN_ON(atomic_read(&eb->refs) == 0);
        if (atomic_dec_and_test(&eb->refs)) {
-               if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
-                       spin_unlock(&eb->refs_lock);
-               } else {
-                       struct extent_io_tree *tree = eb->tree;
+               if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
+                       struct btrfs_fs_info *fs_info = eb->fs_info;
  
                        spin_unlock(&eb->refs_lock);
  
-                       spin_lock(&tree->buffer_lock);
-                       radix_tree_delete(&tree->buffer,
+                       spin_lock(&fs_info->buffer_lock);
+                       radix_tree_delete(&fs_info->buffer_radix,
                                          eb->start >> PAGE_CACHE_SHIFT);
-                       spin_unlock(&tree->buffer_lock);
+                       spin_unlock(&fs_info->buffer_lock);
+               } else {
+                       spin_unlock(&eb->refs_lock);
                }
  
                /* Should be safe to release our pages at this point */
@@@ -5103,12 -5162,12 +5153,12 @@@ void memcpy_extent_buffer(struct extent
        unsigned long src_i;
  
        if (src_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
                       "len %lu dst len %lu\n", src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
                       "len %lu dst len %lu\n", dst_offset, len, dst->len);
                BUG_ON(1);
        }
@@@ -5150,12 -5209,12 +5200,12 @@@ void memmove_extent_buffer(struct exten
        unsigned long src_i;
  
        if (src_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
                       "len %lu len %lu\n", src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
                       "len %lu len %lu\n", dst_offset, len, dst->len);
                BUG_ON(1);
        }
diff --combined fs/btrfs/file-item.c
index 84a46a42d26269b94fbb0a823e1fec43439d5e69,9d846588f7220a1bdb8eacd5e73aeaf83998d469..127555b29f587fab26de53b9a52f137f958ce3f5
@@@ -182,7 -182,7 +182,7 @@@ static int __btrfs_lookup_bio_sums(stru
        if (!path)
                return -ENOMEM;
  
 -      nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
 +      nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
        if (!dst) {
                if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
                        btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
                csum = (u8 *)dst;
        }
  
 -      if (bio->bi_size > PAGE_CACHE_SIZE * 8)
 +      if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
                path->reada = 2;
  
        WARN_ON(bio->bi_vcnt <= 0);
                path->skip_locking = 1;
        }
  
 -      disk_bytenr = (u64)bio->bi_sector << 9;
 +      disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
        if (dio)
                offset = logical_offset;
        while (bio_index < bio->bi_vcnt) {
                                                offset + bvec->bv_len - 1,
                                                EXTENT_NODATASUM, GFP_NOFS);
                                } else {
-                                       printk(KERN_INFO "btrfs no csum found "
-                                              "for inode %llu start %llu\n",
+                                       btrfs_info(BTRFS_I(inode)->root->fs_info,
+                                                  "no csum found for inode %llu start %llu",
                                               btrfs_ino(inode), offset);
                                }
                                item = NULL;
@@@ -302,7 -302,7 +302,7 @@@ int btrfs_lookup_bio_sums_dio(struct bt
                              struct btrfs_dio_private *dip, struct bio *bio,
                              u64 offset)
  {
 -      int len = (bio->bi_sector << 9) - dip->disk_bytenr;
 +      int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
        u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
        int ret;
  
@@@ -447,12 -447,11 +447,12 @@@ int btrfs_csum_one_bio(struct btrfs_roo
        u64 offset;
  
        WARN_ON(bio->bi_vcnt <= 0);
 -      sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
 +      sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_iter.bi_size),
 +                     GFP_NOFS);
        if (!sums)
                return -ENOMEM;
  
 -      sums->len = bio->bi_size;
 +      sums->len = bio->bi_iter.bi_size;
        INIT_LIST_HEAD(&sums->list);
  
        if (contig)
  
        ordered = btrfs_lookup_ordered_extent(inode, offset);
        BUG_ON(!ordered); /* Logic error */
 -      sums->bytenr = (u64)bio->bi_sector << 9;
 +      sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
        index = 0;
  
        while (bio_index < bio->bi_vcnt) {
                        btrfs_add_ordered_sum(inode, ordered, sums);
                        btrfs_put_ordered_extent(ordered);
  
 -                      bytes_left = bio->bi_size - total_bytes;
 +                      bytes_left = bio->bi_iter.bi_size - total_bytes;
  
                        sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
                                       GFP_NOFS);
                        sums->len = bytes_left;
                        ordered = btrfs_lookup_ordered_extent(inode, offset);
                        BUG_ON(!ordered); /* Logic error */
 -                      sums->bytenr = ((u64)bio->bi_sector << 9) +
 +                      sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
                                       total_bytes;
                        index = 0;
                }
diff --combined fs/btrfs/inode.c
index d546d8c3038baa4451aa2f338a0c24592a3ea48f,fb74a536add3f9f930139c9dca7d1e1f841ccc5c..5c4ab9c18940cc7827a75df6e02a84370cae3edd
  #include "inode-map.h"
  #include "backref.h"
  #include "hash.h"
+ #include "props.h"
  
  struct btrfs_iget_args {
-       u64 ino;
+       struct btrfs_key *location;
        struct btrfs_root *root;
  };
  
@@@ -125,13 -126,12 +126,12 @@@ static int btrfs_init_inode_security(st
   * no overlapping inline items exist in the btree
   */
  static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+                               struct btrfs_path *path, int extent_inserted,
                                struct btrfs_root *root, struct inode *inode,
                                u64 start, size_t size, size_t compressed_size,
                                int compress_type,
                                struct page **compressed_pages)
  {
-       struct btrfs_key key;
-       struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct page *page = NULL;
        char *kaddr;
        int err = 0;
        int ret;
        size_t cur_size = size;
-       size_t datasize;
        unsigned long offset;
  
        if (compressed_size && compressed_pages)
                cur_size = compressed_size;
  
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
+       inode_add_bytes(inode, size);
  
-       path->leave_spinning = 1;
+       if (!extent_inserted) {
+               struct btrfs_key key;
+               size_t datasize;
  
-       key.objectid = btrfs_ino(inode);
-       key.offset = start;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
-       datasize = btrfs_file_extent_calc_inline_size(cur_size);
+               key.objectid = btrfs_ino(inode);
+               key.offset = start;
+               btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
  
-       inode_add_bytes(inode, size);
-       ret = btrfs_insert_empty_item(trans, root, path, &key,
-                                     datasize);
-       if (ret) {
-               err = ret;
-               goto fail;
+               datasize = btrfs_file_extent_calc_inline_size(cur_size);
+               path->leave_spinning = 1;
+               ret = btrfs_insert_empty_item(trans, root, path, &key,
+                                             datasize);
+               if (ret) {
+                       err = ret;
+                       goto fail;
+               }
        }
        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, path->slots[0],
                page_cache_release(page);
        }
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_free_path(path);
+       btrfs_release_path(path);
  
        /*
         * we're an inline extent, so nobody can
  
        return ret;
  fail:
-       btrfs_free_path(path);
        return err;
  }
  
@@@ -242,6 -241,9 +241,9 @@@ static noinline int cow_file_range_inli
        u64 aligned_end = ALIGN(end, root->sectorsize);
        u64 data_len = inline_len;
        int ret;
+       struct btrfs_path *path;
+       int extent_inserted = 0;
+       u32 extent_item_size;
  
        if (compressed_size)
                data_len = compressed_size;
                return 1;
        }
  
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
        trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
+       if (IS_ERR(trans)) {
+               btrfs_free_path(path);
                return PTR_ERR(trans);
+       }
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
  
-       ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
+       if (compressed_size && compressed_pages)
+               extent_item_size = btrfs_file_extent_calc_inline_size(
+                  compressed_size);
+       else
+               extent_item_size = btrfs_file_extent_calc_inline_size(
+                   inline_len);
+       ret = __btrfs_drop_extents(trans, root, inode, path,
+                                  start, aligned_end, NULL,
+                                  1, 1, extent_item_size, &extent_inserted);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto out;
  
        if (isize > actual_end)
                inline_len = min_t(u64, isize, actual_end);
-       ret = insert_inline_extent(trans, root, inode, start,
+       ret = insert_inline_extent(trans, path, extent_inserted,
+                                  root, inode, start,
                                   inline_len, compressed_size,
                                   compress_type, compressed_pages);
        if (ret && ret != -ENOSPC) {
        btrfs_delalloc_release_metadata(inode, end + 1 - start);
        btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
  out:
+       btrfs_free_path(path);
        btrfs_end_transaction(trans, root);
        return ret;
  }
@@@ -1262,7 -1281,8 +1281,8 @@@ next_slot
                        nocow = 1;
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                        extent_end = found_key.offset +
-                               btrfs_file_extent_inline_len(leaf, fi);
+                               btrfs_file_extent_inline_len(leaf,
+                                                    path->slots[0], fi);
                        extent_end = ALIGN(extent_end, root->sectorsize);
                } else {
                        BUG_ON(1);
@@@ -1577,7 -1597,7 +1597,7 @@@ int btrfs_merge_bio_hook(int rw, struc
                         unsigned long bio_flags)
  {
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 -      u64 logical = (u64)bio->bi_sector << 9;
 +      u64 logical = (u64)bio->bi_iter.bi_sector << 9;
        u64 length = 0;
        u64 map_length;
        int ret;
        if (bio_flags & EXTENT_BIO_COMPRESSED)
                return 0;
  
 -      length = bio->bi_size;
 +      length = bio->bi_iter.bi_size;
        map_length = length;
        ret = btrfs_map_block(root->fs_info, rw, logical,
                              &map_length, NULL, 0);
@@@ -1841,14 -1861,13 +1861,13 @@@ static int insert_reserved_file_extent(
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key ins;
+       int extent_inserted = 0;
        int ret;
  
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
  
-       path->leave_spinning = 1;
        /*
         * we may be replacing one extent in the tree with another.
         * The new extent is pinned in the extent map, and we don't want
         * the caller is expected to unpin it and allow it to be merged
         * with the others.
         */
-       ret = btrfs_drop_extents(trans, root, inode, file_pos,
-                                file_pos + num_bytes, 0);
+       ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
+                                  file_pos + num_bytes, NULL, 0,
+                                  1, sizeof(*fi), &extent_inserted);
        if (ret)
                goto out;
  
-       ins.objectid = btrfs_ino(inode);
-       ins.offset = file_pos;
-       ins.type = BTRFS_EXTENT_DATA_KEY;
-       ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
-       if (ret)
-               goto out;
+       if (!extent_inserted) {
+               ins.objectid = btrfs_ino(inode);
+               ins.offset = file_pos;
+               ins.type = BTRFS_EXTENT_DATA_KEY;
+               path->leave_spinning = 1;
+               ret = btrfs_insert_empty_item(trans, root, path, &ins,
+                                             sizeof(*fi));
+               if (ret)
+                       goto out;
+       }
        leaf = path->nodes[0];
        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);
@@@ -2290,7 -2315,7 +2315,7 @@@ again
                u64 extent_len;
                struct btrfs_key found_key;
  
-               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+               ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
                if (ret < 0)
                        goto out_free_path;
  
@@@ -2543,12 -2568,6 +2568,6 @@@ out_kfree
        return NULL;
  }
  
- /*
-  * helper function for btrfs_finish_ordered_io, this
-  * just reads in some of the csum leaves to prime them into ram
-  * before we start the transaction.  It limits the amount of btree
-  * reads required while inside the transaction.
-  */
  /* as ordered data IO finishes, this gets called so we can finish
   * an ordered extent if the range of bytes in the file it covers are
   * fully written.
@@@ -3248,7 -3267,8 +3267,8 @@@ out
   * slot is the slot the inode is in, objectid is the objectid of the inode
   */
  static noinline int acls_after_inode_item(struct extent_buffer *leaf,
-                                         int slot, u64 objectid)
+                                         int slot, u64 objectid,
+                                         int *first_xattr_slot)
  {
        u32 nritems = btrfs_header_nritems(leaf);
        struct btrfs_key found_key;
        }
  
        slot++;
+       *first_xattr_slot = -1;
        while (slot < nritems) {
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
  
  
                /* we found an xattr, assume we've got an acl */
                if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
+                       if (*first_xattr_slot == -1)
+                               *first_xattr_slot = slot;
                        if (found_key.offset == xattr_access ||
                            found_key.offset == xattr_default)
                                return 1;
         * something larger than an xattr.  We have to assume the inode
         * has acls
         */
+       if (*first_xattr_slot == -1)
+               *first_xattr_slot = slot;
        return 1;
  }
  
@@@ -3315,10 -3340,12 +3340,12 @@@ static void btrfs_read_locked_inode(str
        struct btrfs_timespec *tspec;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key location;
+       unsigned long ptr;
        int maybe_acls;
        u32 rdev;
        int ret;
        bool filled = false;
+       int first_xattr_slot;
  
        ret = btrfs_fill_inode(inode, &rdev);
        if (!ret)
        if (!path)
                goto make_bad;
  
-       path->leave_spinning = 1;
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
  
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
        leaf = path->nodes[0];
  
        if (filled)
-               goto cache_acl;
+               goto cache_index;
  
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
  
        BTRFS_I(inode)->index_cnt = (u64)-1;
        BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+ cache_index:
+       path->slots[0]++;
+       if (inode->i_nlink != 1 ||
+           path->slots[0] >= btrfs_header_nritems(leaf))
+               goto cache_acl;
+       btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
+       if (location.objectid != btrfs_ino(inode))
+               goto cache_acl;
+       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       if (location.type == BTRFS_INODE_REF_KEY) {
+               struct btrfs_inode_ref *ref;
+               ref = (struct btrfs_inode_ref *)ptr;
+               BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
+       } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
+               struct btrfs_inode_extref *extref;
+               extref = (struct btrfs_inode_extref *)ptr;
+               BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
+                                                                    extref);
+       }
  cache_acl:
        /*
         * try to precache a NULL acl entry for files that don't have
         * any xattrs or acls
         */
        maybe_acls = acls_after_inode_item(leaf, path->slots[0],
-                                          btrfs_ino(inode));
+                                          btrfs_ino(inode), &first_xattr_slot);
+       if (first_xattr_slot != -1) {
+               path->slots[0] = first_xattr_slot;
+               ret = btrfs_load_inode_props(inode, path);
+               if (ret)
+                       btrfs_err(root->fs_info,
+                                 "error loading props for ino %llu (root %llu): %d\n",
+                                 btrfs_ino(inode),
+                                 root->root_key.objectid, ret);
+       }
+       btrfs_free_path(path);
        if (!maybe_acls)
                cache_no_acl(inode);
  
-       btrfs_free_path(path);
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_mapping->a_ops = &btrfs_aops;
@@@ -3496,7 -3555,6 +3555,6 @@@ static noinline int btrfs_update_inode_
                goto failed;
        }
  
-       btrfs_unlock_up_safe(path, 1);
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
@@@ -3593,6 -3651,24 +3651,24 @@@ static int __btrfs_unlink_inode(struct 
                goto err;
        btrfs_release_path(path);
  
+       /*
+        * If we don't have dir index, we have to get it by looking up
+        * the inode ref, since we get the inode ref, remove it directly,
+        * it is unnecessary to do delayed deletion.
+        *
+        * But if we have dir index, needn't search inode ref to get it.
+        * Since the inode ref is close to the inode item, it is better
+        * that we delay to delete it, and just do this deletion when
+        * we update the inode item.
+        */
+       if (BTRFS_I(inode)->dir_index) {
+               ret = btrfs_delayed_delete_inode_ref(inode);
+               if (!ret) {
+                       index = BTRFS_I(inode)->dir_index;
+                       goto skip_backref;
+               }
+       }
        ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
                                  dir_ino, &index);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto err;
        }
+ skip_backref:
        ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
@@@ -3948,7 -4024,7 +4024,7 @@@ search_again
                                    btrfs_file_extent_num_bytes(leaf, fi);
                        } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                                item_end += btrfs_file_extent_inline_len(leaf,
-                                                                        fi);
+                                                        path->slots[0], fi);
                        }
                        item_end--;
                }
                                        inode_sub_bytes(inode, item_end + 1 -
                                                        new_size);
                                }
+                               /*
+                                * update the ram bytes to properly reflect
+                                * the new size of our item
+                                */
+                               btrfs_set_file_extent_ram_bytes(leaf, fi, size);
                                size =
                                    btrfs_file_extent_calc_inline_size(size);
                                btrfs_truncate_item(root, path, size, 1);
        return ret;
  }
  
+ static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
+                            u64 offset, u64 len)
+ {
+       struct btrfs_trans_handle *trans;
+       int ret;
+       /*
+        * Still need to make sure the inode looks like it's been updated so
+        * that any holes get logged if we fsync.
+        */
+       if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
+               BTRFS_I(inode)->last_trans = root->fs_info->generation;
+               BTRFS_I(inode)->last_sub_trans = root->log_transid;
+               BTRFS_I(inode)->last_log_commit = root->last_log_commit;
+               return 0;
+       }
+       /*
+        * 1 - for the one we're dropping
+        * 1 - for the one we're adding
+        * 1 - for updating the inode.
+        */
+       trans = btrfs_start_transaction(root, 3);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+       ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               btrfs_end_transaction(trans, root);
+               return ret;
+       }
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+                                      0, 0, len, 0, len, 0, 0, 0);
+       if (ret)
+               btrfs_abort_transaction(trans, root, ret);
+       else
+               btrfs_update_inode(trans, root, inode);
+       btrfs_end_transaction(trans, root);
+       return ret;
+ }
  /*
   * This function puts in dummy file extents for the area we're creating a hole
   * for.  So if we are truncating this file to a larger size we need to insert
   */
  int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
  {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_map *em = NULL;
                        struct extent_map *hole_em;
                        hole_size = last_byte - cur_offset;
  
-                       trans = btrfs_start_transaction(root, 3);
-                       if (IS_ERR(trans)) {
-                               err = PTR_ERR(trans);
-                               break;
-                       }
-                       err = btrfs_drop_extents(trans, root, inode,
-                                                cur_offset,
-                                                cur_offset + hole_size, 1);
-                       if (err) {
-                               btrfs_abort_transaction(trans, root, err);
-                               btrfs_end_transaction(trans, root);
-                               break;
-                       }
-                       err = btrfs_insert_file_extent(trans, root,
-                                       btrfs_ino(inode), cur_offset, 0,
-                                       0, hole_size, 0, hole_size,
-                                       0, 0, 0);
-                       if (err) {
-                               btrfs_abort_transaction(trans, root, err);
-                               btrfs_end_transaction(trans, root);
+                       err = maybe_insert_hole(root, inode, cur_offset,
+                                               hole_size);
+                       if (err)
                                break;
-                       }
                        btrfs_drop_extent_cache(inode, cur_offset,
                                                cur_offset + hole_size - 1, 0);
                        hole_em = alloc_extent_map();
                        hole_em->ram_bytes = hole_size;
                        hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
                        hole_em->compress_type = BTRFS_COMPRESS_NONE;
-                       hole_em->generation = trans->transid;
+                       hole_em->generation = root->fs_info->generation;
  
                        while (1) {
                                write_lock(&em_tree->lock);
                                                        hole_size - 1, 0);
                        }
                        free_extent_map(hole_em);
- next:
-                       btrfs_update_inode(trans, root, inode);
-                       btrfs_end_transaction(trans, root);
                }
+ next:
                free_extent_map(em);
                em = NULL;
                cur_offset = last_byte;
                if (cur_offset >= block_end)
                        break;
        }
        free_extent_map(em);
        unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
                             GFP_NOFS);
@@@ -4354,12 -4454,8 +4454,12 @@@ static int btrfs_setsize(struct inode *
         * these flags set.  For all other operations the VFS set these flags
         * explicitly if it wants a timestamp update.
         */
 -      if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
 -              inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
 +      if (newsize != oldsize) {
 +              inode_inc_iversion(inode);
 +              if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
 +                      inode->i_ctime = inode->i_mtime =
 +                              current_fs_time(inode->i_sb);
 +      }
  
        if (newsize > oldsize) {
                truncate_pagecache(inode, newsize);
@@@ -4468,12 -4564,70 +4568,70 @@@ static int btrfs_setattr(struct dentry 
                err = btrfs_dirty_inode(inode);
  
                if (!err && attr->ia_valid & ATTR_MODE)
 -                      err = btrfs_acl_chmod(inode);
 +                      err = posix_acl_chmod(inode, inode->i_mode);
        }
  
        return err;
  }
  
+ /*
+  * While truncating the inode pages during eviction, we get the VFS calling
+  * btrfs_invalidatepage() against each page of the inode. This is slow because
+  * the calls to btrfs_invalidatepage() result in a huge amount of calls to
+  * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
+  * extent_state structures over and over, wasting lots of time.
+  *
+  * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
+  * those expensive operations on a per page basis and do only the ordered io
+  * finishing, while we release here the extent_map and extent_state structures,
+  * without the excessive merging and splitting.
+  */
+ static void evict_inode_truncate_pages(struct inode *inode)
+ {
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
+       struct rb_node *node;
+       ASSERT(inode->i_state & I_FREEING);
+       truncate_inode_pages(&inode->i_data, 0);
+       write_lock(&map_tree->lock);
+       while (!RB_EMPTY_ROOT(&map_tree->map)) {
+               struct extent_map *em;
+               node = rb_first(&map_tree->map);
+               em = rb_entry(node, struct extent_map, rb_node);
+               clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+               clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
+               remove_extent_mapping(map_tree, em);
+               free_extent_map(em);
+       }
+       write_unlock(&map_tree->lock);
+       spin_lock(&io_tree->lock);
+       while (!RB_EMPTY_ROOT(&io_tree->state)) {
+               struct extent_state *state;
+               struct extent_state *cached_state = NULL;
+               node = rb_first(&io_tree->state);
+               state = rb_entry(node, struct extent_state, rb_node);
+               atomic_inc(&state->refs);
+               spin_unlock(&io_tree->lock);
+               lock_extent_bits(io_tree, state->start, state->end,
+                                0, &cached_state);
+               clear_extent_bit(io_tree, state->start, state->end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+               free_extent_state(state);
+               spin_lock(&io_tree->lock);
+       }
+       spin_unlock(&io_tree->lock);
+ }
  void btrfs_evict_inode(struct inode *inode)
  {
        struct btrfs_trans_handle *trans;
  
        trace_btrfs_inode_evict(inode);
  
-       truncate_inode_pages(&inode->i_data, 0);
+       evict_inode_truncate_pages(inode);
        if (inode->i_nlink &&
            ((btrfs_root_refs(&root->root_item) != 0 &&
              root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@@ -4659,9 -4814,9 +4818,9 @@@ static int fixup_tree_root_location(str
        }
  
        err = -ENOENT;
-       ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
-                                 BTRFS_I(dir)->root->root_key.objectid,
-                                 location->objectid);
+       ret = btrfs_find_item(root->fs_info->tree_root, path,
+                               BTRFS_I(dir)->root->root_key.objectid,
+                               location->objectid, BTRFS_ROOT_REF_KEY, NULL);
        if (ret) {
                if (ret < 0)
                        err = ret;
@@@ -4822,7 -4977,9 +4981,9 @@@ again
  static int btrfs_init_locked_inode(struct inode *inode, void *p)
  {
        struct btrfs_iget_args *args = p;
-       inode->i_ino = args->ino;
+       inode->i_ino = args->location->objectid;
+       memcpy(&BTRFS_I(inode)->location, args->location,
+              sizeof(*args->location));
        BTRFS_I(inode)->root = args->root;
        return 0;
  }
  static int btrfs_find_actor(struct inode *inode, void *opaque)
  {
        struct btrfs_iget_args *args = opaque;
-       return args->ino == btrfs_ino(inode) &&
+       return args->location->objectid == BTRFS_I(inode)->location.objectid &&
                args->root == BTRFS_I(inode)->root;
  }
  
  static struct inode *btrfs_iget_locked(struct super_block *s,
-                                      u64 objectid,
+                                      struct btrfs_key *location,
                                       struct btrfs_root *root)
  {
        struct inode *inode;
        struct btrfs_iget_args args;
-       unsigned long hashval = btrfs_inode_hash(objectid, root);
+       unsigned long hashval = btrfs_inode_hash(location->objectid, root);
  
-       args.ino = objectid;
+       args.location = location;
        args.root = root;
  
        inode = iget5_locked(s, hashval, btrfs_find_actor,
@@@ -4859,13 -5016,11 +5020,11 @@@ struct inode *btrfs_iget(struct super_b
  {
        struct inode *inode;
  
-       inode = btrfs_iget_locked(s, location->objectid, root);
+       inode = btrfs_iget_locked(s, location, root);
        if (!inode)
                return ERR_PTR(-ENOMEM);
  
        if (inode->i_state & I_NEW) {
-               BTRFS_I(inode)->root = root;
-               memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
                btrfs_read_locked_inode(inode);
                if (!is_bad_inode(inode)) {
                        inode_tree_add(inode);
@@@ -4921,7 -5076,7 +5080,7 @@@ struct inode *btrfs_lookup_dentry(struc
                return ERR_PTR(ret);
  
        if (location.objectid == 0)
-               return NULL;
+               return ERR_PTR(-ENOENT);
  
        if (location.type == BTRFS_INODE_ITEM_KEY) {
                inode = btrfs_iget(dir->i_sb, &location, root, NULL);
@@@ -4985,10 -5140,17 +5144,17 @@@ static void btrfs_dentry_release(struc
  static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   unsigned int flags)
  {
-       struct dentry *ret;
+       struct inode *inode;
  
-       ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
-       return ret;
+       inode = btrfs_lookup_dentry(dir, dentry);
+       if (IS_ERR(inode)) {
+               if (PTR_ERR(inode) == -ENOENT)
+                       inode = NULL;
+               else
+                       return ERR_CAST(inode);
+       }
+       return d_splice_alias(inode, dentry);
  }
  
  unsigned char btrfs_filetype_table[] = {
@@@ -5358,7 -5520,6 +5524,6 @@@ static struct inode *btrfs_new_inode(st
        u32 sizes[2];
        unsigned long ptr;
        int ret;
-       int owner;
  
        path = btrfs_alloc_path();
        if (!path)
         * number
         */
        BTRFS_I(inode)->index_cnt = 2;
+       BTRFS_I(inode)->dir_index = *index;
        BTRFS_I(inode)->root = root;
        BTRFS_I(inode)->generation = trans->transid;
        inode->i_generation = BTRFS_I(inode)->generation;
         */
        set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
  
-       if (S_ISDIR(mode))
-               owner = 0;
-       else
-               owner = 1;
        key[0].objectid = objectid;
        btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
        key[0].offset = 0;
  
        btrfs_update_root_times(trans, root);
  
+       ret = btrfs_inode_inherit_props(trans, inode, dir);
+       if (ret)
+               btrfs_err(root->fs_info,
+                         "error inheriting props for ino %llu (root %llu): %d",
+                         btrfs_ino(inode), root->root_key.objectid, ret);
        return inode;
  fail:
        if (dir)
@@@ -5741,6 -5904,8 +5908,8 @@@ static int btrfs_link(struct dentry *ol
                goto fail;
        }
  
+       /* There are several dir indexes for this inode, clear the cache. */
+       BTRFS_I(inode)->dir_index = 0ULL;
        inc_nlink(inode);
        inode_inc_iversion(inode);
        inode->i_ctime = CURRENT_TIME;
@@@ -6004,7 -6169,7 +6173,7 @@@ again
                       btrfs_file_extent_num_bytes(leaf, item);
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                size_t size;
-               size = btrfs_file_extent_inline_len(leaf, item);
+               size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
                extent_end = ALIGN(extent_start + size, root->sectorsize);
        }
  next:
                        goto out;
                }
  
-               size = btrfs_file_extent_inline_len(leaf, item);
+               size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
                extent_offset = page_offset(page) + pg_offset - extent_start;
                copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
                                size - extent_offset);
@@@ -6390,6 -6555,7 +6559,7 @@@ noinline int can_nocow_extent(struct in
        int slot;
        int found_type;
        bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
                goto out;
  
+       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+       if (extent_end <= offset)
+               goto out;
        disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
        if (disk_bytenr == 0)
                goto out;
                *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
        }
  
-       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
        if (btrfs_extent_readonly(root, disk_bytenr))
                goto out;
        btrfs_release_path(path);
@@@ -6783,16 -6951,17 +6955,16 @@@ unlock_err
  static void btrfs_endio_direct_read(struct bio *bio, int err)
  {
        struct btrfs_dio_private *dip = bio->bi_private;
 -      struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
 -      struct bio_vec *bvec = bio->bi_io_vec;
 +      struct bio_vec *bvec;
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct bio *dio_bio;
        u32 *csums = (u32 *)dip->csum;
 -      int index = 0;
        u64 start;
 +      int i;
  
        start = dip->logical_offset;
 -      do {
 +      bio_for_each_segment_all(bvec, bio, i) {
                if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
                        struct page *page = bvec->bv_page;
                        char *kaddr;
                        local_irq_restore(flags);
  
                        flush_dcache_page(bvec->bv_page);
 -                      if (csum != csums[index]) {
 +                      if (csum != csums[i]) {
                                btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
                                          btrfs_ino(inode), start, csum,
 -                                        csums[index]);
 +                                        csums[i]);
                                err = -EIO;
                        }
                }
  
                start += bvec->bv_len;
 -              bvec++;
 -              index++;
 -      } while (bvec <= bvec_end);
 +      }
  
        unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
                      dip->logical_offset + dip->bytes - 1);
@@@ -6895,11 -7066,10 +7067,11 @@@ static void btrfs_end_dio_bio(struct bi
        struct btrfs_dio_private *dip = bio->bi_private;
  
        if (err) {
-               printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
-                     "sector %#Lx len %u err no %d\n",
+               btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
+                         "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
                      btrfs_ino(dip->inode), bio->bi_rw,
 -                    (unsigned long long)bio->bi_sector, bio->bi_size, err);
 +                    (unsigned long long)bio->bi_iter.bi_sector,
 +                    bio->bi_iter.bi_size, err);
                dip->errors = 1;
  
                /*
@@@ -6990,7 -7160,7 +7162,7 @@@ static int btrfs_submit_direct_hook(in
        struct bio *bio;
        struct bio *orig_bio = dip->orig_bio;
        struct bio_vec *bvec = orig_bio->bi_io_vec;
 -      u64 start_sector = orig_bio->bi_sector;
 +      u64 start_sector = orig_bio->bi_iter.bi_sector;
        u64 file_offset = dip->logical_offset;
        u64 submit_len = 0;
        u64 map_length;
        int ret = 0;
        int async_submit = 0;
  
 -      map_length = orig_bio->bi_size;
 +      map_length = orig_bio->bi_iter.bi_size;
        ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
                              &map_length, NULL, 0);
        if (ret) {
                return -EIO;
        }
  
 -      if (map_length >= orig_bio->bi_size) {
 +      if (map_length >= orig_bio->bi_iter.bi_size) {
                bio = orig_bio;
                goto submit;
        }
                        bio->bi_private = dip;
                        bio->bi_end_io = btrfs_end_dio_bio;
  
 -                      map_length = orig_bio->bi_size;
 +                      map_length = orig_bio->bi_iter.bi_size;
                        ret = btrfs_map_block(root->fs_info, rw,
                                              start_sector << 9,
                                              &map_length, NULL, 0);
@@@ -7116,8 -7286,7 +7288,8 @@@ static void btrfs_submit_direct(int rw
  
        if (!skip_sum && !write) {
                csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
 -              sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
 +              sum_len = dio_bio->bi_iter.bi_size >>
 +                      inode->i_sb->s_blocksize_bits;
                sum_len *= csum_size;
        } else {
                sum_len = 0;
        dip->private = dio_bio->bi_private;
        dip->inode = inode;
        dip->logical_offset = file_offset;
 -      dip->bytes = dio_bio->bi_size;
 -      dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
 +      dip->bytes = dio_bio->bi_iter.bi_size;
 +      dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
        io_bio->bi_private = dip;
        dip->errors = 0;
        dip->orig_bio = io_bio;
@@@ -7370,6 -7539,7 +7542,7 @@@ static void btrfs_invalidatepage(struc
        struct extent_state *cached_state = NULL;
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       int inode_evicting = inode->i_state & I_FREEING;
  
        /*
         * we have the page locked, so new writeback can't start,
                btrfs_releasepage(page, GFP_NOFS);
                return;
        }
-       lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
+       if (!inode_evicting)
+               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+       ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
                /*
                 * IO on this page will never be started, so we need
                 * to account for any ordered extents now
                 */
-               clear_extent_bit(tree, page_start, page_end,
-                                EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
-                                EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
+               if (!inode_evicting)
+                       clear_extent_bit(tree, page_start, page_end,
+                                        EXTENT_DIRTY | EXTENT_DELALLOC |
+                                        EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                        EXTENT_DEFRAG, 1, 0, &cached_state,
+                                        GFP_NOFS);
                /*
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
                                btrfs_finish_ordered_io(ordered);
                }
                btrfs_put_ordered_extent(ordered);
-               cached_state = NULL;
-               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+               if (!inode_evicting) {
+                       cached_state = NULL;
+                       lock_extent_bits(tree, page_start, page_end, 0,
+                                        &cached_state);
+               }
+       }
+       if (!inode_evicting) {
+               clear_extent_bit(tree, page_start, page_end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+               __btrfs_releasepage(page, GFP_NOFS);
        }
-       clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
-                &cached_state, GFP_NOFS);
-       __btrfs_releasepage(page, GFP_NOFS);
  
        ClearPageChecked(page);
        if (PagePrivate(page)) {
@@@ -7736,7 -7918,9 +7921,9 @@@ out
   * create a new subvolume directory/inode (helper for the ioctl).
   */
  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, u64 new_dirid)
+                            struct btrfs_root *new_root,
+                            struct btrfs_root *parent_root,
+                            u64 new_dirid)
  {
        struct inode *inode;
        int err;
        set_nlink(inode, 1);
        btrfs_i_size_write(inode, 0);
  
+       err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
+       if (err)
+               btrfs_err(new_root->fs_info,
+                         "error inheriting subvolume %llu properties: %d\n",
+                         new_root->root_key.objectid, err);
        err = btrfs_update_inode(trans, new_root, inode);
  
        iput(inode);
@@@ -7779,6 -7969,7 +7972,7 @@@ struct inode *btrfs_alloc_inode(struct 
        ei->flags = 0;
        ei->csum_bytes = 0;
        ei->index_cnt = (u64)-1;
+       ei->dir_index = 0;
        ei->last_unlink_trans = 0;
        ei->last_log_commit = 0;
  
@@@ -8066,6 -8257,7 +8260,7 @@@ static int btrfs_rename(struct inode *o
        if (ret)
                goto out_fail;
  
+       BTRFS_I(old_inode)->dir_index = 0ULL;
        if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                /* force full log commit if subvolume involved. */
                root->fs_info->last_trans_log_full_commit = trans->transid;
                goto out_fail;
        }
  
+       if (old_inode->i_nlink == 1)
+               BTRFS_I(old_inode)->dir_index = index;
        if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
                struct dentry *parent = new_dentry->d_parent;
                btrfs_log_new_name(trans, old_inode, old_dir, parent);
@@@ -8289,7 -8484,7 +8487,7 @@@ int btrfs_start_delalloc_inodes(struct 
  {
        int ret;
  
-       if (root->fs_info->sb->s_flags & MS_RDONLY)
+       if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
                return -EROFS;
  
        ret = __start_delalloc_inodes(root, delay_iput);
@@@ -8315,7 -8510,7 +8513,7 @@@ int btrfs_start_delalloc_roots(struct b
        struct list_head splice;
        int ret;
  
-       if (fs_info->sb->s_flags & MS_RDONLY)
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                return -EROFS;
  
        INIT_LIST_HEAD(&splice);
@@@ -8652,14 -8847,12 +8850,14 @@@ static const struct inode_operations bt
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
        .get_acl        = btrfs_get_acl,
 +      .set_acl        = btrfs_set_acl,
        .update_time    = btrfs_update_time,
  };
  static const struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
        .permission     = btrfs_permission,
        .get_acl        = btrfs_get_acl,
 +      .set_acl        = btrfs_set_acl,
        .update_time    = btrfs_update_time,
  };
  
@@@ -8729,7 -8922,6 +8927,7 @@@ static const struct inode_operations bt
        .permission     = btrfs_permission,
        .fiemap         = btrfs_fiemap,
        .get_acl        = btrfs_get_acl,
 +      .set_acl        = btrfs_set_acl,
        .update_time    = btrfs_update_time,
  };
  static const struct inode_operations btrfs_special_inode_operations = {
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .get_acl        = btrfs_get_acl,
 +      .set_acl        = btrfs_set_acl,
        .update_time    = btrfs_update_time,
  };
  static const struct inode_operations btrfs_symlink_inode_operations = {
        .getxattr       = btrfs_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
 -      .get_acl        = btrfs_get_acl,
        .update_time    = btrfs_update_time,
  };
  
diff --combined fs/btrfs/ioctl.c
index ad27dcea319c74558d9f38e4647111bca362654a,34772cbcc7aaf09dae41fc3771f8103b3f889dce..b0134892dc70cdf69be04ad44e3e52183a86fcb0
@@@ -56,6 -56,8 +56,8 @@@
  #include "rcu-string.h"
  #include "send.h"
  #include "dev-replace.h"
+ #include "props.h"
+ #include "sysfs.h"
  
  static int btrfs_clone(struct inode *src, struct inode *inode,
                       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
@@@ -190,6 -192,9 +192,9 @@@ static int btrfs_ioctl_setflags(struct 
        unsigned int i_oldflags;
        umode_t mode;
  
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
        if (btrfs_root_readonly(root))
                return -EROFS;
  
        if (ret)
                return ret;
  
-       if (!inode_owner_or_capable(inode))
-               return -EACCES;
        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
        if (flags & FS_NOCOMP_FL) {
                ip->flags &= ~BTRFS_INODE_COMPRESS;
                ip->flags |= BTRFS_INODE_NOCOMPRESS;
+               ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
+               if (ret && ret != -ENODATA)
+                       goto out_drop;
        } else if (flags & FS_COMPR_FL) {
+               const char *comp;
                ip->flags |= BTRFS_INODE_COMPRESS;
                ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+               if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
+                       comp = "lzo";
+               else
+                       comp = "zlib";
+               ret = btrfs_set_prop(inode, "btrfs.compression",
+                                    comp, strlen(comp), 0);
+               if (ret)
+                       goto out_drop;
        } else {
                ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
        }
@@@ -392,6 -410,7 +410,7 @@@ static noinline int create_subvol(struc
        struct btrfs_root *new_root;
        struct btrfs_block_rsv block_rsv;
        struct timespec cur_time = CURRENT_TIME;
+       struct inode *inode;
        int ret;
        int err;
        u64 objectid;
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               goto out;
+               btrfs_subvolume_release_metadata(root, &block_rsv,
+                                                qgroup_reserved);
+               return ret;
        }
        trans->block_rsv = &block_rsv;
        trans->bytes_reserved = block_rsv.size;
  
        btrfs_record_root_in_trans(trans, new_root);
  
-       ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
+       ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
        if (ret) {
                /* We potentially lose an unused inode item here */
                btrfs_abort_transaction(trans, root, ret);
  fail:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
+       btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
        if (async_transid) {
                *async_transid = trans->transid;
                err = btrfs_commit_transaction_async(trans, root, 1);
        if (err && !ret)
                ret = err;
  
-       if (!ret)
-               d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
- out:
-       btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
+       if (!ret) {
+               inode = btrfs_lookup_dentry(dir, dentry);
+               if (IS_ERR(inode))
+                       return PTR_ERR(inode);
+               d_instantiate(dentry, inode);
+       }
        return ret;
  }
  
@@@ -642,7 -667,7 +667,7 @@@ static int create_snapshot(struct btrfs
                ret = PTR_ERR(inode);
                goto fail;
        }
-       BUG_ON(!inode);
        d_instantiate(dentry, inode);
        ret = 0;
  fail:
@@@ -1011,7 -1036,7 +1036,7 @@@ out
  static int cluster_pages_for_defrag(struct inode *inode,
                                    struct page **pages,
                                    unsigned long start_index,
-                                   int num_pages)
+                                   unsigned long num_pages)
  {
        unsigned long file_end;
        u64 isize = i_size_read(inode);
@@@ -1169,8 -1194,8 +1194,8 @@@ int btrfs_defrag_file(struct inode *ino
        int defrag_count = 0;
        int compress_type = BTRFS_COMPRESS_ZLIB;
        int extent_thresh = range->extent_thresh;
-       int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
-       int cluster = max_cluster;
+       unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+       unsigned long cluster = max_cluster;
        u64 new_align = ~((u64)128 * 1024 - 1);
        struct page **pages = NULL;
  
                        break;
  
                if (btrfs_defrag_cancelled(root->fs_info)) {
-                       printk(KERN_DEBUG "btrfs: defrag_file cancelled\n");
+                       printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");
                        ret = -EAGAIN;
                        break;
                }
@@@ -1416,20 -1441,20 +1441,20 @@@ static noinline int btrfs_ioctl_resize(
                        ret = -EINVAL;
                        goto out_free;
                }
-               printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
+               btrfs_info(root->fs_info, "resizing devid %llu", devid);
        }
  
        device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
        if (!device) {
-               printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
+               btrfs_info(root->fs_info, "resizer unable to find device %llu",
                       devid);
                ret = -ENODEV;
                goto out_free;
        }
  
        if (!device->writeable) {
-               printk(KERN_INFO "btrfs: resizer unable to apply on "
-                      "readonly device %llu\n",
+               btrfs_info(root->fs_info,
+                          "resizer unable to apply on readonly device %llu",
                       devid);
                ret = -EPERM;
                goto out_free;
                }
                new_size = old_size - new_size;
        } else if (mod > 0) {
+               if (new_size > ULLONG_MAX - old_size) {
+                       ret = -EINVAL;
+                       goto out_free;
+               }
                new_size = old_size + new_size;
        }
  
        do_div(new_size, root->sectorsize);
        new_size *= root->sectorsize;
  
-       printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
+       printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
                      rcu_str_deref(device->name), new_size);
  
        if (new_size > old_size) {
@@@ -1542,9 -1571,15 +1571,15 @@@ static noinline int btrfs_ioctl_snap_cr
  
                src_inode = file_inode(src.file);
                if (src_inode->i_sb != file_inode(file)->i_sb) {
-                       printk(KERN_INFO "btrfs: Snapshot src from "
-                              "another FS\n");
+                       btrfs_info(BTRFS_I(src_inode)->root->fs_info,
+                                  "Snapshot src from another FS");
                        ret = -EINVAL;
+               } else if (!inode_owner_or_capable(src_inode)) {
+                       /*
+                        * Subvolume creation is not restricted, but snapshots
+                        * are limited to own subvolumes only
+                        */
+                       ret = -EPERM;
                } else {
                        ret = btrfs_mksubvol(&file->f_path, name, namelen,
                                             BTRFS_I(src_inode)->root,
@@@ -1662,6 -1697,9 +1697,9 @@@ static noinline int btrfs_ioctl_subvol_
        u64 flags;
        int ret = 0;
  
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
        ret = mnt_want_write_file(file);
        if (ret)
                goto out;
                goto out_drop_write;
        }
  
-       if (!inode_owner_or_capable(inode)) {
-               ret = -EACCES;
-               goto out_drop_write;
-       }
        down_write(&root->fs_info->subvol_sem);
  
        /* nothing to do */
                goto out_drop_sem;
  
        root_flags = btrfs_root_flags(&root->root_item);
-       if (flags & BTRFS_SUBVOL_RDONLY)
+       if (flags & BTRFS_SUBVOL_RDONLY) {
                btrfs_set_root_flags(&root->root_item,
                                     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
-       else
-               btrfs_set_root_flags(&root->root_item,
+       } else {
+               /*
+                * Block RO -> RW transition if this subvolume is involved in
+                * send
+                */
+               spin_lock(&root->root_item_lock);
+               if (root->send_in_progress == 0) {
+                       btrfs_set_root_flags(&root->root_item,
                                     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
+                       spin_unlock(&root->root_item_lock);
+               } else {
+                       spin_unlock(&root->root_item_lock);
+                       btrfs_warn(root->fs_info,
+                       "Attempt to set subvolume %llu read-write during send",
+                                       root->root_key.objectid);
+                       ret = -EPERM;
+                       goto out_drop_sem;
+               }
+       }
  
        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans)) {
@@@ -1910,7 -1959,7 +1959,7 @@@ static noinline int search_ioctl(struc
                key.offset = (u64)-1;
                root = btrfs_read_fs_root_no_name(info, &key);
                if (IS_ERR(root)) {
-                       printk(KERN_ERR "could not find root %llu\n",
+                       printk(KERN_ERR "BTRFS: could not find root %llu\n",
                               sk->tree_id);
                        btrfs_free_path(path);
                        return -ENOENT;
@@@ -2000,7 -2049,7 +2049,7 @@@ static noinline int btrfs_search_path_i
        key.offset = (u64)-1;
        root = btrfs_read_fs_root_no_name(info, &key);
        if (IS_ERR(root)) {
-               printk(KERN_ERR "could not find root %llu\n", tree_id);
+               printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id);
                ret = -ENOENT;
                goto out;
        }
@@@ -2686,11 -2735,14 +2735,11 @@@ out_unlock
  #define BTRFS_MAX_DEDUPE_LEN  (16 * 1024 * 1024)
  
  static long btrfs_ioctl_file_extent_same(struct file *file,
 -                                       void __user *argp)
 +                      struct btrfs_ioctl_same_args __user *argp)
  {
 -      struct btrfs_ioctl_same_args tmp;
        struct btrfs_ioctl_same_args *same;
        struct btrfs_ioctl_same_extent_info *info;
 -      struct inode *src = file->f_dentry->d_inode;
 -      struct file *dst_file = NULL;
 -      struct inode *dst;
 +      struct inode *src = file_inode(file);
        u64 off;
        u64 len;
        int i;
        unsigned long size;
        u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
        bool is_admin = capable(CAP_SYS_ADMIN);
 +      u16 count;
  
        if (!(file->f_mode & FMODE_READ))
                return -EINVAL;
        if (ret)
                return ret;
  
 -      if (copy_from_user(&tmp,
 -                         (struct btrfs_ioctl_same_args __user *)argp,
 -                         sizeof(tmp))) {
 +      if (get_user(count, &argp->dest_count)) {
                ret = -EFAULT;
                goto out;
        }
  
 -      size = sizeof(tmp) +
 -              tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
 +      size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
  
 -      same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size);
 +      same = memdup_user(argp, size);
  
        if (IS_ERR(same)) {
                ret = PTR_ERR(same);
                goto out;
  
        /* pre-format output fields to sane values */
 -      for (i = 0; i < same->dest_count; i++) {
 +      for (i = 0; i < count; i++) {
                same->info[i].bytes_deduped = 0ULL;
                same->info[i].status = 0;
        }
  
 -      ret = 0;
 -      for (i = 0; i < same->dest_count; i++) {
 -              info = &same->info[i];
 -
 -              dst_file = fget(info->fd);
 -              if (!dst_file) {
 +      for (i = 0, info = same->info; i < count; i++, info++) {
 +              struct inode *dst;
 +              struct fd dst_file = fdget(info->fd);
 +              if (!dst_file.file) {
                        info->status = -EBADF;
 -                      goto next;
 +                      continue;
                }
 +              dst = file_inode(dst_file.file);
  
 -              if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
 +              if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
                        info->status = -EINVAL;
 -                      goto next;
 -              }
 -
 -              info->status = -EXDEV;
 -              if (file->f_path.mnt != dst_file->f_path.mnt)
 -                      goto next;
 -
 -              dst = dst_file->f_dentry->d_inode;
 -              if (src->i_sb != dst->i_sb)
 -                      goto next;
 -
 -              if (S_ISDIR(dst->i_mode)) {
 +              } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
 +                      info->status = -EXDEV;
 +              } else if (S_ISDIR(dst->i_mode)) {
                        info->status = -EISDIR;
 -                      goto next;
 -              }
 -
 -              if (!S_ISREG(dst->i_mode)) {
 +              } else if (!S_ISREG(dst->i_mode)) {
                        info->status = -EACCES;
 -                      goto next;
 +              } else {
 +                      info->status = btrfs_extent_same(src, off, len, dst,
 +                                                      info->logical_offset);
 +                      if (info->status == 0)
 +                              info->bytes_deduped += len;
                }
 -
 -              info->status = btrfs_extent_same(src, off, len, dst,
 -                                              info->logical_offset);
 -              if (info->status == 0)
 -                      info->bytes_deduped += len;
 -
 -next:
 -              if (dst_file)
 -                      fput(dst_file);
 +              fdput(dst_file);
        }
  
        ret = copy_to_user(argp, same, size);
@@@ -2838,12 -2909,14 +2887,14 @@@ static int btrfs_clone(struct inode *sr
                 * note the key will change type as we walk through the
                 * tree.
                 */
+               path->leave_spinning = 1;
                ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
                                0, 0);
                if (ret < 0)
                        goto out;
  
                nritems = btrfs_header_nritems(path->nodes[0]);
+ process_slot:
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
                        if (ret < 0)
                        u8 comp;
                        u64 endoff;
  
-                       size = btrfs_item_size_nr(leaf, slot);
-                       read_extent_buffer(leaf, buf,
-                                          btrfs_item_ptr_offset(leaf, slot),
-                                          size);
                        extent = btrfs_item_ptr(leaf, slot,
                                                struct btrfs_file_extent_item);
                        comp = btrfs_file_extent_compression(leaf, extent);
                                datal = btrfs_file_extent_ram_bytes(leaf,
                                                                    extent);
                        }
-                       btrfs_release_path(path);
  
                        if (key.offset + datal <= off ||
-                           key.offset >= off + len - 1)
-                               goto next;
+                           key.offset >= off + len - 1) {
+                               path->slots[0]++;
+                               goto process_slot;
+                       }
+                       size = btrfs_item_size_nr(leaf, slot);
+                       read_extent_buffer(leaf, buf,
+                                          btrfs_item_ptr_offset(leaf, slot),
+                                          size);
+                       btrfs_release_path(path);
+                       path->leave_spinning = 0;
  
                        memcpy(&new_key, &key, sizeof(new_key));
                        new_key.objectid = btrfs_ino(inode);
                        }
                        ret = btrfs_end_transaction(trans, root);
                }
- next:
                btrfs_release_path(path);
                key.offset++;
        }
@@@ -3196,9 -3272,17 +3250,17 @@@ static noinline long btrfs_ioctl_clone(
  
        unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
  out_unlock:
-       mutex_unlock(&src->i_mutex);
-       if (!same_inode)
-               mutex_unlock(&inode->i_mutex);
+       if (!same_inode) {
+               if (inode < src) {
+                       mutex_unlock(&src->i_mutex);
+                       mutex_unlock(&inode->i_mutex);
+               } else {
+                       mutex_unlock(&inode->i_mutex);
+                       mutex_unlock(&src->i_mutex);
+               }
+       } else {
+               mutex_unlock(&src->i_mutex);
+       }
  out_fput:
        fdput(src_file);
  out_drop_write:
@@@ -3321,8 -3405,8 +3383,8 @@@ static long btrfs_ioctl_default_subvol(
        if (IS_ERR_OR_NULL(di)) {
                btrfs_free_path(path);
                btrfs_end_transaction(trans, root);
-               printk(KERN_ERR "Umm, you don't have the default dir item, "
-                      "this isn't going to work\n");
+               btrfs_err(new_root->fs_info, "Umm, you don't have the default dir"
+                          "item, this isn't going to work");
                ret = -ENOENT;
                goto out;
        }
        return ret;
  }
  
+ static long btrfs_ioctl_global_rsv(struct btrfs_root *root, void __user *arg)
+ {
+       struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv;
+       u64 reserved;
+       spin_lock(&block_rsv->lock);
+       reserved = block_rsv->reserved;
+       spin_unlock(&block_rsv->lock);
+       if (arg && copy_to_user(arg, &reserved, sizeof(reserved)))
+               return -EFAULT;
+       return 0;
+ }
  /*
   * there are many ways the trans_start and trans_end ioctls can lead
   * to deadlocks.  They should only be used by applications that
@@@ -4303,6 -4401,9 +4379,9 @@@ static long btrfs_ioctl_set_received_su
        int ret = 0;
        int received_uuid_changed;
  
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
        ret = mnt_want_write_file(file);
        if (ret < 0)
                return ret;
                goto out;
        }
  
-       if (!inode_owner_or_capable(inode)) {
-               ret = -EACCES;
-               goto out;
-       }
        sa = memdup_user(arg, sizeof(*sa));
        if (IS_ERR(sa)) {
                ret = PTR_ERR(sa);
@@@ -4409,8 -4505,8 +4483,8 @@@ static int btrfs_ioctl_get_fslabel(stru
        len = strnlen(label, BTRFS_LABEL_SIZE);
  
        if (len == BTRFS_LABEL_SIZE) {
-               pr_warn("btrfs: label is too long, return the first %zu bytes\n",
-                       --len);
+               btrfs_warn(root->fs_info,
+                       "label is too long, return the first %zu bytes", --len);
        }
  
        ret = copy_to_user(arg, label, len);
@@@ -4433,7 -4529,7 +4507,7 @@@ static int btrfs_ioctl_set_fslabel(stru
                return -EFAULT;
  
        if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
-               pr_err("btrfs: unable to set label with more than %d bytes\n",
+               btrfs_err(root->fs_info, "unable to set label with more than %d bytes",
                       BTRFS_LABEL_SIZE - 1);
                return -EINVAL;
        }
@@@ -4458,6 -4554,166 +4532,166 @@@ out_unlock
        return ret;
  }
  
+ #define INIT_FEATURE_FLAGS(suffix) \
+       { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
+         .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
+         .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
+ static int btrfs_ioctl_get_supported_features(struct file *file,
+                                             void __user *arg)
+ {
+       static struct btrfs_ioctl_feature_flags features[3] = {
+               INIT_FEATURE_FLAGS(SUPP),
+               INIT_FEATURE_FLAGS(SAFE_SET),
+               INIT_FEATURE_FLAGS(SAFE_CLEAR)
+       };
+       if (copy_to_user(arg, &features, sizeof(features)))
+               return -EFAULT;
+       return 0;
+ }
+ static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
+ {
+       struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+       struct btrfs_super_block *super_block = root->fs_info->super_copy;
+       struct btrfs_ioctl_feature_flags features;
+       features.compat_flags = btrfs_super_compat_flags(super_block);
+       features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block);
+       features.incompat_flags = btrfs_super_incompat_flags(super_block);
+       if (copy_to_user(arg, &features, sizeof(features)))
+               return -EFAULT;
+       return 0;
+ }
+ static int check_feature_bits(struct btrfs_root *root,
+                             enum btrfs_feature_set set,
+                             u64 change_mask, u64 flags, u64 supported_flags,
+                             u64 safe_set, u64 safe_clear)
+ {
+       const char *type = btrfs_feature_set_names[set];
+       char *names;
+       u64 disallowed, unsupported;
+       u64 set_mask = flags & change_mask;
+       u64 clear_mask = ~flags & change_mask;
+       unsupported = set_mask & ~supported_flags;
+       if (unsupported) {
+               names = btrfs_printable_features(set, unsupported);
+               if (names) {
+                       btrfs_warn(root->fs_info,
+                          "this kernel does not support the %s feature bit%s",
+                          names, strchr(names, ',') ? "s" : "");
+                       kfree(names);
+               } else
+                       btrfs_warn(root->fs_info,
+                          "this kernel does not support %s bits 0x%llx",
+                          type, unsupported);
+               return -EOPNOTSUPP;
+       }
+       disallowed = set_mask & ~safe_set;
+       if (disallowed) {
+               names = btrfs_printable_features(set, disallowed);
+               if (names) {
+                       btrfs_warn(root->fs_info,
+                          "can't set the %s feature bit%s while mounted",
+                          names, strchr(names, ',') ? "s" : "");
+                       kfree(names);
+               } else
+                       btrfs_warn(root->fs_info,
+                          "can't set %s bits 0x%llx while mounted",
+                          type, disallowed);
+               return -EPERM;
+       }
+       disallowed = clear_mask & ~safe_clear;
+       if (disallowed) {
+               names = btrfs_printable_features(set, disallowed);
+               if (names) {
+                       btrfs_warn(root->fs_info,
+                          "can't clear the %s feature bit%s while mounted",
+                          names, strchr(names, ',') ? "s" : "");
+                       kfree(names);
+               } else
+                       btrfs_warn(root->fs_info,
+                          "can't clear %s bits 0x%llx while mounted",
+                          type, disallowed);
+               return -EPERM;
+       }
+       return 0;
+ }
+ #define check_feature(root, change_mask, flags, mask_base)    \
+ check_feature_bits(root, FEAT_##mask_base, change_mask, flags,        \
+                  BTRFS_FEATURE_ ## mask_base ## _SUPP,        \
+                  BTRFS_FEATURE_ ## mask_base ## _SAFE_SET,    \
+                  BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
+ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
+ {
+       struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+       struct btrfs_super_block *super_block = root->fs_info->super_copy;
+       struct btrfs_ioctl_feature_flags flags[2];
+       struct btrfs_trans_handle *trans;
+       u64 newflags;
+       int ret;
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user(flags, arg, sizeof(flags)))
+               return -EFAULT;
+       /* Nothing to do */
+       if (!flags[0].compat_flags && !flags[0].compat_ro_flags &&
+           !flags[0].incompat_flags)
+               return 0;
+       ret = check_feature(root, flags[0].compat_flags,
+                           flags[1].compat_flags, COMPAT);
+       if (ret)
+               return ret;
+       ret = check_feature(root, flags[0].compat_ro_flags,
+                           flags[1].compat_ro_flags, COMPAT_RO);
+       if (ret)
+               return ret;
+       ret = check_feature(root, flags[0].incompat_flags,
+                           flags[1].incompat_flags, INCOMPAT);
+       if (ret)
+               return ret;
+       trans = btrfs_start_transaction(root, 1);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+       spin_lock(&root->fs_info->super_lock);
+       newflags = btrfs_super_compat_flags(super_block);
+       newflags |= flags[0].compat_flags & flags[1].compat_flags;
+       newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags);
+       btrfs_set_super_compat_flags(super_block, newflags);
+       newflags = btrfs_super_compat_ro_flags(super_block);
+       newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags;
+       newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags);
+       btrfs_set_super_compat_ro_flags(super_block, newflags);
+       newflags = btrfs_super_incompat_flags(super_block);
+       newflags |= flags[0].incompat_flags & flags[1].incompat_flags;
+       newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags);
+       btrfs_set_super_incompat_flags(super_block, newflags);
+       spin_unlock(&root->fs_info->super_lock);
+       return btrfs_end_transaction(trans, root);
+ }
  long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
  {
                return btrfs_ioctl_logical_to_ino(root, argp);
        case BTRFS_IOC_SPACE_INFO:
                return btrfs_ioctl_space_info(root, argp);
+       case BTRFS_IOC_GLOBAL_RSV:
+               return btrfs_ioctl_global_rsv(root, argp);
        case BTRFS_IOC_SYNC: {
                int ret;
  
                return btrfs_ioctl_set_fslabel(file, argp);
        case BTRFS_IOC_FILE_EXTENT_SAME:
                return btrfs_ioctl_file_extent_same(file, argp);
+       case BTRFS_IOC_GET_SUPPORTED_FEATURES:
+               return btrfs_ioctl_get_supported_features(file, argp);
+       case BTRFS_IOC_GET_FEATURES:
+               return btrfs_ioctl_get_features(file, argp);
+       case BTRFS_IOC_SET_FEATURES:
+               return btrfs_ioctl_set_features(file, argp);
        }
  
        return -ENOTTY;
diff --combined fs/btrfs/scrub.c
index bb9a928fa3a848c597d842a94fe2e49a48766cf0,51c342b9f5ef18d9f037e9573d22fbe77f613440..efba5d1282ee40addd128cf05427181a0aade83f
@@@ -256,6 -256,8 +256,8 @@@ static int copy_nocow_pages_for_inode(u
  static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
                            int mirror_num, u64 physical_for_dev_replace);
  static void copy_nocow_pages_worker(struct btrfs_work *work);
+ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
  
  
  static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@@ -269,6 -271,29 +271,29 @@@ static void scrub_pending_bio_dec(struc
        wake_up(&sctx->list_wait);
  }
  
+ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+ { /* Sleep until scrub_pause_req reads 0; scrub_lock is held on entry and on return. */
+       while (atomic_read(&fs_info->scrub_pause_req)) {
+               mutex_unlock(&fs_info->scrub_lock); /* not held across the sleep */
+               wait_event(fs_info->scrub_pause_wait,
+                  atomic_read(&fs_info->scrub_pause_req) == 0);
+               mutex_lock(&fs_info->scrub_lock); /* retake, then the loop re-tests */
+       }
+ }
+ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+ { /* Count this scrub as paused, wait out any pause request, then uncount it. */
+       atomic_inc(&fs_info->scrubs_paused);
+       wake_up(&fs_info->scrub_pause_wait); /* wake waiters after the count change */
+       mutex_lock(&fs_info->scrub_lock);
+       __scrub_blocked_if_needed(fs_info); /* may drop and retake scrub_lock */
+       atomic_dec(&fs_info->scrubs_paused); /* done while scrub_lock is held */
+       mutex_unlock(&fs_info->scrub_lock);
+       wake_up(&fs_info->scrub_pause_wait);
+ }
  /*
   * used for workers that require transaction commits (i.e., for the
   * NOCOW case)
@@@ -480,7 -505,7 +505,7 @@@ static int scrub_print_warning_inode(u6
         * hold all of the paths here
         */
        for (i = 0; i < ipath->fspath->elem_cnt; ++i)
-               printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+               printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
                        "%s, sector %llu, root %llu, inode %llu, offset %llu, "
                        "length %llu, links %u (path: %s)\n", swarn->errstr,
                        swarn->logical, rcu_str_deref(swarn->dev->name),
        return 0;
  
  err:
-       printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+       printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
                "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
                "resolving failed with ret=%d\n", swarn->errstr,
                swarn->logical, rcu_str_deref(swarn->dev->name),
@@@ -555,7 -580,7 +580,7 @@@ static void scrub_print_warning(const c
                        ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
                                                        &ref_root, &ref_level);
                        printk_in_rcu(KERN_WARNING
-                               "btrfs: %s at logical %llu on dev %s, "
+                               "BTRFS: %s at logical %llu on dev %s, "
                                "sector %llu: metadata %s (level %d) in tree "
                                "%llu\n", errstr, swarn.logical,
                                rcu_str_deref(dev->name),
@@@ -704,13 -729,11 +729,11 @@@ static void scrub_fixup_nodatasum(struc
        struct scrub_fixup_nodatasum *fixup;
        struct scrub_ctx *sctx;
        struct btrfs_trans_handle *trans = NULL;
-       struct btrfs_fs_info *fs_info;
        struct btrfs_path *path;
        int uncorrectable = 0;
  
        fixup = container_of(work, struct scrub_fixup_nodatasum, work);
        sctx = fixup->sctx;
-       fs_info = fixup->root->fs_info;
  
        path = btrfs_alloc_path();
        if (!path) {
@@@ -759,8 -782,8 +782,8 @@@ out
                btrfs_dev_replace_stats_inc(
                        &sctx->dev_root->fs_info->dev_replace.
                        num_uncorrectable_read_errors);
-               printk_ratelimited_in_rcu(KERN_ERR
-                       "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
+               printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
+                   "unable to fixup (nodatasum) error at logical %llu on dev %s\n",
                        fixup->logical, rcu_str_deref(fixup->dev->name));
        }
  
@@@ -1161,7 -1184,7 +1184,7 @@@ corrected_error
                        sctx->stat.corrected_errors++;
                        spin_unlock(&sctx->stat_lock);
                        printk_ratelimited_in_rcu(KERN_ERR
-                               "btrfs: fixed up error at logical %llu on dev %s\n",
+                               "BTRFS: fixed up error at logical %llu on dev %s\n",
                                logical, rcu_str_deref(dev->name));
                }
        } else {
@@@ -1170,7 -1193,7 +1193,7 @@@ did_not_correct_error
                sctx->stat.uncorrectable_errors++;
                spin_unlock(&sctx->stat_lock);
                printk_ratelimited_in_rcu(KERN_ERR
-                       "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
+                       "BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
                        logical, rcu_str_deref(dev->name));
        }
  
@@@ -1308,7 -1331,7 +1331,7 @@@ static void scrub_recheck_block(struct 
                        continue;
                }
                bio->bi_bdev = page->dev->bdev;
 -              bio->bi_sector = page->physical >> 9;
 +              bio->bi_iter.bi_sector = page->physical >> 9;
  
                bio_add_page(bio, page->page, PAGE_SIZE, 0);
                if (btrfsic_submit_bio_wait(READ, bio))
@@@ -1418,8 -1441,9 +1441,9 @@@ static int scrub_repair_page_from_good_
                int ret;
  
                if (!page_bad->dev->bdev) {
-                       printk_ratelimited(KERN_WARNING
-                               "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
+                       printk_ratelimited(KERN_WARNING "BTRFS: "
+                               "scrub_repair_page_from_good_copy(bdev == NULL) "
+                               "is unexpected!\n");
                        return -EIO;
                }
  
                if (!bio)
                        return -EIO;
                bio->bi_bdev = page_bad->dev->bdev;
 -              bio->bi_sector = page_bad->physical >> 9;
 +              bio->bi_iter.bi_sector = page_bad->physical >> 9;
  
                ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
                if (PAGE_SIZE != ret) {
@@@ -1520,7 -1544,7 +1544,7 @@@ again
                bio->bi_private = sbio;
                bio->bi_end_io = scrub_wr_bio_end_io;
                bio->bi_bdev = sbio->dev->bdev;
 -              bio->bi_sector = sbio->physical >> 9;
 +              bio->bi_iter.bi_sector = sbio->physical >> 9;
                sbio->err = 0;
        } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
                   spage->physical_for_dev_replace ||
@@@ -1877,7 -1901,7 +1901,7 @@@ static void scrub_submit(struct scrub_c
                 * This case is handled correctly (but _very_ slowly).
                 */
                printk_ratelimited(KERN_WARNING
-                       "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
+                       "BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
                bio_endio(sbio->bio, -EIO);
        } else {
                btrfsic_submit_bio(READ, sbio->bio);
@@@ -1926,7 -1950,7 +1950,7 @@@ again
                bio->bi_private = sbio;
                bio->bi_end_io = scrub_bio_end_io;
                bio->bi_bdev = sbio->dev->bdev;
 -              bio->bi_sector = sbio->physical >> 9;
 +              bio->bi_iter.bi_sector = sbio->physical >> 9;
                sbio->err = 0;
        } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
                   spage->physical ||
@@@ -2286,8 -2310,7 +2310,7 @@@ static noinline_for_stack int scrub_str
  
        wait_event(sctx->list_wait,
                   atomic_read(&sctx->bios_in_flight) == 0);
-       atomic_inc(&fs_info->scrubs_paused);
-       wake_up(&fs_info->scrub_pause_wait);
+       scrub_blocked_if_needed(fs_info);
  
        /* FIXME it might be better to start readahead at commit root */
        key_start.objectid = logical;
        if (!IS_ERR(reada2))
                btrfs_reada_wait(reada2);
  
-       mutex_lock(&fs_info->scrub_lock);
-       while (atomic_read(&fs_info->scrub_pause_req)) {
-               mutex_unlock(&fs_info->scrub_lock);
-               wait_event(fs_info->scrub_pause_wait,
-                  atomic_read(&fs_info->scrub_pause_req) == 0);
-               mutex_lock(&fs_info->scrub_lock);
-       }
-       atomic_dec(&fs_info->scrubs_paused);
-       mutex_unlock(&fs_info->scrub_lock);
-       wake_up(&fs_info->scrub_pause_wait);
  
        /*
         * collect all data csums for the stripe to avoid seeking during
                        wait_event(sctx->list_wait,
                                   atomic_read(&sctx->bios_in_flight) == 0);
                        atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
-                       atomic_inc(&fs_info->scrubs_paused);
-                       wake_up(&fs_info->scrub_pause_wait);
-                       mutex_lock(&fs_info->scrub_lock);
-                       while (atomic_read(&fs_info->scrub_pause_req)) {
-                               mutex_unlock(&fs_info->scrub_lock);
-                               wait_event(fs_info->scrub_pause_wait,
-                                  atomic_read(&fs_info->scrub_pause_req) == 0);
-                               mutex_lock(&fs_info->scrub_lock);
-                       }
-                       atomic_dec(&fs_info->scrubs_paused);
-                       mutex_unlock(&fs_info->scrub_lock);
-                       wake_up(&fs_info->scrub_pause_wait);
+                       scrub_blocked_if_needed(fs_info);
                }
  
+               if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+                       key.type = BTRFS_METADATA_ITEM_KEY;
+               else
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
                key.objectid = logical;
-               key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = (u64)-1;
  
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                        goto out;
  
                if (ret > 0) {
-                       ret = btrfs_previous_item(root, path, 0,
-                                                 BTRFS_EXTENT_ITEM_KEY);
+                       ret = btrfs_previous_extent_item(root, path, 0);
                        if (ret < 0)
                                goto out;
                        if (ret > 0) {
  
                        if (key.objectid < logical &&
                            (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
-                               printk(KERN_ERR
-                                      "btrfs scrub: tree block %llu spanning "
-                                      "stripes, ignored. logical=%llu\n",
+                               btrfs_err(fs_info,
+                                          "scrub: tree block %llu spanning "
+                                          "stripes, ignored. logical=%llu",
                                       key.objectid, logical);
                                goto next;
                        }
@@@ -2683,21 -2687,9 +2687,9 @@@ int scrub_enumerate_chunks(struct scrub
                wait_event(sctx->list_wait,
                           atomic_read(&sctx->bios_in_flight) == 0);
                atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
-               atomic_inc(&fs_info->scrubs_paused);
-               wake_up(&fs_info->scrub_pause_wait);
                wait_event(sctx->list_wait,
                           atomic_read(&sctx->workers_pending) == 0);
-               mutex_lock(&fs_info->scrub_lock);
-               while (atomic_read(&fs_info->scrub_pause_req)) {
-                       mutex_unlock(&fs_info->scrub_lock);
-                       wait_event(fs_info->scrub_pause_wait,
-                          atomic_read(&fs_info->scrub_pause_req) == 0);
-                       mutex_lock(&fs_info->scrub_lock);
-               }
-               atomic_dec(&fs_info->scrubs_paused);
-               mutex_unlock(&fs_info->scrub_lock);
-               wake_up(&fs_info->scrub_pause_wait);
+               scrub_blocked_if_needed(fs_info);
  
                btrfs_put_block_group(cache);
                if (ret)
@@@ -2823,8 -2815,8 +2815,8 @@@ int btrfs_scrub_dev(struct btrfs_fs_inf
         * check some assumptions
         */
        if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
-               printk(KERN_ERR
-                      "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
+               btrfs_err(fs_info,
+                          "scrub: size assumption nodesize == leafsize (%d == %d) fails",
                       fs_info->chunk_root->nodesize,
                       fs_info->chunk_root->leafsize);
                return -EINVAL;
                 * the way scrub is implemented. Do not handle this
                 * situation at all because it won't ever happen.
                 */
-               printk(KERN_ERR
-                      "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
+               btrfs_err(fs_info,
+                          "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
                       fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
                return -EINVAL;
        }
  
        if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
                /* not supported for data w/o checksums */
-               printk(KERN_ERR
-                      "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
+               btrfs_err(fs_info,
+                          "scrub: size assumption sectorsize != PAGE_SIZE "
+                          "(%d != %lu) fails",
                       fs_info->chunk_root->sectorsize, PAGE_SIZE);
                return -EINVAL;
        }
                 * would exhaust the array bounds of pagev member in
                 * struct scrub_block
                 */
-               pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
+               btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize "
+                          "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
                       fs_info->chunk_root->nodesize,
                       SCRUB_MAX_PAGES_PER_BLOCK,
                       fs_info->chunk_root->sectorsize,
        }
        sctx->readonly = readonly;
        dev->scrub_device = sctx;
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
  
+       /*
+        * By checking @scrub_pause_req here, we avoid a race
+        * between a transaction commit and scrubbing.
+        */
+       __scrub_blocked_if_needed(fs_info);
        atomic_inc(&fs_info->scrubs_running);
        mutex_unlock(&fs_info->scrub_lock);
  
                 * by holding device list mutex, we can
                 * kick off writing super in log tree sync.
                 */
+               mutex_lock(&fs_info->fs_devices->device_list_mutex);
                ret = scrub_supers(sctx, dev);
+               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
        }
-       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
  
        if (!ret)
                ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@@ -3167,7 -3168,8 +3168,8 @@@ static void copy_nocow_pages_worker(str
        ret = iterate_inodes_from_logical(logical, fs_info, path,
                                          record_inode_for_nocow, nocow_ctx);
        if (ret != 0 && ret != -ENOENT) {
-               pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
+               btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, "
+                       "phys %llu, len %llu, mir %u, ret %d",
                        logical, physical_for_dev_replace, len, mirror_num,
                        ret);
                not_written = 1;
@@@ -3289,7 -3291,7 +3291,7 @@@ static int copy_nocow_pages_for_inode(u
  again:
                page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
                if (!page) {
-                       pr_err("find_or_create_page() failed\n");
+                       btrfs_err(fs_info, "find_or_create_page() failed");
                        ret = -ENOMEM;
                        goto out;
                }
@@@ -3361,7 -3363,7 +3363,7 @@@ static int write_page_nocow(struct scru
                return -EIO;
        if (!dev->bdev) {
                printk_ratelimited(KERN_WARNING
-                       "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
+                       "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
                return -EIO;
        }
        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
                spin_unlock(&sctx->stat_lock);
                return -ENOMEM;
        }
 -      bio->bi_size = 0;
 -      bio->bi_sector = physical_for_dev_replace >> 9;
 +      bio->bi_iter.bi_size = 0;
 +      bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
        bio->bi_bdev = dev->bdev;
        ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
        if (ret != PAGE_CACHE_SIZE) {
diff --combined fs/btrfs/volumes.c
index 54d2685a3071f512bc8d8a5c67a60a03f179b29b,b68afe32419f1a79b24712362fbe26bcd60e112e..bab0b84d8f806adf711b797c0909cb9dd664cc95
@@@ -125,7 -125,7 +125,7 @@@ static void btrfs_kobject_uevent(struc
  
        ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
        if (ret)
-               pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n",
+               pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
                        action,
                        kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
                        &disk_to_dev(bdev->bd_disk)->kobj);
@@@ -200,7 -200,7 +200,7 @@@ btrfs_get_bdev_and_sb(const char *devic
  
        if (IS_ERR(*bdev)) {
                ret = PTR_ERR(*bdev);
-               printk(KERN_INFO "btrfs: open %s failed\n", device_path);
+               printk(KERN_INFO "BTRFS: open %s failed\n", device_path);
                goto error;
        }
  
@@@ -912,9 -912,9 +912,9 @@@ int btrfs_scan_one_device(const char *p
        if (disk_super->label[0]) {
                if (disk_super->label[BTRFS_LABEL_SIZE - 1])
                        disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
-               printk(KERN_INFO "btrfs: device label %s ", disk_super->label);
+               printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
        } else {
-               printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid);
+               printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
        }
  
        printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
@@@ -1813,7 -1813,7 +1813,7 @@@ int btrfs_find_device_missing_or_by_pat
                }
  
                if (!*device) {
-                       pr_err("btrfs: no missing device found\n");
+                       btrfs_err(root->fs_info, "no missing device found");
                        return -ENOENT;
                }
  
@@@ -3052,7 -3052,7 +3052,7 @@@ loop
  error:
        btrfs_free_path(path);
        if (enospc_errors) {
-               printk(KERN_INFO "btrfs: %d enospc errors during balance\n",
+               btrfs_info(fs_info, "%d enospc errors during balance",
                       enospc_errors);
                if (!ret)
                        ret = -ENOSPC;
@@@ -3138,8 -3138,8 +3138,8 @@@ int btrfs_balance(struct btrfs_balance_
                if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
                    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
                    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
-                       printk(KERN_ERR "btrfs: with mixed groups data and "
-                              "metadata balance options must be the same\n");
+                       btrfs_err(fs_info, "with mixed groups data and "
+                                  "metadata balance options must be the same");
                        ret = -EINVAL;
                        goto out;
                }
        if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->data.target, 1) ||
             (bctl->data.target & ~allowed))) {
-               printk(KERN_ERR "btrfs: unable to start balance with target "
-                      "data profile %llu\n",
+               btrfs_err(fs_info, "unable to start balance with target "
+                          "data profile %llu",
                       bctl->data.target);
                ret = -EINVAL;
                goto out;
        if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->meta.target, 1) ||
             (bctl->meta.target & ~allowed))) {
-               printk(KERN_ERR "btrfs: unable to start balance with target "
-                      "metadata profile %llu\n",
+               btrfs_err(fs_info,
+                          "unable to start balance with target metadata profile %llu",
                       bctl->meta.target);
                ret = -EINVAL;
                goto out;
        if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->sys.target, 1) ||
             (bctl->sys.target & ~allowed))) {
-               printk(KERN_ERR "btrfs: unable to start balance with target "
-                      "system profile %llu\n",
+               btrfs_err(fs_info,
+                          "unable to start balance with target system profile %llu",
                       bctl->sys.target);
                ret = -EINVAL;
                goto out;
        /* allow dup'ed data chunks only in mixed mode */
        if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
-               printk(KERN_ERR "btrfs: dup for data is not allowed\n");
+               btrfs_err(fs_info, "dup for data is not allowed");
                ret = -EINVAL;
                goto out;
        }
                     (fs_info->avail_metadata_alloc_bits & allowed) &&
                     !(bctl->meta.target & allowed))) {
                        if (bctl->flags & BTRFS_BALANCE_FORCE) {
-                               printk(KERN_INFO "btrfs: force reducing metadata "
-                                      "integrity\n");
+                               btrfs_info(fs_info, "force reducing metadata integrity");
                        } else {
-                               printk(KERN_ERR "btrfs: balance will reduce metadata "
-                                      "integrity, use force if you want this\n");
+                               btrfs_err(fs_info, "balance will reduce metadata "
+                                          "integrity, use force if you want this");
                                ret = -EINVAL;
                                goto out;
                        }
@@@ -3303,7 -3302,7 +3302,7 @@@ static int balance_kthread(void *data
        mutex_lock(&fs_info->balance_mutex);
  
        if (fs_info->balance_ctl) {
-               printk(KERN_INFO "btrfs: continuing balance\n");
+               btrfs_info(fs_info, "continuing balance");
                ret = btrfs_balance(fs_info->balance_ctl, NULL);
        }
  
@@@ -3325,7 -3324,7 +3324,7 @@@ int btrfs_resume_balance_async(struct b
        spin_unlock(&fs_info->balance_lock);
  
        if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
-               printk(KERN_INFO "btrfs: force skipping balance\n");
+               btrfs_info(fs_info, "force skipping balance");
                return 0;
        }
  
@@@ -3543,7 -3542,7 +3542,7 @@@ update_tree
                                                  BTRFS_UUID_KEY_SUBVOL,
                                                  key.objectid);
                        if (ret < 0) {
-                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                               btrfs_warn(fs_info, "uuid_tree_add failed %d",
                                        ret);
                                break;
                        }
                                                 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
                                                  key.objectid);
                        if (ret < 0) {
-                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                               btrfs_warn(fs_info, "uuid_tree_add failed %d",
                                        ret);
                                break;
                        }
@@@ -3590,7 -3589,7 +3589,7 @@@ out
        if (trans && !IS_ERR(trans))
                btrfs_end_transaction(trans, fs_info->uuid_root);
        if (ret)
-               pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
+               btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
        else
                fs_info->update_uuid_tree_gen = 1;
        up(&fs_info->uuid_tree_rescan_sem);
@@@ -3654,7 -3653,7 +3653,7 @@@ static int btrfs_uuid_rescan_kthread(vo
         */
        ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
        if (ret < 0) {
-               pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
+               btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
                up(&fs_info->uuid_tree_rescan_sem);
                return ret;
        }
@@@ -3695,7 -3694,7 +3694,7 @@@ int btrfs_create_uuid_tree(struct btrfs
        task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
        if (IS_ERR(task)) {
                /* fs_info->update_uuid_tree_gen remains 0 in all error case */
-               pr_warn("btrfs: failed to start uuid_scan task\n");
+               btrfs_warn(fs_info, "failed to start uuid_scan task");
                up(&fs_info->uuid_tree_rescan_sem);
                return PTR_ERR(task);
        }
@@@ -3711,7 -3710,7 +3710,7 @@@ int btrfs_check_uuid_tree(struct btrfs_
        task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
        if (IS_ERR(task)) {
                /* fs_info->update_uuid_tree_gen remains 0 in all error case */
-               pr_warn("btrfs: failed to start uuid_rescan task\n");
+               btrfs_warn(fs_info, "failed to start uuid_rescan task");
                up(&fs_info->uuid_tree_rescan_sem);
                return PTR_ERR(task);
        }
@@@ -4033,7 -4032,7 +4032,7 @@@ static int __btrfs_alloc_chunk(struct b
                max_stripe_size = 32 * 1024 * 1024;
                max_chunk_size = 2 * max_stripe_size;
        } else {
-               printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
+               btrfs_err(info, "invalid chunk type 0x%llx requested",
                       type);
                BUG_ON(1);
        }
  
                if (!device->writeable) {
                        WARN(1, KERN_ERR
-                              "btrfs: read-only device in alloc_list\n");
+                              "BTRFS: read-only device in alloc_list\n");
                        continue;
                }
  
@@@ -5193,13 -5192,13 +5192,13 @@@ int btrfs_rmap_block(struct btrfs_mappi
        read_unlock(&em_tree->lock);
  
        if (!em) {
-               printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
+               printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n",
                       chunk_start);
                return -EIO;
        }
  
        if (em->start != chunk_start) {
-               printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
+               printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n",
                       em->start, chunk_start);
                free_extent_map(em);
                return -EIO;
@@@ -5298,13 -5297,6 +5297,13 @@@ static void btrfs_end_bio(struct bio *b
                        bio_put(bio);
                        bio = bbio->orig_bio;
                }
 +
 +              /*
 +               * We have original bio now. So increment bi_remaining to
 +               * account for it in endio
 +               */
 +              atomic_inc(&bio->bi_remaining);
 +
                bio->bi_private = bbio->private;
                bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
@@@ -5418,7 -5410,7 +5417,7 @@@ static int bio_size_ok(struct block_dev
        if (!q->merge_bvec_fn)
                return 1;
  
 -      bvm.bi_size = bio->bi_size - prev->bv_len;
 +      bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len;
        if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
                return 0;
        return 1;
@@@ -5433,7 -5425,7 +5432,7 @@@ static void submit_stripe_bio(struct bt
        bio->bi_private = bbio;
        btrfs_io_bio(bio)->stripe_index = dev_nr;
        bio->bi_end_io = btrfs_end_bio;
 -      bio->bi_sector = physical >> 9;
 +      bio->bi_iter.bi_sector = physical >> 9;
  #ifdef DEBUG
        {
                struct rcu_string *name;
@@@ -5471,7 -5463,7 +5470,7 @@@ again
        while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
                if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
                                 bvec->bv_offset) < bvec->bv_len) {
 -                      u64 len = bio->bi_size;
 +                      u64 len = bio->bi_iter.bi_size;
  
                        atomic_inc(&bbio->stripes_pending);
                        submit_stripe_bio(root, bbio, bio, physical, dev_nr,
@@@ -5493,7 -5485,7 +5492,7 @@@ static void bbio_error(struct btrfs_bi
                bio->bi_private = bbio->private;
                bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
 -              bio->bi_sector = logical >> 9;
 +              bio->bi_iter.bi_sector = logical >> 9;
                kfree(bbio);
                bio_endio(bio, -EIO);
        }
@@@ -5504,7 -5496,7 +5503,7 @@@ int btrfs_map_bio(struct btrfs_root *ro
  {
        struct btrfs_device *dev;
        struct bio *first_bio = bio;
 -      u64 logical = (u64)bio->bi_sector << 9;
 +      u64 logical = (u64)bio->bi_iter.bi_sector << 9;
        u64 length = 0;
        u64 map_length;
        u64 *raid_map = NULL;
        int total_devs = 1;
        struct btrfs_bio *bbio = NULL;
  
 -      length = bio->bi_size;
 +      length = bio->bi_iter.bi_size;
        map_length = length;
  
        ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
@@@ -6130,7 -6122,8 +6129,8 @@@ static int update_dev_stat_item(struct 
        BUG_ON(!path);
        ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
        if (ret < 0) {
-               printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
+               printk_in_rcu(KERN_WARNING "BTRFS: "
+                       "error %d while searching for dev_stats item for device %s!\n",
                              ret, rcu_str_deref(device->name));
                goto out;
        }
                /* need to delete old one and insert a new one */
                ret = btrfs_del_item(trans, dev_root, path);
                if (ret != 0) {
-                       printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
+                       printk_in_rcu(KERN_WARNING "BTRFS: "
+                               "delete too small dev_stats item for device %s failed %d!\n",
                                      rcu_str_deref(device->name), ret);
                        goto out;
                }
                ret = btrfs_insert_empty_item(trans, dev_root, path,
                                              &key, sizeof(*ptr));
                if (ret < 0) {
-                       printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
+                       printk_in_rcu(KERN_WARNING "BTRFS: "
+                                         "insert dev_stats item for device %s failed %d!\n",
                                      rcu_str_deref(device->name), ret);
                        goto out;
                }
@@@ -6206,16 -6201,14 +6208,14 @@@ static void btrfs_dev_stat_print_on_err
  {
        if (!dev->dev_stats_valid)
                return;
-       printk_ratelimited_in_rcu(KERN_ERR
-                          "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+       printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
+                          "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
                           rcu_str_deref(dev->name),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
-                          btrfs_dev_stat_read(dev,
-                                              BTRFS_DEV_STAT_CORRUPTION_ERRS),
-                          btrfs_dev_stat_read(dev,
-                                              BTRFS_DEV_STAT_GENERATION_ERRS));
+                          btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
+                          btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
  }
  
  static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
        if (i == BTRFS_DEV_STAT_VALUES_MAX)
                return; /* all values == 0, suppress message */
  
-       printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+       printk_in_rcu(KERN_INFO "BTRFS: "
+                  "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
               rcu_str_deref(dev->name),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
@@@ -6249,12 -6243,10 +6250,10 @@@ int btrfs_get_dev_stats(struct btrfs_ro
        mutex_unlock(&fs_devices->device_list_mutex);
  
        if (!dev) {
-               printk(KERN_WARNING
-                      "btrfs: get dev_stats failed, device not found\n");
+               btrfs_warn(root->fs_info, "get dev_stats failed, device not found");
                return -ENODEV;
        } else if (!dev->dev_stats_valid) {
-               printk(KERN_WARNING
-                      "btrfs: get dev_stats failed, not yet valid\n");
+               btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid");
                return -ENODEV;
        } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
                for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
diff --combined fs/btrfs/xattr.c
index 3d1c301c9260299fbecc13eaeade5b6521c3ef26,4b33765add386a975e9f6a28f01ad734d76bf3f5..ad8328d797ea9910c21f0ecd5db6051d347a4571
  #include <linux/rwsem.h>
  #include <linux/xattr.h>
  #include <linux/security.h>
 +#include <linux/posix_acl_xattr.h>
  #include "ctree.h"
  #include "btrfs_inode.h"
  #include "transaction.h"
  #include "xattr.h"
  #include "disk-io.h"
+ #include "props.h"
  
  
  ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@@ -314,8 -314,8 +315,8 @@@ err
   */
  const struct xattr_handler *btrfs_xattr_handlers[] = {
  #ifdef CONFIG_BTRFS_FS_POSIX_ACL
 -      &btrfs_xattr_acl_access_handler,
 -      &btrfs_xattr_acl_default_handler,
 +      &posix_acl_access_xattr_handler,
 +      &posix_acl_default_xattr_handler,
  #endif
        NULL,
  };
@@@ -332,7 -332,8 +333,8 @@@ static bool btrfs_is_valid_xattr(const 
                        XATTR_SECURITY_PREFIX_LEN) ||
               !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-              !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+              !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
+               !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
  }
  
  ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
@@@ -374,6 -375,10 +376,10 @@@ int btrfs_setxattr(struct dentry *dentr
        if (!btrfs_is_valid_xattr(name))
                return -EOPNOTSUPP;
  
+       if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+               return btrfs_set_prop(dentry->d_inode, name,
+                                     value, size, flags);
        if (size == 0)
                value = "";  /* empty EA, do not remove */
  
@@@ -403,6 -408,10 +409,10 @@@ int btrfs_removexattr(struct dentry *de
        if (!btrfs_is_valid_xattr(name))
                return -EOPNOTSUPP;
  
+       if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+               return btrfs_set_prop(dentry->d_inode, name,
+                                     NULL, 0, XATTR_REPLACE);
        return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
                                XATTR_REPLACE);
  }
diff --combined lib/kobject.c
index b0b26665c61161d4f33d1dbdaf49fe74275f400e,03512a40a3ef9e93387a3217617bc8f9927ebcbe..cb14aeac4ccaeedfe537d423c41c3d0b021e1c9f
   */
  
  #include <linux/kobject.h>
 -#include <linux/kobj_completion.h>
  #include <linux/string.h>
  #include <linux/export.h>
  #include <linux/stat.h>
  #include <linux/slab.h>
 +#include <linux/random.h>
  
  /**
   * kobject_namespace - return @kobj's namespace tag
@@@ -65,17 -65,13 +65,17 @@@ static int populate_dir(struct kobject 
  
  static int create_dir(struct kobject *kobj)
  {
 +      const struct kobj_ns_type_operations *ops;
        int error;
  
        error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
 -      if (!error) {
 -              error = populate_dir(kobj);
 -              if (error)
 -                      sysfs_remove_dir(kobj);
 +      if (error)
 +              return error;
 +
 +      error = populate_dir(kobj);
 +      if (error) {
 +              sysfs_remove_dir(kobj);
 +              return error;
        }
  
        /*
         */
        sysfs_get(kobj->sd);
  
 -      return error;
 +      /*
 +       * If @kobj has ns_ops, its children need to be filtered based on
 +       * their namespace tags.  Enable namespace support on @kobj->sd.
 +       */
 +      ops = kobj_child_ns_ops(kobj);
 +      if (ops) {
 +              BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE);
 +              BUG_ON(ops->type >= KOBJ_NS_TYPES);
 +              BUG_ON(!kobj_ns_type_registered(ops->type));
 +
 +              kernfs_enable_ns(kobj->sd);
 +      }
 +
 +      return 0;
  }
  
  static int get_kobj_path_length(struct kobject *kobj)
@@@ -264,10 -247,8 +264,10 @@@ int kobject_set_name_vargs(struct kobje
                return 0;
  
        kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
 -      if (!kobj->name)
 +      if (!kobj->name) {
 +              kobj->name = old_name;
                return -ENOMEM;
 +      }
  
        /* ewww... some of these buggers have '/' in the name ... */
        while ((s = strchr(kobj->name, '/')))
@@@ -365,7 -346,7 +365,7 @@@ static int kobject_add_varg(struct kobj
   *
   * If @parent is set, then the parent of the @kobj will be set to it.
   * If @parent is NULL, then the parent of the @kobj will be set to the
 - * kobject associted with the kset assigned to this kobject.  If no kset
 + * kobject associated with the kset assigned to this kobject.  If no kset
   * is assigned to the kobject, then the kobject will be located in the
   * root of the sysfs tree.
   *
@@@ -555,7 -536,7 +555,7 @@@ out
   */
  void kobject_del(struct kobject *kobj)
  {
 -      struct sysfs_dirent *sd;
 +      struct kernfs_node *sd;
  
        if (!kobj)
                return;
@@@ -644,12 -625,10 +644,12 @@@ static void kobject_release(struct kre
  {
        struct kobject *kobj = container_of(kref, struct kobject, kref);
  #ifdef CONFIG_DEBUG_KOBJECT_RELEASE
 -      pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n",
 -               kobject_name(kobj), kobj, __func__, kobj->parent);
 +      unsigned long delay = HZ + HZ * (get_random_int() & 0x3);
 +      pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n",
 +               kobject_name(kobj), kobj, __func__, kobj->parent, delay);
        INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);
 -      schedule_delayed_work(&kobj->release, HZ);
 +
 +      schedule_delayed_work(&kobj->release, delay);
  #else
        kobject_cleanup(kobj);
  #endif
@@@ -779,7 -758,57 +779,8 @@@ const struct sysfs_ops kobj_sysfs_ops 
        .show   = kobj_attr_show,
        .store  = kobj_attr_store,
  };
+ EXPORT_SYMBOL_GPL(kobj_sysfs_ops);
  
 -/**
 - * kobj_completion_init - initialize a kobj_completion object.
 - * @kc: kobj_completion
 - * @ktype: type of kobject to initialize
 - *
 - * kobj_completion structures can be embedded within structures with different
 - * lifetime rules.  During the release of the enclosing object, we can
 - * wait on the release of the kobject so that we don't free it while it's
 - * still busy.
 - */
 -void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype)
 -{
 -      init_completion(&kc->kc_unregister);
 -      kobject_init(&kc->kc_kobj, ktype);
 -}
 -EXPORT_SYMBOL_GPL(kobj_completion_init);
 -
 -/**
 - * kobj_completion_release - release a kobj_completion object
 - * @kobj: kobject embedded in kobj_completion
 - *
 - * Used with kobject_release to notify waiters that the kobject has been
 - * released.
 - */
 -void kobj_completion_release(struct kobject *kobj)
 -{
 -      struct kobj_completion *kc = kobj_to_kobj_completion(kobj);
 -      complete(&kc->kc_unregister);
 -}
 -EXPORT_SYMBOL_GPL(kobj_completion_release);
 -
 -/**
 - * kobj_completion_del_and_wait - release the kobject and wait for it
 - * @kc: kobj_completion object to release
 - *
 - * Delete the kobject from sysfs and drop the reference count.  Then wait
 - * until any other outstanding references are also dropped.  This routine
 - * is only necessary once other references may have been taken on the
 - * kobject.  Typically this happens when the kobject has been published
 - * to sysfs via kobject_add.
 - */
 -void kobj_completion_del_and_wait(struct kobj_completion *kc)
 -{
 -      kobject_del(&kc->kc_kobj);
 -      kobject_put(&kc->kc_kobj);
 -      wait_for_completion(&kc->kc_unregister);
 -}
 -EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait);
 -
  /**
   * kset_register - initialize and add a kset.
   * @k: kset.
@@@ -807,7 -836,6 +808,7 @@@ void kset_unregister(struct kset *k
  {
        if (!k)
                return;
 +      kobject_del(&k->kobj);
        kobject_put(&k->kobj);
  }