Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 31 Jan 2014 04:08:20 +0000 (20:08 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 31 Jan 2014 04:08:20 +0000 (20:08 -0800)
Pull btrfs updates from Chris Mason:
 "This is a pretty big pull, and most of these changes have been
  floating in btrfs-next for a long time.  Filipe's properties work is a
  cool building block for inheriting attributes like compression down on
  a per inode basis.

  Jeff Mahoney kicked in code to export filesystem info into sysfs.

  Otherwise, lots of performance improvements, cleanups and bug fixes.

  Looks like there are still a few other small pending incrementals, but
  I wanted to get the bulk of this in first"
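
For context on the properties work: the series plumbs per-inode properties
through extended attributes in a "btrfs." namespace (note the
include/uapi/linux/xattr.h entry in the file list below, and the new
fs/btrfs/props.c). A minimal userspace sketch, assuming the "compression"
property name that this series introduces:

	#include <stdio.h>
	#include <sys/xattr.h>

	int main(void)
	{
		/* Set the (assumed) compression property on a directory;
		 * files created inside it then inherit the value. */
		if (setxattr("/mnt/btrfs/dir", "btrfs.compression",
			     "lzo", 3, 0) != 0)
			perror("setxattr");
		return 0;
	}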

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (149 commits)
  Btrfs: fix spin_unlock in check_ref_cleanup
  Btrfs: setup inode location during btrfs_init_inode_locked
  Btrfs: don't use ram_bytes for uncompressed inline items
  Btrfs: fix btrfs_search_slot_for_read backwards iteration
  Btrfs: do not export ulist functions
  Btrfs: rework ulist with list+rb_tree
  Btrfs: fix memory leaks on walking backrefs failure
  Btrfs: fix send file hole detection leading to data corruption
  Btrfs: add a reschedule point in btrfs_find_all_roots()
  Btrfs: make send's file extent item search more efficient
  Btrfs: fix to catch all errors when resolving indirect ref
  Btrfs: fix protection between walking backrefs and root deletion
  btrfs: fix warning while merging two adjacent extents
  Btrfs: fix infinite path build loops in incremental send
  btrfs: undo sysfs when open_ctree() fails
  Btrfs: fix snprintf usage by send's gen_unique_name
  btrfs: fix defrag 32-bit integer overflow
  btrfs: sysfs: list the NO_HOLES feature
  btrfs: sysfs: don't show reserved incompat feature
  btrfs: call permission checks earlier in ioctls and return EPERM
  ...

58 files changed:
Documentation/filesystems/btrfs.txt
fs/btrfs/Kconfig
fs/btrfs/Makefile
fs/btrfs/backref.c
fs/btrfs/btrfs_inode.h
fs/btrfs/check-integrity.c
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/dev-replace.c
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/hash.c [new file with mode: 0644]
fs/btrfs/hash.h
fs/btrfs/inode-item.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/lzo.c
fs/btrfs/ordered-data.c
fs/btrfs/orphan.c
fs/btrfs/print-tree.c
fs/btrfs/props.c [new file with mode: 0644]
fs/btrfs/props.h [new file with mode: 0644]
fs/btrfs/qgroup.c
fs/btrfs/reada.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h [new file with mode: 0644]
fs/btrfs/tests/btrfs-tests.h
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-log.c
fs/btrfs/ulist.c
fs/btrfs/ulist.h
fs/btrfs/uuid-tree.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/btrfs/zlib.c
include/linux/rwsem.h
include/trace/events/btrfs.h
include/uapi/linux/btrfs.h
include/uapi/linux/xattr.h
lib/kobject.c

index 5dd282dda55c5eca0fe50b6b1cfc0116d3cc7432..d11cc2f8077b9ea6e4284419fd40121c13d0d1c2 100644 (file)
@@ -38,7 +38,7 @@ Mount Options
 =============
 
 When mounting a btrfs filesystem, the following options are accepted.
-Unless otherwise specified, all options default to off.
+Options with (*) are default options and will not show in the mount options.
 
   alloc_start=<bytes>
        Debugging option to force all block allocations above a certain
@@ -46,10 +46,12 @@ Unless otherwise specified, all options default to off.
        bytes, optionally with a K, M, or G suffix, case insensitive.
        Default is 1MB.
 
+  noautodefrag(*)
   autodefrag
-       Detect small random writes into files and queue them up for the
-       defrag process.  Works best for small files; Not well suited for
-       large database workloads.
+       Disable/enable auto defragmentation.
+       Auto defragmentation detects small random writes into files and queues
+       them up for the defrag process.  Works best for small files;
+       Not well suited for large database workloads.
 
   check_int
   check_int_data
@@ -96,21 +98,26 @@ Unless otherwise specified, all options default to off.
        can be avoided.  Especially useful when trying to mount a multi-device
        setup as root.  May be specified multiple times for multiple devices.
 
+  nodiscard(*)
   discard
-       Issue frequent commands to let the block device reclaim space freed by
-       the filesystem.  This is useful for SSD devices, thinly provisioned
+       Disable/enable discard mount option.
+       Discard issues frequent commands to let the block device reclaim space
+       freed by the filesystem.
+       This is useful for SSD devices, thinly provisioned
        LUNs and virtual machine images, but may have a significant
        performance impact.  (The fstrim command is also available to
        initiate batch trims from userspace).
 
+  noenospc_debug(*)
   enospc_debug
-       Debugging option to be more verbose in some ENOSPC conditions.
+       Disable/enable debugging option to be more verbose in some ENOSPC conditions.
 
   fatal_errors=<action>
        Action to take when encountering a fatal error: 
          "bug" - BUG() on a fatal error.  This is the default.
          "panic" - panic() on a fatal error.
 
+  noflushoncommit(*)
   flushoncommit
        The 'flushoncommit' mount option forces any data dirtied by a write in a
        prior transaction to commit as part of the current commit.  This makes
@@ -134,26 +141,32 @@ Unless otherwise specified, all options default to off.
        Specify that 1 metadata chunk should be allocated after every <value>
        data chunks.  Off by default.
 
+  acl(*)
   noacl
-       Disable support for Posix Access Control Lists (ACLs).  See the
+       Enable/disable support for Posix Access Control Lists (ACLs).  See the
        acl(5) manual page for more information about ACLs.
 
+  barrier(*)
   nobarrier
-        Disables the use of block layer write barriers.  Write barriers ensure
-       that certain IOs make it through the device cache and are on persistent
-       storage.  If used on a device with a volatile (non-battery-backed)
-       write-back cache, this option will lead to filesystem corruption on a
-       system crash or power loss.
+        Enable/disable the use of block layer write barriers.  Write barriers
+       ensure that certain IOs make it through the device cache and are on
+       persistent storage. If disabled on a device with a volatile
+       (non-battery-backed) write-back cache, nobarrier option will lead to
+       filesystem corruption on a system crash or power loss.
 
+  datacow(*)
   nodatacow
-       Disable data copy-on-write for newly created files.  Implies nodatasum,
-       and disables all compression.
+       Enable/disable data copy-on-write for newly created files.
+       Nodatacow implies nodatasum, and disables all compression.
 
+  datasum(*)
   nodatasum
-       Disable data checksumming for newly created files.
+       Enable/disable data checksumming for newly created files.
+       Datasum implies datacow.
 
+  treelog(*)
   notreelog
-       Disable the tree logging used for fsync and O_SYNC writes.
+       Enable/disable the tree logging used for fsync and O_SYNC writes.
 
   recovery
        Enable autorecovery attempts if a bad tree root is found at mount time.
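
To illustrate the new (*) notation above: options marked with (*) (datacow,
datasum, barrier, treelog, ...) are on by default and need no mount option;
only their "no" variants have to be passed to turn them off, while options
such as autodefrag and discard are off by default and must be requested
explicitly. A minimal sketch using mount(2), with a hypothetical device and
mount point:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Turn on two non-default options; the defaults marked (*)
		 * in the text above are active without being listed. */
		if (mount("/dev/sdb1", "/mnt/btrfs", "btrfs", 0,
			  "autodefrag,discard") != 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}
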
index aa976eced2d2ea8dfa9c0e97ea84da7438626d62..a66768ebc8d19d394f2cd0818d56178a50f84803 100644 (file)
@@ -1,6 +1,7 @@
 config BTRFS_FS
        tristate "Btrfs filesystem support"
-       select LIBCRC32C
+       select CRYPTO
+       select CRYPTO_CRC32C
        select ZLIB_INFLATE
        select ZLIB_DEFLATE
        select LZO_COMPRESS
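
The Kconfig change above switches btrfs from LIBCRC32C to the generic crypto
API, which the new fs/btrfs/hash.c wraps. A sketch of the usual crypto_shash
pattern for crc32c at this kernel vintage (illustrative names, not
necessarily the exact hash.c code):

	#include <crypto/hash.h>
	#include <linux/bug.h>
	#include <linux/err.h>

	static struct crypto_shash *tfm;

	int example_crc32c_init(void)
	{
		tfm = crypto_alloc_shash("crc32c", 0, 0);
		return IS_ERR(tfm) ? PTR_ERR(tfm) : 0;
	}

	u32 example_crc32c(u32 crc, const void *address, unsigned int length)
	{
		struct {
			struct shash_desc shash;
			char ctx[crypto_shash_descsize(tfm)];
		} desc;
		int err;

		desc.shash.tfm = tfm;
		desc.shash.flags = 0;
		*(u32 *)desc.ctx = crc;	/* seed with the running checksum */

		err = crypto_shash_update(&desc.shash, address, length);
		BUG_ON(err);

		return *(u32 *)desc.ctx;
	}
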
index 1a44e42d602a1b41f60a04ea9a1a290273f7e7dd..f341a98031d2e080522239c99392b47b849b9c2e 100644 (file)
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
           export.o tree-log.o free-space-cache.o zlib.o lzo.o \
           compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
           reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-          uuid-tree.o
+          uuid-tree.o props.o hash.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
index 3775947429b28ea07678c4f58877bfcbb5f8bed6..aded3ef3d3d4abfa6d61fa078a51e64e4b80be65 100644 (file)
@@ -66,6 +66,16 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
        return 0;
 }
 
+static void free_inode_elem_list(struct extent_inode_elem *eie)
+{
+       struct extent_inode_elem *eie_next;
+
+       for (; eie; eie = eie_next) {
+               eie_next = eie->next;
+               kfree(eie);
+       }
+}
+
 static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
                                u64 extent_item_pos,
                                struct extent_inode_elem **eie)
@@ -209,18 +219,19 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 }
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
-                               struct ulist *parents, int level,
-                               struct btrfs_key *key_for_search, u64 time_seq,
-                               u64 wanted_disk_byte,
-                               const u64 *extent_item_pos)
+                          struct ulist *parents, struct __prelim_ref *ref,
+                          int level, u64 time_seq, const u64 *extent_item_pos)
 {
        int ret = 0;
        int slot;
        struct extent_buffer *eb;
        struct btrfs_key key;
+       struct btrfs_key *key_for_search = &ref->key_for_search;
        struct btrfs_file_extent_item *fi;
        struct extent_inode_elem *eie = NULL, *old = NULL;
        u64 disk_byte;
+       u64 wanted_disk_byte = ref->wanted_disk_byte;
+       u64 count = 0;
 
        if (level != 0) {
                eb = path->nodes[level];
@@ -238,7 +249,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
        if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
                ret = btrfs_next_old_leaf(root, path, time_seq);
 
-       while (!ret) {
+       while (!ret && count < ref->count) {
                eb = path->nodes[0];
                slot = path->slots[0];
 
@@ -254,6 +265,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                if (disk_byte == wanted_disk_byte) {
                        eie = NULL;
                        old = NULL;
+                       count++;
                        if (extent_item_pos) {
                                ret = check_extent_in_eb(&key, eb, fi,
                                                *extent_item_pos,
@@ -273,6 +285,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                                        old = old->next;
                                old->next = eie;
                        }
+                       eie = NULL;
                }
 next:
                ret = btrfs_next_old_item(root, path, time_seq);
@@ -280,6 +293,8 @@ next:
 
        if (ret > 0)
                ret = 0;
+       else if (ret < 0)
+               free_inode_elem_list(eie);
        return ret;
 }
 
@@ -299,23 +314,34 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
        int ret = 0;
        int root_level;
        int level = ref->level;
+       int index;
 
        root_key.objectid = ref->root_id;
        root_key.type = BTRFS_ROOT_ITEM_KEY;
        root_key.offset = (u64)-1;
+
+       index = srcu_read_lock(&fs_info->subvol_srcu);
+
        root = btrfs_read_fs_root_no_name(fs_info, &root_key);
        if (IS_ERR(root)) {
+               srcu_read_unlock(&fs_info->subvol_srcu, index);
                ret = PTR_ERR(root);
                goto out;
        }
 
        root_level = btrfs_old_root_level(root, time_seq);
 
-       if (root_level + 1 == level)
+       if (root_level + 1 == level) {
+               srcu_read_unlock(&fs_info->subvol_srcu, index);
                goto out;
+       }
 
        path->lowest_level = level;
        ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+
+       /* root node has been locked, we can release @subvol_srcu safely here */
+       srcu_read_unlock(&fs_info->subvol_srcu, index);
+
        pr_debug("search slot in root %llu (level %d, ref count %d) returned "
                 "%d for key (%llu %u %llu)\n",
                 ref->root_id, level, ref->count, ret,
@@ -334,9 +360,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                eb = path->nodes[level];
        }
 
-       ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
-                               time_seq, ref->wanted_disk_byte,
-                               extent_item_pos);
+       ret = add_all_parents(root, path, parents, ref, level, time_seq,
+                             extent_item_pos);
 out:
        path->lowest_level = 0;
        btrfs_release_path(path);
@@ -376,10 +401,16 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
                        continue;
                err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
                                             parents, extent_item_pos);
-               if (err == -ENOMEM)
-                       goto out;
-               if (err)
+               /*
+                * we can only tolerate ENOENT; otherwise we should catch the
+                * error and return directly.
+                */
+               if (err == -ENOENT) {
                        continue;
+               } else if (err) {
+                       ret = err;
+                       goto out;
+               }
 
                /* we put the first parent into the ref at hand */
                ULIST_ITER_INIT(&uiter);
@@ -538,14 +569,13 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
        if (extent_op && extent_op->update_key)
                btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
-       while ((n = rb_prev(n))) {
+       spin_lock(&head->lock);
+       n = rb_first(&head->ref_root);
+       while (n) {
                struct btrfs_delayed_ref_node *node;
                node = rb_entry(n, struct btrfs_delayed_ref_node,
                                rb_node);
-               if (node->bytenr != head->node.bytenr)
-                       break;
-               WARN_ON(node->is_head);
-
+               n = rb_next(n);
                if (node->seq > seq)
                        continue;
 
@@ -612,10 +642,10 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        WARN_ON(1);
                }
                if (ret)
-                       return ret;
+                       break;
        }
-
-       return 0;
+       spin_unlock(&head->lock);
+       return ret;
 }
 
 /*
@@ -828,6 +858,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
        struct list_head prefs_delayed;
        struct list_head prefs;
        struct __prelim_ref *ref;
+       struct extent_inode_elem *eie = NULL;
 
        INIT_LIST_HEAD(&prefs);
        INIT_LIST_HEAD(&prefs_delayed);
@@ -882,15 +913,15 @@ again:
                                btrfs_put_delayed_ref(&head->node);
                                goto again;
                        }
+                       spin_unlock(&delayed_refs->lock);
                        ret = __add_delayed_refs(head, time_seq,
                                                 &prefs_delayed);
                        mutex_unlock(&head->mutex);
-                       if (ret) {
-                               spin_unlock(&delayed_refs->lock);
+                       if (ret)
                                goto out;
-                       }
+               } else {
+                       spin_unlock(&delayed_refs->lock);
                }
-               spin_unlock(&delayed_refs->lock);
        }
 
        if (path->slots[0]) {
@@ -941,7 +972,6 @@ again:
                                goto out;
                }
                if (ref->count && ref->parent) {
-                       struct extent_inode_elem *eie = NULL;
                        if (extent_item_pos && !ref->inode_list) {
                                u32 bsz;
                                struct extent_buffer *eb;
@@ -976,6 +1006,7 @@ again:
                                        eie = eie->next;
                                eie->next = ref->inode_list;
                        }
+                       eie = NULL;
                }
                list_del(&ref->list);
                kmem_cache_free(btrfs_prelim_ref_cache, ref);
@@ -994,7 +1025,8 @@ out:
                list_del(&ref->list);
                kmem_cache_free(btrfs_prelim_ref_cache, ref);
        }
-
+       if (ret < 0)
+               free_inode_elem_list(eie);
        return ret;
 }
 
@@ -1002,7 +1034,6 @@ static void free_leaf_list(struct ulist *blocks)
 {
        struct ulist_node *node = NULL;
        struct extent_inode_elem *eie;
-       struct extent_inode_elem *eie_next;
        struct ulist_iterator uiter;
 
        ULIST_ITER_INIT(&uiter);
@@ -1010,10 +1041,7 @@ static void free_leaf_list(struct ulist *blocks)
                if (!node->aux)
                        continue;
                eie = (struct extent_inode_elem *)(uintptr_t)node->aux;
-               for (; eie; eie = eie_next) {
-                       eie_next = eie->next;
-                       kfree(eie);
-               }
+               free_inode_elem_list(eie);
                node->aux = 0;
        }
 
@@ -1101,44 +1129,13 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                if (!node)
                        break;
                bytenr = node->val;
+               cond_resched();
        }
 
        ulist_free(tmp);
        return 0;
 }
 
-
-static int __inode_info(u64 inum, u64 ioff, u8 key_type,
-                       struct btrfs_root *fs_root, struct btrfs_path *path,
-                       struct btrfs_key *found_key)
-{
-       int ret;
-       struct btrfs_key key;
-       struct extent_buffer *eb;
-
-       key.type = key_type;
-       key.objectid = inum;
-       key.offset = ioff;
-
-       ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
-       if (ret < 0)
-               return ret;
-
-       eb = path->nodes[0];
-       if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
-               ret = btrfs_next_leaf(fs_root, path);
-               if (ret)
-                       return ret;
-               eb = path->nodes[0];
-       }
-
-       btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
-       if (found_key->type != key.type || found_key->objectid != key.objectid)
-               return 1;
-
-       return 0;
-}
-
 /*
  * this makes the path point to (inum INODE_ITEM ioff)
  */
@@ -1146,16 +1143,16 @@ int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
                        struct btrfs_path *path)
 {
        struct btrfs_key key;
-       return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path,
-                               &key);
+       return btrfs_find_item(fs_root, path, inum, ioff,
+                       BTRFS_INODE_ITEM_KEY, &key);
 }
 
 static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
                                struct btrfs_path *path,
                                struct btrfs_key *found_key)
 {
-       return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path,
-                               found_key);
+       return btrfs_find_item(fs_root, path, inum, ioff,
+                       BTRFS_INODE_REF_KEY, found_key);
 }
 
 int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
@@ -1335,20 +1332,45 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
        ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
        if (ret < 0)
                return ret;
-       ret = btrfs_previous_item(fs_info->extent_root, path,
-                                       0, BTRFS_EXTENT_ITEM_KEY);
-       if (ret < 0)
-               return ret;
 
-       btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
+       while (1) {
+               u32 nritems;
+               if (path->slots[0] == 0) {
+                       btrfs_set_path_blocking(path);
+                       ret = btrfs_prev_leaf(fs_info->extent_root, path);
+                       if (ret != 0) {
+                               if (ret > 0) {
+                                       pr_debug("logical %llu is not within "
+                                                "any extent\n", logical);
+                                       ret = -ENOENT;
+                               }
+                               return ret;
+                       }
+               } else {
+                       path->slots[0]--;
+               }
+               nritems = btrfs_header_nritems(path->nodes[0]);
+               if (nritems == 0) {
+                       pr_debug("logical %llu is not within any extent\n",
+                                logical);
+                       return -ENOENT;
+               }
+               if (path->slots[0] == nritems)
+                       path->slots[0]--;
+
+               btrfs_item_key_to_cpu(path->nodes[0], found_key,
+                                     path->slots[0]);
+               if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
+                   found_key->type == BTRFS_METADATA_ITEM_KEY)
+                       break;
+       }
+
        if (found_key->type == BTRFS_METADATA_ITEM_KEY)
                size = fs_info->extent_root->leafsize;
        else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
                size = found_key->offset;
 
-       if ((found_key->type != BTRFS_EXTENT_ITEM_KEY &&
-            found_key->type != BTRFS_METADATA_ITEM_KEY) ||
-           found_key->objectid > logical ||
+       if (found_key->objectid > logical ||
            found_key->objectid + size <= logical) {
                pr_debug("logical %llu is not within any extent\n", logical);
                return -ENOENT;
@@ -1601,7 +1623,6 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
        struct btrfs_key found_key;
 
        while (!ret) {
-               path->leave_spinning = 1;
                ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
                                     &found_key);
                if (ret < 0)
@@ -1614,9 +1635,12 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 
                parent = found_key.offset;
                slot = path->slots[0];
-               eb = path->nodes[0];
-               /* make sure we can use eb after releasing the path */
-               atomic_inc(&eb->refs);
+               eb = btrfs_clone_extent_buffer(path->nodes[0]);
+               if (!eb) {
+                       ret = -ENOMEM;
+                       break;
+               }
+               extent_buffer_get(eb);
                btrfs_tree_read_lock(eb);
                btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                btrfs_release_path(path);
@@ -1674,17 +1698,20 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
                ++found;
 
                slot = path->slots[0];
-               eb = path->nodes[0];
-               /* make sure we can use eb after releasing the path */
-               atomic_inc(&eb->refs);
+               eb = btrfs_clone_extent_buffer(path->nodes[0]);
+               if (!eb) {
+                       ret = -ENOMEM;
+                       break;
+               }
+               extent_buffer_get(eb);
 
                btrfs_tree_read_lock(eb);
                btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                btrfs_release_path(path);
 
                leaf = path->nodes[0];
-               item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-               ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+               item_size = btrfs_item_size_nr(leaf, slot);
+               ptr = btrfs_item_ptr_offset(leaf, slot);
                cur_offset = 0;
 
                while (cur_offset < item_size) {
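
A note on the recurring "eie = NULL" additions in this file: they implement
an ownership hand-off. Once the locally built inode-element chain has been
spliced into ref->inode_list (or stashed in a ulist aux pointer), the new
error paths that call free_inode_elem_list() must no longer see it, or the
chain would be freed twice. The idiom in isolation, as a small sketch with
hypothetical names:

	struct elem { struct elem *next; };

	/*
	 * Splice the locally built *chain in front of *owner. Resetting
	 * *chain to NULL transfers ownership: the caller's error path may
	 * then free whatever *chain still points to without double-freeing.
	 */
	static void hand_off(struct elem **owner, struct elem **chain)
	{
		struct elem *e = *chain;

		if (!e)
			return;
		while (e->next)
			e = e->next;
		e->next = *owner;
		*owner = *chain;
		*chain = NULL;
	}
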
index ac0b39db27d175af15a718a41ac2ebf11c32150f..8fed2125689ed39b928800e78cfbf3b720d52a24 100644 (file)
@@ -43,6 +43,7 @@
 #define BTRFS_INODE_COPY_EVERYTHING            8
 #define BTRFS_INODE_IN_DELALLOC_LIST           9
 #define BTRFS_INODE_READDIO_NEED_LOCK          10
+#define BTRFS_INODE_HAS_PROPS                  11
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -135,6 +136,9 @@ struct btrfs_inode {
         */
        u64 index_cnt;
 
+       /* Cache the directory index number to speed the dir/file remove */
+       u64 dir_index;
+
        /* the fsync log has some corner cases that mean we have to check
         * directories to see if any unlinks have been done before
         * the directory was logged.  See tree-log.c for all the
index cb05e1c842c5b8b84dee98d1a3f452eaa179417e..49a62b4dda3b0184ccd30880b8449b967eda5f27 100644 (file)
@@ -1456,10 +1456,14 @@ static int btrfsic_handle_extent_data(
        btrfsic_read_from_block_data(block_ctx, &file_extent_item,
                                     file_extent_item_offset,
                                     sizeof(struct btrfs_file_extent_item));
-       next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
-                     btrfs_stack_file_extent_offset(&file_extent_item);
-       generation = btrfs_stack_file_extent_generation(&file_extent_item);
-       num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+       next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
+       if (btrfs_stack_file_extent_compression(&file_extent_item) ==
+           BTRFS_COMPRESS_NONE) {
+               next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
+               num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+       } else {
+               num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
+       }
        generation = btrfs_stack_file_extent_generation(&file_extent_item);
 
        if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
index f5cdeb4b553824744429cff1f4d8b57c17a27909..e2600cdb6c257e366b873445c396a917249e76f3 100644 (file)
@@ -128,11 +128,10 @@ static int check_compressed_csum(struct inode *inode,
                kunmap_atomic(kaddr);
 
                if (csum != *cb_sum) {
-                       printk(KERN_INFO "btrfs csum failed ino %llu "
-                              "extent %llu csum %u "
-                              "wanted %u mirror %d\n",
-                              btrfs_ino(inode), disk_start, csum, *cb_sum,
-                              cb->mirror_num);
+                       btrfs_info(BTRFS_I(inode)->root->fs_info,
+                          "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
+                          btrfs_ino(inode), disk_start, csum, *cb_sum,
+                          cb->mirror_num);
                        ret = -EIO;
                        goto fail;
                }
@@ -411,7 +410,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
                        bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
                }
                if (bytes_left < PAGE_CACHE_SIZE) {
-                       printk("bytes left %lu compress len %lu nr %lu\n",
+                       btrfs_info(BTRFS_I(inode)->root->fs_info,
+                                       "bytes left %lu compress len %lu nr %lu",
                               bytes_left, cb->compressed_len, cb->nr_pages);
                }
                bytes_left -= PAGE_CACHE_SIZE;
index 316136bd6dd7eb3899bcf060c3e0bd749f674310..cbd3a7d6fa681acfc0b00cb515fcddbcf8880f49 100644 (file)
@@ -39,9 +39,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct extent_buffer *src_buf);
 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
                    int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
                                 struct extent_buffer *eb);
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
@@ -475,6 +474,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
  * the index is the shifted logical of the *new* root node for root replace
  * operations, or the shifted logical of the affected block for all other
  * operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
  */
 static noinline int
 __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -483,24 +484,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
        struct rb_node **new;
        struct rb_node *parent = NULL;
        struct tree_mod_elem *cur;
-       int ret = 0;
 
        BUG_ON(!tm);
 
-       tree_mod_log_write_lock(fs_info);
-       if (list_empty(&fs_info->tree_mod_seq_list)) {
-               tree_mod_log_write_unlock(fs_info);
-               /*
-                * Ok we no longer care about logging modifications, free up tm
-                * and return 0.  Any callers shouldn't be using tm after
-                * calling tree_mod_log_insert, but if they do we can just
-                * change this to return a special error code to let the callers
-                * do their own thing.
-                */
-               kfree(tm);
-               return 0;
-       }
-
        spin_lock(&fs_info->tree_mod_seq_lock);
        tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
        spin_unlock(&fs_info->tree_mod_seq_lock);
@@ -518,18 +504,13 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
                        new = &((*new)->rb_left);
                else if (cur->seq > tm->seq)
                        new = &((*new)->rb_right);
-               else {
-                       ret = -EEXIST;
-                       kfree(tm);
-                       goto out;
-               }
+               else
+                       return -EEXIST;
        }
 
        rb_link_node(&tm->node, parent, new);
        rb_insert_color(&tm->node, tm_root);
-out:
-       tree_mod_log_write_unlock(fs_info);
-       return ret;
+       return 0;
 }
 
 /*
@@ -545,19 +526,38 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
                return 1;
        if (eb && btrfs_header_level(eb) == 0)
                return 1;
+
+       tree_mod_log_write_lock(fs_info);
+       if (list_empty(&(fs_info)->tree_mod_seq_list)) {
+               tree_mod_log_write_unlock(fs_info);
+               return 1;
+       }
+
        return 0;
 }
 
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
-                         struct extent_buffer *eb, int slot,
-                         enum mod_log_op op, gfp_t flags)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+                                   struct extent_buffer *eb)
+{
+       smp_mb();
+       if (list_empty(&(fs_info)->tree_mod_seq_list))
+               return 0;
+       if (eb && btrfs_header_level(eb) == 0)
+               return 0;
+
+       return 1;
+}
+
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+                   enum mod_log_op op, gfp_t flags)
 {
        struct tree_mod_elem *tm;
 
        tm = kzalloc(sizeof(*tm), flags);
        if (!tm)
-               return -ENOMEM;
+               return NULL;
 
        tm->index = eb->start >> PAGE_CACHE_SHIFT;
        if (op != MOD_LOG_KEY_ADD) {
@@ -567,8 +567,9 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
        tm->op = op;
        tm->slot = slot;
        tm->generation = btrfs_node_ptr_generation(eb, slot);
+       RB_CLEAR_NODE(&tm->node);
 
-       return __tree_mod_log_insert(fs_info, tm);
+       return tm;
 }
 
 static noinline int
@@ -576,10 +577,27 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
                        struct extent_buffer *eb, int slot,
                        enum mod_log_op op, gfp_t flags)
 {
-       if (tree_mod_dont_log(fs_info, eb))
+       struct tree_mod_elem *tm;
+       int ret;
+
+       if (!tree_mod_need_log(fs_info, eb))
                return 0;
 
-       return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+       tm = alloc_tree_mod_elem(eb, slot, op, flags);
+       if (!tm)
+               return -ENOMEM;
+
+       if (tree_mod_dont_log(fs_info, eb)) {
+               kfree(tm);
+               return 0;
+       }
+
+       ret = __tree_mod_log_insert(fs_info, tm);
+       tree_mod_log_write_unlock(fs_info);
+       if (ret)
+               kfree(tm);
+
+       return ret;
 }
 
 static noinline int
@@ -587,53 +605,95 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
                         struct extent_buffer *eb, int dst_slot, int src_slot,
                         int nr_items, gfp_t flags)
 {
-       struct tree_mod_elem *tm;
-       int ret;
+       struct tree_mod_elem *tm = NULL;
+       struct tree_mod_elem **tm_list = NULL;
+       int ret = 0;
        int i;
+       int locked = 0;
 
-       if (tree_mod_dont_log(fs_info, eb))
+       if (!tree_mod_need_log(fs_info, eb))
                return 0;
 
+       tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+       if (!tm_list)
+               return -ENOMEM;
+
+       tm = kzalloc(sizeof(*tm), flags);
+       if (!tm) {
+               ret = -ENOMEM;
+               goto free_tms;
+       }
+
+       tm->index = eb->start >> PAGE_CACHE_SHIFT;
+       tm->slot = src_slot;
+       tm->move.dst_slot = dst_slot;
+       tm->move.nr_items = nr_items;
+       tm->op = MOD_LOG_MOVE_KEYS;
+
+       for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+               tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+                   MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+               if (!tm_list[i]) {
+                       ret = -ENOMEM;
+                       goto free_tms;
+               }
+       }
+
+       if (tree_mod_dont_log(fs_info, eb))
+               goto free_tms;
+       locked = 1;
+
        /*
         * When we override something during the move, we log these removals.
         * This can only happen when we move towards the beginning of the
         * buffer, i.e. dst_slot < src_slot.
         */
        for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-               ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
-                               MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
-               BUG_ON(ret < 0);
+               ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+               if (ret)
+                       goto free_tms;
        }
 
-       tm = kzalloc(sizeof(*tm), flags);
-       if (!tm)
-               return -ENOMEM;
+       ret = __tree_mod_log_insert(fs_info, tm);
+       if (ret)
+               goto free_tms;
+       tree_mod_log_write_unlock(fs_info);
+       kfree(tm_list);
 
-       tm->index = eb->start >> PAGE_CACHE_SHIFT;
-       tm->slot = src_slot;
-       tm->move.dst_slot = dst_slot;
-       tm->move.nr_items = nr_items;
-       tm->op = MOD_LOG_MOVE_KEYS;
+       return 0;
+free_tms:
+       for (i = 0; i < nr_items; i++) {
+               if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+                       rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+               kfree(tm_list[i]);
+       }
+       if (locked)
+               tree_mod_log_write_unlock(fs_info);
+       kfree(tm_list);
+       kfree(tm);
 
-       return __tree_mod_log_insert(fs_info, tm);
+       return ret;
 }
 
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+                      struct tree_mod_elem **tm_list,
+                      int nritems)
 {
-       int i;
-       u32 nritems;
+       int i, j;
        int ret;
 
-       if (btrfs_header_level(eb) == 0)
-               return;
-
-       nritems = btrfs_header_nritems(eb);
        for (i = nritems - 1; i >= 0; i--) {
-               ret = __tree_mod_log_insert_key(fs_info, eb, i,
-                               MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
-               BUG_ON(ret < 0);
+               ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+               if (ret) {
+                       for (j = nritems - 1; j > i; j--)
+                               rb_erase(&tm_list[j]->node,
+                                        &fs_info->tree_mod_log);
+                       return ret;
+               }
        }
+
+       return 0;
 }
 
 static noinline int
@@ -642,17 +702,38 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
                         struct extent_buffer *new_root, gfp_t flags,
                         int log_removal)
 {
-       struct tree_mod_elem *tm;
+       struct tree_mod_elem *tm = NULL;
+       struct tree_mod_elem **tm_list = NULL;
+       int nritems = 0;
+       int ret = 0;
+       int i;
 
-       if (tree_mod_dont_log(fs_info, NULL))
+       if (!tree_mod_need_log(fs_info, NULL))
                return 0;
 
-       if (log_removal)
-               __tree_mod_log_free_eb(fs_info, old_root);
+       if (log_removal && btrfs_header_level(old_root) > 0) {
+               nritems = btrfs_header_nritems(old_root);
+               tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+                                 flags);
+               if (!tm_list) {
+                       ret = -ENOMEM;
+                       goto free_tms;
+               }
+               for (i = 0; i < nritems; i++) {
+                       tm_list[i] = alloc_tree_mod_elem(old_root, i,
+                           MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+                       if (!tm_list[i]) {
+                               ret = -ENOMEM;
+                               goto free_tms;
+                       }
+               }
+       }
 
        tm = kzalloc(sizeof(*tm), flags);
-       if (!tm)
-               return -ENOMEM;
+       if (!tm) {
+               ret = -ENOMEM;
+               goto free_tms;
+       }
 
        tm->index = new_root->start >> PAGE_CACHE_SHIFT;
        tm->old_root.logical = old_root->start;
@@ -660,7 +741,30 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        tm->generation = btrfs_header_generation(old_root);
        tm->op = MOD_LOG_ROOT_REPLACE;
 
-       return __tree_mod_log_insert(fs_info, tm);
+       if (tree_mod_dont_log(fs_info, NULL))
+               goto free_tms;
+
+       if (tm_list)
+               ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+       if (!ret)
+               ret = __tree_mod_log_insert(fs_info, tm);
+
+       tree_mod_log_write_unlock(fs_info);
+       if (ret)
+               goto free_tms;
+       kfree(tm_list);
+
+       return ret;
+
+free_tms:
+       if (tm_list) {
+               for (i = 0; i < nritems; i++)
+                       kfree(tm_list[i]);
+               kfree(tm_list);
+       }
+       kfree(tm);
+
+       return ret;
 }
 
 static struct tree_mod_elem *
@@ -729,31 +833,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
        return __tree_mod_log_search(fs_info, start, min_seq, 0);
 }
 
-static noinline void
+static noinline int
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
                     struct extent_buffer *src, unsigned long dst_offset,
                     unsigned long src_offset, int nr_items)
 {
-       int ret;
+       int ret = 0;
+       struct tree_mod_elem **tm_list = NULL;
+       struct tree_mod_elem **tm_list_add, **tm_list_rem;
        int i;
+       int locked = 0;
 
-       if (tree_mod_dont_log(fs_info, NULL))
-               return;
+       if (!tree_mod_need_log(fs_info, NULL))
+               return 0;
 
        if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
-               return;
+               return 0;
+
+       tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+                         GFP_NOFS);
+       if (!tm_list)
+               return -ENOMEM;
 
+       tm_list_add = tm_list;
+       tm_list_rem = tm_list + nr_items;
        for (i = 0; i < nr_items; i++) {
-               ret = __tree_mod_log_insert_key(fs_info, src,
-                                               i + src_offset,
-                                               MOD_LOG_KEY_REMOVE, GFP_NOFS);
-               BUG_ON(ret < 0);
-               ret = __tree_mod_log_insert_key(fs_info, dst,
-                                                    i + dst_offset,
-                                                    MOD_LOG_KEY_ADD,
-                                                    GFP_NOFS);
-               BUG_ON(ret < 0);
+               tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+                   MOD_LOG_KEY_REMOVE, GFP_NOFS);
+               if (!tm_list_rem[i]) {
+                       ret = -ENOMEM;
+                       goto free_tms;
+               }
+
+               tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+                   MOD_LOG_KEY_ADD, GFP_NOFS);
+               if (!tm_list_add[i]) {
+                       ret = -ENOMEM;
+                       goto free_tms;
+               }
        }
+
+       if (tree_mod_dont_log(fs_info, NULL))
+               goto free_tms;
+       locked = 1;
+
+       for (i = 0; i < nr_items; i++) {
+               ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+               if (ret)
+                       goto free_tms;
+               ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+               if (ret)
+                       goto free_tms;
+       }
+
+       tree_mod_log_write_unlock(fs_info);
+       kfree(tm_list);
+
+       return 0;
+
+free_tms:
+       for (i = 0; i < nr_items * 2; i++) {
+               if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+                       rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+               kfree(tm_list[i]);
+       }
+       if (locked)
+               tree_mod_log_write_unlock(fs_info);
+       kfree(tm_list);
+
+       return ret;
 }
 
 static inline void
@@ -772,18 +920,58 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 {
        int ret;
 
-       ret = __tree_mod_log_insert_key(fs_info, eb, slot,
+       ret = tree_mod_log_insert_key(fs_info, eb, slot,
                                        MOD_LOG_KEY_REPLACE,
                                        atomic ? GFP_ATOMIC : GFP_NOFS);
        BUG_ON(ret < 0);
 }
 
-static noinline void
+static noinline int
 tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 {
+       struct tree_mod_elem **tm_list = NULL;
+       int nritems = 0;
+       int i;
+       int ret = 0;
+
+       if (btrfs_header_level(eb) == 0)
+               return 0;
+
+       if (!tree_mod_need_log(fs_info, NULL))
+               return 0;
+
+       nritems = btrfs_header_nritems(eb);
+       tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+                         GFP_NOFS);
+       if (!tm_list)
+               return -ENOMEM;
+
+       for (i = 0; i < nritems; i++) {
+               tm_list[i] = alloc_tree_mod_elem(eb, i,
+                   MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+               if (!tm_list[i]) {
+                       ret = -ENOMEM;
+                       goto free_tms;
+               }
+       }
+
        if (tree_mod_dont_log(fs_info, eb))
-               return;
-       __tree_mod_log_free_eb(fs_info, eb);
+               goto free_tms;
+
+       ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+       tree_mod_log_write_unlock(fs_info);
+       if (ret)
+               goto free_tms;
+       kfree(tm_list);
+
+       return 0;
+
+free_tms:
+       for (i = 0; i < nritems; i++)
+               kfree(tm_list[i]);
+       kfree(tm_list);
+
+       return ret;
 }
 
 static noinline void
@@ -1041,8 +1229,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                btrfs_set_node_ptr_generation(parent, parent_slot,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
-               if (last_ref)
-                       tree_mod_log_free_eb(root->fs_info, buf);
+               if (last_ref) {
+                       ret = tree_mod_log_free_eb(root->fs_info, buf);
+                       if (ret) {
+                               btrfs_abort_transaction(trans, root, ret);
+                               return ret;
+                       }
+               }
                btrfs_free_tree_block(trans, root, buf, parent_start,
                                      last_ref);
        }
@@ -1287,8 +1480,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                old = read_tree_block(root, logical, blocksize, 0);
                if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
                        free_extent_buffer(old);
-                       pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
-                               logical);
+                       btrfs_warn(root->fs_info,
+                               "failed to read tree block %llu from get_old_root", logical);
                } else {
                        eb = btrfs_clone_extent_buffer(old);
                        free_extent_buffer(old);
@@ -2462,6 +2655,49 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
        return 0;
 }
 
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+               u64 iobjectid, u64 ioff, u8 key_type,
+               struct btrfs_key *found_key)
+{
+       int ret;
+       struct btrfs_key key;
+       struct extent_buffer *eb;
+       struct btrfs_path *path;
+
+       key.type = key_type;
+       key.objectid = iobjectid;
+       key.offset = ioff;
+
+       if (found_path == NULL) {
+               path = btrfs_alloc_path();
+               if (!path)
+                       return -ENOMEM;
+       } else
+               path = found_path;
+
+       ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+       if ((ret < 0) || (found_key == NULL)) {
+               if (path != found_path)
+                       btrfs_free_path(path);
+               return ret;
+       }
+
+       eb = path->nodes[0];
+       if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
+               ret = btrfs_next_leaf(fs_root, path);
+               if (ret)
+                       return ret;
+               eb = path->nodes[0];
+       }
+
+       btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
+       if (found_key->type != key.type ||
+                       found_key->objectid != key.objectid)
+               return 1;
+
+       return 0;
+}
+
 /*
  * look for key in the tree.  path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
@@ -2495,6 +2731,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
        WARN_ON(p->nodes[0] != NULL);
+       BUG_ON(!cow && ins_len);
 
        if (ins_len < 0) {
                lowest_unlock = 2;
@@ -2603,8 +2840,6 @@ again:
                        }
                }
 cow_done:
-               BUG_ON(!cow && ins_len);
-
                p->nodes[level] = b;
                btrfs_clear_path_blocking(p, NULL, 0);
 
@@ -2614,13 +2849,19 @@ cow_done:
                 * It is safe to drop the lock on our parent before we
                 * go through the expensive btree search on b.
                 *
-                * If cow is true, then we might be changing slot zero,
-                * which may require changing the parent.  So, we can't
-                * drop the lock until after we know which slot we're
-                * operating on.
+                * If we're inserting or deleting (ins_len != 0), then we might
+                * be changing slot zero, which may require changing the parent.
+                * So, we can't drop the lock until after we know which slot
+                * we're operating on.
                 */
-               if (!cow)
-                       btrfs_unlock_up_safe(p, level + 1);
+               if (!ins_len && !p->keep_locks) {
+                       int u = level + 1;
+
+                       if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
+                               btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
+                               p->locks[u] = 0;
+                       }
+               }
 
                ret = key_search(b, key, level, &prev_cmp, &slot);
 
@@ -2648,7 +2889,7 @@ cow_done:
                         * which means we must have a write lock
                         * on the parent
                         */
-                       if (slot == 0 && cow &&
+                       if (slot == 0 && ins_len &&
                            write_lock_level < level + 1) {
                                write_lock_level = level + 1;
                                btrfs_release_path(p);
@@ -2901,7 +3142,9 @@ again:
                        if (ret < 0)
                                return ret;
                        if (!ret) {
-                               p->slots[0] = btrfs_header_nritems(leaf) - 1;
+                               leaf = p->nodes[0];
+                               if (p->slots[0] == btrfs_header_nritems(leaf))
+                                       p->slots[0]--;
                                return 0;
                        }
                        if (!return_any)
@@ -3022,8 +3265,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
        } else
                push_items = min(src_nritems - 8, push_items);
 
-       tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
-                            push_items);
+       ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+                                  push_items);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               return ret;
+       }
        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(dst_nritems),
                           btrfs_node_key_ptr_offset(0),
@@ -3093,8 +3340,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                                      (dst_nritems) *
                                      sizeof(struct btrfs_key_ptr));
 
-       tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
-                            src_nritems - push_items, push_items);
+       ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+                                  src_nritems - push_items, push_items);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               return ret;
+       }
        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3295,7 +3546,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
                            btrfs_header_chunk_tree_uuid(split),
                            BTRFS_UUID_SIZE);
 
-       tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+       ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
+                                  mid, c_nritems - mid);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               return ret;
+       }
        copy_extent_buffer(split, c,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(mid),
@@ -3362,8 +3618,8 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
        int ret;
        ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
        if (ret < 0) {
-               printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
-                      "used %d nritems %d\n",
+               btrfs_crit(root->fs_info,
+                       "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
                       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
                       leaf_space_used(leaf, 0, nritems), nritems);
        }
@@ -3571,6 +3827,19 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
        if (left_nritems == 0)
                goto out_unlock;
 
+       if (path->slots[0] == left_nritems && !empty) {
+               /* Key greater than all keys in the leaf, right neighbor has
+                * enough room for it and we're not emptying our leaf to delete
+                * it, therefore use right neighbor to insert the new item and
+                * no need to touch/dirty our left leaf. */
+               btrfs_tree_unlock(left);
+               free_extent_buffer(left);
+               path->nodes[0] = right;
+               path->slots[0] = 0;
+               path->slots[1]++;
+               return 0;
+       }
+
        return __push_leaf_right(trans, root, path, min_data_size, empty,
                                right, free_space, left_nritems, min_slot);
 out_unlock:
@@ -3887,14 +4156,17 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
        int progress = 0;
        int slot;
        u32 nritems;
+       int space_needed = data_size;
 
        slot = path->slots[0];
+       if (slot < btrfs_header_nritems(path->nodes[0]))
+               space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
 
        /*
         * try to push all the items after our slot into the
         * right leaf
         */
-       ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+       ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
        if (ret < 0)
                return ret;
 
@@ -3914,7 +4186,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
 
        /* try to push all the items before our slot into the next leaf */
        slot = path->slots[0];
-       ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+       ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
        if (ret < 0)
                return ret;
 
@@ -3958,13 +4230,18 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 
        /* first try to make some room by pushing left and right */
        if (data_size && path->nodes[1]) {
-               wret = push_leaf_right(trans, root, path, data_size,
-                                      data_size, 0, 0);
+               int space_needed = data_size;
+
+               if (slot < btrfs_header_nritems(l))
+                       space_needed -= btrfs_leaf_free_space(root, l);
+
+               wret = push_leaf_right(trans, root, path, space_needed,
+                                      space_needed, 0, 0);
                if (wret < 0)
                        return wret;
                if (wret) {
-                       wret = push_leaf_left(trans, root, path, data_size,
-                                             data_size, 0, (u32)-1);
+                       wret = push_leaf_left(trans, root, path, space_needed,
+                                             space_needed, 0, (u32)-1);
                        if (wret < 0)
                                return wret;
                }
@@ -4432,7 +4709,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
        BUG_ON(slot < 0);
        if (slot >= nritems) {
                btrfs_print_leaf(root, leaf);
-               printk(KERN_CRIT "slot %d too large, nritems %d\n",
+               btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
                       slot, nritems);
                BUG_ON(1);
        }
@@ -4495,7 +4772,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 
        if (btrfs_leaf_free_space(root, leaf) < total_size) {
                btrfs_print_leaf(root, leaf);
-               printk(KERN_CRIT "not enough freespace need %u have %d\n",
+               btrfs_crit(root->fs_info, "not enough freespace need %u have %d",
                       total_size, btrfs_leaf_free_space(root, leaf));
                BUG();
        }
@@ -4505,7 +4782,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 
                if (old_data < data_end) {
                        btrfs_print_leaf(root, leaf);
-                       printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
+                       btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d",
                               slot, old_data, data_end);
                        BUG_ON(1);
                }
@@ -4817,7 +5094,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  * This may release the path, and so you may lose any locks held at the
  * time you call it.
  */
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 {
        struct btrfs_key key;
        struct btrfs_disk_key found_key;
@@ -5240,7 +5517,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 
                        if (!left_start_ctransid || !right_start_ctransid) {
                                WARN(1, KERN_WARNING
-                                       "btrfs: btrfs_compare_tree detected "
+                                       "BTRFS: btrfs_compare_tree detected "
                                        "a change in one of the trees while "
                                        "iterating. This is probably a "
                                        "bug.\n");
@@ -5680,3 +5957,46 @@ int btrfs_previous_item(struct btrfs_root *root,
        }
        return 1;
 }
+
+/*
+ * search in the extent tree to find a previous Metadata/Data extent item
+ * with min objectid.
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_extent_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid)
+{
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
+       u32 nritems;
+       int ret;
+
+       while (1) {
+               if (path->slots[0] == 0) {
+                       btrfs_set_path_blocking(path);
+                       ret = btrfs_prev_leaf(root, path);
+                       if (ret != 0)
+                               return ret;
+               } else {
+                       path->slots[0]--;
+               }
+               leaf = path->nodes[0];
+               nritems = btrfs_header_nritems(leaf);
+               if (nritems == 0)
+                       return 1;
+               if (path->slots[0] == nritems)
+                       path->slots[0]--;
+
+               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+               if (found_key.objectid < min_objectid)
+                       break;
+               if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
+                   found_key.type == BTRFS_METADATA_ITEM_KEY)
+                       return 0;
+               if (found_key.objectid == min_objectid &&
+                   found_key.type < BTRFS_EXTENT_ITEM_KEY)
+                       break;
+       }
+       return 1;
+}
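
btrfs_previous_extent_item() walks keys backwards until it reaches a
Metadata/Data extent item whose objectid is still at or above
min_objectid.  A simplified userspace model of that scan over a flat
key array (the real code also crosses leaf boundaries through
btrfs_prev_leaf(); the key-type constants match the on-disk format,
everything else is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define EXTENT_ITEM_KEY   168   /* BTRFS_EXTENT_ITEM_KEY */
#define METADATA_ITEM_KEY 169   /* BTRFS_METADATA_ITEM_KEY */

struct key { uint64_t objectid; uint8_t type; };

/* Step to the previous key until a Metadata/Data extent item is
 * found.  Returns 0 on a match (slot updated), 1 when none exists. */
static int prev_extent_item(const struct key *keys, int *slot,
                            uint64_t min_objectid)
{
        while (*slot > 0) {
                const struct key *k;

                (*slot)--;
                k = &keys[*slot];
                if (k->objectid < min_objectid)
                        return 1;
                if (k->type == EXTENT_ITEM_KEY ||
                    k->type == METADATA_ITEM_KEY)
                        return 0;
                if (k->objectid == min_objectid &&
                    k->type < EXTENT_ITEM_KEY)
                        return 1;
        }
        return 1;
}

int main(void)
{
        struct key keys[] = {
                { 100, EXTENT_ITEM_KEY },
                { 100, 178 },           /* an extent backref key */
                { 200, METADATA_ITEM_KEY },
        };
        int slot = 2;
        int ret = prev_extent_item(keys, &slot, 100);

        printf("found: %d at slot %d\n", ret, slot);
        return 0;
}
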
index 7506825211a29d44772cdcd93732875258281daf..2c1a42ca519f43a8dd85ce95a24fc6ed0a22d07d 100644 (file)
@@ -521,9 +521,15 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF   (1ULL << 6)
 #define BTRFS_FEATURE_INCOMPAT_RAID56          (1ULL << 7)
 #define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_NO_HOLES                (1ULL << 9)
 
 #define BTRFS_FEATURE_COMPAT_SUPP              0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_SET          0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR                0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP           0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET       0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR     0ULL
+
 #define BTRFS_FEATURE_INCOMPAT_SUPP                    \
        (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
         BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
@@ -532,7 +538,12 @@ struct btrfs_super_block {
         BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |          \
         BTRFS_FEATURE_INCOMPAT_RAID56 |                \
         BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |         \
-        BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+        BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |       \
+        BTRFS_FEATURE_INCOMPAT_NO_HOLES)
+
+#define BTRFS_FEATURE_INCOMPAT_SAFE_SET                        \
+       (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR              0ULL
 
 /*
  * A leaf is full of items. offset and size tell us where to find
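
The new SAFE_SET/SAFE_CLEAR masks whitelist which feature bits may be
flipped while the filesystem is mounted (only EXTENDED_IREF may be set
online here).  A hedged userspace sketch of the kind of check a sysfs
feature toggle could perform against them; the check itself is an
assumption, not the kernel implementation:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define INCOMPAT_NO_HOLES      (1ULL << 9)

#define INCOMPAT_SAFE_SET      (INCOMPAT_EXTENDED_IREF)
#define INCOMPAT_SAFE_CLEAR    0ULL

/* A flag change is allowed online only if every bit of it falls
 * inside the matching safe mask. */
static bool change_allowed(uint64_t flag, bool set)
{
        uint64_t safe = set ? INCOMPAT_SAFE_SET : INCOMPAT_SAFE_CLEAR;

        return (flag & safe) == flag;
}

int main(void)
{
        printf("set EXTENDED_IREF online: %d\n",
               change_allowed(INCOMPAT_EXTENDED_IREF, true));
        printf("set NO_HOLES online:      %d\n",
               change_allowed(INCOMPAT_NO_HOLES, true));
        return 0;
}
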
@@ -1094,7 +1105,7 @@ struct btrfs_qgroup_limit_item {
 } __attribute__ ((__packed__));
 
 struct btrfs_space_info {
-       u64 flags;
+       spinlock_t lock;
 
        u64 total_bytes;        /* total bytes in the space,
                                   this doesn't take mirrors into account */
@@ -1104,14 +1115,25 @@ struct btrfs_space_info {
                                   transaction finishes */
        u64 bytes_reserved;     /* total bytes the allocator has reserved for
                                   current allocations */
-       u64 bytes_readonly;     /* total bytes that are read only */
-
        u64 bytes_may_use;      /* number of bytes that may be used for
                                   delalloc/allocations */
+       u64 bytes_readonly;     /* total bytes that are read only */
+
+       unsigned int full:1;    /* indicates that we cannot allocate any more
+                                  chunks for this space */
+       unsigned int chunk_alloc:1;     /* set if we are allocating a chunk */
+
+       unsigned int flush:1;           /* set if we are trying to make space */
+
+       unsigned int force_alloc;       /* set if we need to force a chunk
+                                          alloc for this space */
+
        u64 disk_used;          /* total bytes used on disk */
        u64 disk_total;         /* total bytes on disk, takes mirrors into
                                   account */
 
+       u64 flags;
+
        /*
         * bytes_pinned is kept in line with what is actually pinned, as in
         * we've called update_block_group and dropped the bytes_used counter
@@ -1124,22 +1146,15 @@ struct btrfs_space_info {
         */
        struct percpu_counter total_bytes_pinned;
 
-       unsigned int full:1;    /* indicates that we cannot allocate any more
-                                  chunks for this space */
-       unsigned int chunk_alloc:1;     /* set if we are allocating a chunk */
-
-       unsigned int flush:1;           /* set if we are trying to make space */
-
-       unsigned int force_alloc;       /* set if we need to force a chunk
-                                          alloc for this space */
-
        struct list_head list;
 
+       struct rw_semaphore groups_sem;
        /* for block groups in our same type */
        struct list_head block_groups[BTRFS_NR_RAID_TYPES];
-       spinlock_t lock;
-       struct rw_semaphore groups_sem;
        wait_queue_head_t wait;
+
+       struct kobject kobj;
+       struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
 };
 
 #define        BTRFS_BLOCK_RSV_GLOBAL          1
@@ -1346,6 +1361,7 @@ struct btrfs_fs_info {
 
        u64 generation;
        u64 last_trans_committed;
+       u64 avg_delayed_ref_runtime;
 
        /*
         * this is updated to the current trans every time a full commit
@@ -1448,7 +1464,6 @@ struct btrfs_fs_info {
        spinlock_t tree_mod_seq_lock;
        atomic64_t tree_mod_seq;
        struct list_head tree_mod_seq_list;
-       struct seq_list tree_mod_seq_elem;
 
        /* this protects tree_mod_log */
        rwlock_t tree_mod_log_lock;
@@ -1515,6 +1530,8 @@ struct btrfs_fs_info {
        int thread_pool_size;
 
        struct kobject super_kobj;
+       struct kobject *space_info_kobj;
+       struct kobject *device_dir_kobj;
        struct completion kobj_unregister;
        int do_barriers;
        int closing;
@@ -1643,6 +1660,10 @@ struct btrfs_fs_info {
        spinlock_t reada_lock;
        struct radix_tree_root reada_tree;
 
+       /* Extent buffer radix tree */
+       spinlock_t buffer_lock;
+       struct radix_tree_root buffer_radix;
+
        /* next backup root to be overwritten */
        int backup_root_index;
 
@@ -1795,6 +1816,12 @@ struct btrfs_root {
        struct list_head ordered_extents;
        struct list_head ordered_root;
        u64 nr_ordered_extents;
+
+       /*
+        * Number of currently running send ioctls, used to prevent
+        * manipulation of the read-only status via SUBVOL_SETFLAGS
+        */
+       int send_in_progress;
 };
 
 struct btrfs_ioctl_defrag_range_args {
@@ -1997,6 +2024,7 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR       (1 << 22)
 #define BTRFS_MOUNT_RESCAN_UUID_TREE   (1 << 23)
+#define        BTRFS_MOUNT_CHANGE_INODE_CACHE  (1 << 24)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL  (30)
 
@@ -2925,6 +2953,10 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
                         struct btrfs_file_extent_item, generation, 64);
 BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
                         struct btrfs_file_extent_item, num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes,
+                        struct btrfs_file_extent_item, disk_num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
+                        struct btrfs_file_extent_item, compression, 8);
 
 static inline unsigned long
 btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -2958,15 +2990,6 @@ BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
 BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
                   other_encoding, 16);
 
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
-                                              struct btrfs_file_extent_item *e)
-{
-       return btrfs_file_extent_ram_bytes(eb, e);
-}
-
 /*
  * this returns the number of bytes used by the item on disk, minus the
  * size of any extent headers.  If a file is compressed on disk, this is
@@ -2980,6 +3003,32 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
        return btrfs_item_size(eb, e) - offset;
 }
 
+/* this returns the number of file bytes represented by the inline item.
+ * If an item is compressed, this is the uncompressed size
+ */
+static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+                                              int slot,
+                                              struct btrfs_file_extent_item *fi)
+{
+       struct btrfs_map_token token;
+
+       btrfs_init_map_token(&token);
+       /*
+        * return the space used on disk if this item isn't
+        * compressed or encoded
+        */
+       if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
+           btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
+           btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
+               return btrfs_file_extent_inline_item_len(eb,
+                                                        btrfs_item_nr(slot));
+       }
+
+       /* otherwise use the ram bytes field */
+       return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
+}
+
+
 /* btrfs_dev_stats_item */
 static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
                                        struct btrfs_dev_stats_item *ptr,
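
The reworked btrfs_file_extent_inline_len() above returns the on-leaf
payload size for plain inline extents and only falls back to the
ram_bytes field for compressed/encoded ones, matching the "don't use
ram_bytes for uncompressed inline items" fix in this pull.  A
userspace model of that choice (hypothetical types):

#include <stdint.h>
#include <stdio.h>

struct inline_extent {
        uint8_t  compression;
        uint8_t  encryption;
        uint16_t other_encoding;
        uint32_t item_payload_len;      /* bytes stored in the leaf */
        uint64_t ram_bytes;             /* uncompressed size */
};

/* For a plain inline extent the file length equals the inline payload
 * size on disk; otherwise ram_bytes holds the uncompressed size. */
static uint64_t inline_len(const struct inline_extent *e)
{
        if (e->compression == 0 && e->encryption == 0 &&
            e->other_encoding == 0)
                return e->item_payload_len;
        return e->ram_bytes;
}

int main(void)
{
        struct inline_extent zlib = {
                .compression = 1,
                .item_payload_len = 512,
                .ram_bytes = 4096,
        };

        printf("uncompressed length: %llu\n",
               (unsigned long long)inline_len(&zlib));
        return 0;
}
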
@@ -3143,6 +3192,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
 
 int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root);
+int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
@@ -3163,6 +3214,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
                                                 struct btrfs_fs_info *info,
                                                 u64 bytenr);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
+int get_block_group_index(struct btrfs_block_group_cache *cache);
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root, u32 blocksize,
                                        u64 parent, u64 root_objectid,
@@ -3301,6 +3353,8 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
 int btrfs_previous_item(struct btrfs_root *root,
                        struct btrfs_path *path, u64 min_objectid,
                        int type);
+int btrfs_previous_extent_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid);
 void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
                             struct btrfs_key *new_key);
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
@@ -3350,6 +3404,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         struct btrfs_path *path,
                         struct btrfs_key *new_key);
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
+               u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_key *key, struct btrfs_path *p, int
                      ins_len, int cow);
@@ -3399,6 +3455,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
                        u64 time_seq);
 static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3563,12 +3620,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           const char *name, int name_len,
                           u64 inode_objectid, u64 ref_objectid, u64 *index);
-int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
-                             struct btrfs_root *root,
-                             struct btrfs_path *path,
-                             const char *name, int name_len,
-                             u64 inode_objectid, u64 ref_objectid, int mod,
-                             u64 *ret_index);
 int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_path *path, u64 objectid);
@@ -3676,7 +3727,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              struct extent_state **cached_state);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, u64 new_dirid);
+                            struct btrfs_root *new_root,
+                            struct btrfs_root *parent_root,
+                            u64 new_dirid);
 int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
                         size_t size, struct bio *bio,
                         unsigned long bio_flags);
@@ -3745,7 +3798,10 @@ extern const struct file_operations btrfs_file_operations;
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root, struct inode *inode,
                         struct btrfs_path *path, u64 start, u64 end,
-                        u64 *drop_end, int drop_cache);
+                        u64 *drop_end, int drop_cache,
+                        int replace_extent,
+                        u32 extent_item_size,
+                        int *key_inserted);
 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode, u64 start,
                       u64 end, int drop_cache);
@@ -3764,6 +3820,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 /* sysfs.c */
 int btrfs_init_sysfs(void);
 void btrfs_exit_sysfs(void);
+int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info);
+void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info);
 
 /* xattr.c */
 ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -3796,14 +3854,20 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
        btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
 #define btrfs_info(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_INFO fmt, ##args)
+
+#ifdef DEBUG
 #define btrfs_debug(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+#else
+#define btrfs_debug(fs_info, fmt, args...) \
+       no_printk(KERN_DEBUG fmt, ##args)
+#endif
 
 #ifdef CONFIG_BTRFS_ASSERT
 
 static inline void assfail(char *expr, char *file, int line)
 {
-       printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d",
+       pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
               expr, file, line);
        BUG();
 }
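
Routing the disabled btrfs_debug() through no_printk() keeps the
format string and its arguments visible to the compiler, so they stay
type-checked even while DEBUG is off.  A userspace analogue of the
same trick (GNU C variadic macros, hypothetical macro name):

#include <stdio.h>

#ifdef DEBUG
#define debug(fmt, ...) fprintf(stderr, fmt "\n", ##__VA_ARGS__)
#else
/* Dead branch: never executes, but the compiler still checks that
 * the arguments match the format string. */
#define debug(fmt, ...) \
        do { if (0) fprintf(stderr, fmt "\n", ##__VA_ARGS__); } while (0)
#endif

int main(void)
{
        debug("slot %d of %d", 3, 10);  /* checked either way */
        return 0;
}
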
@@ -3841,7 +3905,7 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
                if (!(features & flag)) {
                        features |= flag;
                        btrfs_set_super_incompat_flags(disk_super, features);
-                       printk(KERN_INFO "btrfs: setting %llu feature flag\n",
+                       btrfs_info(fs_info, "setting %llu feature flag",
                                         flag);
                }
                spin_unlock(&fs_info->super_lock);
index 8d292fbae659eff6a65bd7a8c255d741d998635e..451b00c86f6c0a038ed532f29855009abaa1d8a3 100644 (file)
@@ -55,8 +55,7 @@ static inline void btrfs_init_delayed_node(
        delayed_node->inode_id = inode_id;
        atomic_set(&delayed_node->refs, 0);
        delayed_node->count = 0;
-       delayed_node->in_list = 0;
-       delayed_node->inode_dirty = 0;
+       delayed_node->flags = 0;
        delayed_node->ins_root = RB_ROOT;
        delayed_node->del_root = RB_ROOT;
        mutex_init(&delayed_node->mutex);
@@ -172,7 +171,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
                                     int mod)
 {
        spin_lock(&root->lock);
-       if (node->in_list) {
+       if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
                if (!list_empty(&node->p_list))
                        list_move_tail(&node->p_list, &root->prepare_list);
                else if (mod)
@@ -182,7 +181,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
                list_add_tail(&node->p_list, &root->prepare_list);
                atomic_inc(&node->refs);        /* inserted into list */
                root->nodes++;
-               node->in_list = 1;
+               set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
        }
        spin_unlock(&root->lock);
 }
@@ -192,13 +191,13 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
                                       struct btrfs_delayed_node *node)
 {
        spin_lock(&root->lock);
-       if (node->in_list) {
+       if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
                root->nodes--;
                atomic_dec(&node->refs);        /* not in the list */
                list_del_init(&node->n_list);
                if (!list_empty(&node->p_list))
                        list_del_init(&node->p_list);
-               node->in_list = 0;
+               clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
        }
        spin_unlock(&root->lock);
 }
@@ -231,7 +230,8 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
 
        delayed_root = node->root->fs_info->delayed_root;
        spin_lock(&delayed_root->lock);
-       if (!node->in_list) {   /* not in the list */
+       if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
+               /* not in the list */
                if (list_empty(&delayed_root->node_list))
                        goto out;
                p = delayed_root->node_list.next;
@@ -1004,9 +1004,10 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
 {
        struct btrfs_delayed_root *delayed_root;
 
-       if (delayed_node && delayed_node->inode_dirty) {
+       if (delayed_node &&
+           test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
                BUG_ON(!delayed_node->root);
-               delayed_node->inode_dirty = 0;
+               clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
                delayed_node->count--;
 
                delayed_root = delayed_node->root->fs_info->delayed_root;
@@ -1014,6 +1015,18 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
        }
 }
 
+static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
+{
+       struct btrfs_delayed_root *delayed_root;
+
+       ASSERT(delayed_node->root);
+       clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
+       delayed_node->count--;
+
+       delayed_root = delayed_node->root->fs_info->delayed_root;
+       finish_one_item(delayed_root);
+}
+
 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
                                        struct btrfs_path *path,
@@ -1022,13 +1035,19 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
        struct btrfs_key key;
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
+       int mod;
        int ret;
 
        key.objectid = node->inode_id;
        btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
        key.offset = 0;
 
-       ret = btrfs_lookup_inode(trans, root, path, &key, 1);
+       if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
+               mod = -1;
+       else
+               mod = 1;
+
+       ret = btrfs_lookup_inode(trans, root, path, &key, mod);
        if (ret > 0) {
                btrfs_release_path(path);
                return -ENOENT;
@@ -1036,19 +1055,58 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
                return ret;
        }
 
-       btrfs_unlock_up_safe(path, 1);
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
        write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
                            sizeof(struct btrfs_inode_item));
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_release_path(path);
 
+       if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
+               goto no_iref;
+
+       path->slots[0]++;
+       if (path->slots[0] >= btrfs_header_nritems(leaf))
+               goto search;
+again:
+       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+       if (key.objectid != node->inode_id)
+               goto out;
+
+       if (key.type != BTRFS_INODE_REF_KEY &&
+           key.type != BTRFS_INODE_EXTREF_KEY)
+               goto out;
+
+       /*
+        * Delayed iref deletion is only done for an inode with a single
+        * link, so there is exactly one iref. The case of several irefs
+        * in the same item does not occur.
+        */
+       btrfs_del_item(trans, root, path);
+out:
+       btrfs_release_delayed_iref(node);
+no_iref:
+       btrfs_release_path(path);
+err_out:
        btrfs_delayed_inode_release_metadata(root, node);
        btrfs_release_delayed_inode(node);
 
-       return 0;
+       return ret;
+
+search:
+       btrfs_release_path(path);
+
+       btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
+       key.offset = -1;
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret < 0)
+               goto err_out;
+       ASSERT(ret);
+
+       ret = 0;
+       leaf = path->nodes[0];
+       path->slots[0]--;
+       goto again;
 }
 
 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
@@ -1059,7 +1117,7 @@ static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
        int ret;
 
        mutex_lock(&node->mutex);
-       if (!node->inode_dirty) {
+       if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
                mutex_unlock(&node->mutex);
                return 0;
        }
@@ -1203,7 +1261,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
                return 0;
 
        mutex_lock(&delayed_node->mutex);
-       if (!delayed_node->inode_dirty) {
+       if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
                mutex_unlock(&delayed_node->mutex);
                btrfs_release_delayed_node(delayed_node);
                return 0;
@@ -1227,7 +1285,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
        trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
 
        mutex_lock(&delayed_node->mutex);
-       if (delayed_node->inode_dirty)
+       if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
                ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
                                                   path, delayed_node);
        else
@@ -1300,36 +1358,9 @@ again:
        trans->block_rsv = &root->fs_info->delayed_block_rsv;
 
        __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
-       /*
-        * Maybe new delayed items have been inserted, so we need requeue
-        * the work. Besides that, we must dequeue the empty delayed nodes
-        * to avoid the race between delayed items balance and the worker.
-        * The race like this:
-        *      Task1                           Worker thread
-        *                                      count == 0, needn't requeue
-        *                                        also needn't insert the
-        *                                        delayed node into prepare
-        *                                        list again.
-        *      add lots of delayed items
-        *      queue the delayed node
-        *        already in the list,
-        *        and not in the prepare
-        *        list, it means the delayed
-        *        node is being dealt with
-        *        by the worker.
-        *      do delayed items balance
-        *        the delayed node is being
-        *        dealt with by the worker
-        *        now, just wait.
-        *                                      the worker goto idle.
-        * Task1 will sleep until the transaction is commited.
-        */
-       mutex_lock(&delayed_node->mutex);
-       btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
-       mutex_unlock(&delayed_node->mutex);
 
        trans->block_rsv = block_rsv;
-       btrfs_end_transaction_dmeta(trans, root);
+       btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty_nodelay(root);
 
 release_path:
@@ -1376,52 +1407,41 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
        WARN_ON(btrfs_first_delayed_node(delayed_root));
 }
 
-static int refs_newer(struct btrfs_delayed_root *delayed_root,
-                     int seq, int count)
+static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
 {
        int val = atomic_read(&delayed_root->items_seq);
 
-       if (val < seq || val >= seq + count)
+       if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
+               return 1;
+
+       if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
                return 1;
+
        return 0;
 }
 
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
        struct btrfs_delayed_root *delayed_root;
-       int seq;
 
        delayed_root = btrfs_get_delayed_root(root);
 
        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
                return;
 
-       seq = atomic_read(&delayed_root->items_seq);
-
        if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
+               int seq;
                int ret;
-               DEFINE_WAIT(__wait);
+
+               seq = atomic_read(&delayed_root->items_seq);
 
                ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
                if (ret)
                        return;
 
-               while (1) {
-                       prepare_to_wait(&delayed_root->wait, &__wait,
-                                       TASK_INTERRUPTIBLE);
-
-                       if (refs_newer(delayed_root, seq,
-                                      BTRFS_DELAYED_BATCH) ||
-                           atomic_read(&delayed_root->items) <
-                           BTRFS_DELAYED_BACKGROUND) {
-                               break;
-                       }
-                       if (!signal_pending(current))
-                               schedule();
-                       else
-                               break;
-               }
-               finish_wait(&delayed_root->wait, &__wait);
+               wait_event_interruptible(delayed_root->wait,
+                                        could_end_wait(delayed_root, seq));
+               return;
        }
 
        btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
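
The open-coded prepare_to_wait()/schedule() loop above collapses into
a single wait_event_interruptible() once the wake-up predicate is
factored out into could_end_wait().  A userspace model of the same
shape, with a condition variable standing in for the wait queue (all
names hypothetical):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int items_seq;

/* analogous to could_end_wait(): enough progress since seq? */
static int progressed(int seq, int batch)
{
        return items_seq < seq || items_seq >= seq + batch;
}

static void *worker(void *arg)
{
        int i;

        (void)arg;
        for (i = 0; i < 8; i++) {
                usleep(1000);
                pthread_mutex_lock(&lock);
                items_seq++;                    /* one item finished */
                pthread_cond_broadcast(&cond);  /* wake the waiter */
                pthread_mutex_unlock(&lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t t;
        int seq = items_seq;

        pthread_create(&t, NULL, worker, NULL);
        pthread_mutex_lock(&lock);
        while (!progressed(seq, 4))     /* wait_event() analogue */
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        printf("progressed past seq %d\n", seq);
        pthread_join(t, NULL);
        return 0;
}
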
@@ -1472,9 +1492,9 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
        mutex_lock(&delayed_node->mutex);
        ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
        if (unlikely(ret)) {
-               printk(KERN_ERR "err add delayed dir index item(name: %.*s) "
+               btrfs_err(root->fs_info, "err add delayed dir index item(name: %.*s) "
                                "into the insertion tree of the delayed node"
-                               "(root id: %llu, inode id: %llu, errno: %d)\n",
+                               "(root id: %llu, inode id: %llu, errno: %d)",
                                name_len, name, delayed_node->root->objectid,
                                delayed_node->inode_id, ret);
                BUG();
@@ -1544,9 +1564,9 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
        mutex_lock(&node->mutex);
        ret = __btrfs_add_delayed_deletion_item(node, item);
        if (unlikely(ret)) {
-               printk(KERN_ERR "err add delayed dir index item(index: %llu) "
+               btrfs_err(root->fs_info, "err add delayed dir index item(index: %llu) "
                                "into the deletion tree of the delayed node"
-                               "(root id: %llu, inode id: %llu, errno: %d)\n",
+                               "(root id: %llu, inode id: %llu, errno: %d)",
                                index, node->root->objectid, node->inode_id,
                                ret);
                BUG();
@@ -1759,7 +1779,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
                return -ENOENT;
 
        mutex_lock(&delayed_node->mutex);
-       if (!delayed_node->inode_dirty) {
+       if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
                mutex_unlock(&delayed_node->mutex);
                btrfs_release_delayed_node(delayed_node);
                return -ENOENT;
@@ -1810,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
                return PTR_ERR(delayed_node);
 
        mutex_lock(&delayed_node->mutex);
-       if (delayed_node->inode_dirty) {
+       if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
                fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
                goto release_node;
        }
@@ -1821,7 +1841,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
                goto release_node;
 
        fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
-       delayed_node->inode_dirty = 1;
+       set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
        delayed_node->count++;
        atomic_inc(&root->fs_info->delayed_root->items);
 release_node:
@@ -1830,6 +1850,41 @@ release_node:
        return ret;
 }
 
+int btrfs_delayed_delete_inode_ref(struct inode *inode)
+{
+       struct btrfs_delayed_node *delayed_node;
+
+       delayed_node = btrfs_get_or_create_delayed_node(inode);
+       if (IS_ERR(delayed_node))
+               return PTR_ERR(delayed_node);
+
+       /*
+        * We don't reserve space for inode ref deletion because:
+        * - We ONLY do async inode ref deletion for an inode that has
+        *   only one link (i_nlink == 1), which means there is only one
+        *   inode ref. In most cases, the inode ref and the inode item
+        *   are in the same leaf, and we will deal with them at the same
+        *   time. Since we are sure we will reserve the space for the
+        *   inode item, it is unnecessary to reserve space for the inode
+        *   ref deletion.
+        * - If the inode ref and the inode item are not in the same leaf,
+        *   we still needn't worry about an ENOSPC problem, because we
+        *   reserve much more space for the inode update than it needs.
+        * - At the worst, we can steal some space from the global
+        *   reservation. That is very rare.
+        */
+       mutex_lock(&delayed_node->mutex);
+       if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
+               goto release_node;
+
+       set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
+       delayed_node->count++;
+       atomic_inc(&BTRFS_I(inode)->root->fs_info->delayed_root->items);
+release_node:
+       mutex_unlock(&delayed_node->mutex);
+       btrfs_release_delayed_node(delayed_node);
+       return 0;
+}
+
 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
 {
        struct btrfs_root *root = delayed_node->root;
@@ -1852,7 +1907,10 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
                btrfs_release_delayed_item(prev_item);
        }
 
-       if (delayed_node->inode_dirty) {
+       if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
+               btrfs_release_delayed_iref(delayed_node);
+
+       if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
                btrfs_delayed_inode_release_metadata(root, delayed_node);
                btrfs_release_delayed_inode(delayed_node);
        }
index a4b38f934d1471c3518c6d1e8e6f887adfdb7a70..f70119f254216583f3c7317085ec209eceb03d0f 100644 (file)
@@ -48,6 +48,10 @@ struct btrfs_delayed_root {
        wait_queue_head_t wait;
 };
 
+#define BTRFS_DELAYED_NODE_IN_LIST     0
+#define BTRFS_DELAYED_NODE_INODE_DIRTY 1
+#define BTRFS_DELAYED_NODE_DEL_IREF    2
+
 struct btrfs_delayed_node {
        u64 inode_id;
        u64 bytes_reserved;
@@ -65,8 +69,7 @@ struct btrfs_delayed_node {
        struct btrfs_inode_item inode_item;
        atomic_t refs;
        u64 index_cnt;
-       bool in_list;
-       bool inode_dirty;
+       unsigned long flags;
        int count;
 };
 
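
Folding the two bools into one flags word lets the new DEL_IREF state
ride along without growing the struct, and the kernel's
set_bit()/test_bit()/clear_bit() give atomic access to each bit.  A
userspace model using C11 atomics in place of the kernel bitops:

#include <stdatomic.h>
#include <stdio.h>

#define NODE_IN_LIST     0
#define NODE_INODE_DIRTY 1
#define NODE_DEL_IREF    2

static atomic_ulong flags;

static void set_flag(int bit)
{
        atomic_fetch_or(&flags, 1UL << bit);
}

static void clear_flag(int bit)
{
        atomic_fetch_and(&flags, ~(1UL << bit));
}

static int test_flag(int bit)
{
        return !!(atomic_load(&flags) & (1UL << bit));
}

int main(void)
{
        set_flag(NODE_INODE_DIRTY);
        set_flag(NODE_DEL_IREF);
        clear_flag(NODE_INODE_DIRTY);
        printf("dirty=%d del_iref=%d\n",
               test_flag(NODE_INODE_DIRTY), test_flag(NODE_DEL_IREF));
        return 0;
}
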
@@ -125,6 +128,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode);
 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, struct inode *inode);
 int btrfs_fill_inode(struct inode *inode, u32 *rdev);
+int btrfs_delayed_delete_inode_ref(struct inode *inode);
 
 /* Used for drop dead root */
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
index e4d467be2dd44d131977d64c3029af3fc9d99ce3..f3bff89eecf09346e2eb49b4ffeb861f35e33d4b 100644 (file)
@@ -161,35 +161,61 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
        return NULL;
 }
 
+/* insert a new ref into the head ref rbtree */
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+                                                  struct rb_node *node)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent_node = NULL;
+       struct btrfs_delayed_ref_head *entry;
+       struct btrfs_delayed_ref_head *ins;
+       u64 bytenr;
+
+       ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
+       bytenr = ins->node.bytenr;
+       while (*p) {
+               parent_node = *p;
+               entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
+                                href_node);
+
+               if (bytenr < entry->node.bytenr)
+                       p = &(*p)->rb_left;
+               else if (bytenr > entry->node.bytenr)
+                       p = &(*p)->rb_right;
+               else
+                       return entry;
+       }
+
+       rb_link_node(node, parent_node, p);
+       rb_insert_color(node, root);
+       return NULL;
+}
+
 /*
  * find a head entry based on bytenr. This returns the delayed ref
  * head if it was able to find one, or NULL if nothing was in that spot.
  * If return_bigger is given, the next bigger entry is returned if no exact
  * match is found.
  */
-static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
-                                 u64 bytenr,
-                                 struct btrfs_delayed_ref_node **last,
-                                 int return_bigger)
+static struct btrfs_delayed_ref_head *
+find_ref_head(struct rb_root *root, u64 bytenr,
+             struct btrfs_delayed_ref_head **last, int return_bigger)
 {
        struct rb_node *n;
-       struct btrfs_delayed_ref_node *entry;
+       struct btrfs_delayed_ref_head *entry;
        int cmp = 0;
 
 again:
        n = root->rb_node;
        entry = NULL;
        while (n) {
-               entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
-               WARN_ON(!entry->in_tree);
+               entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
                if (last)
                        *last = entry;
 
-               if (bytenr < entry->bytenr)
+               if (bytenr < entry->node.bytenr)
                        cmp = -1;
-               else if (bytenr > entry->bytenr)
-                       cmp = 1;
-               else if (!btrfs_delayed_ref_is_head(entry))
+               else if (bytenr > entry->node.bytenr)
                        cmp = 1;
                else
                        cmp = 0;
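
htree_insert() above is the classic insert-or-return-existing rbtree
walk keyed by bytenr: callers merge into an existing head instead of
double-inserting.  A plain binary-search-tree userspace model of the
pattern (the kernel version additionally rebalances through
rb_insert_color()):

#include <stdint.h>
#include <stdio.h>

struct node {
        uint64_t bytenr;
        struct node *left, *right;
};

/* Walk down comparing bytenr; return the existing entry on a match,
 * otherwise link the new node at the leaf position and return NULL. */
static struct node *tree_insert(struct node **root, struct node *ins)
{
        struct node **p = root;

        while (*p) {
                if (ins->bytenr < (*p)->bytenr)
                        p = &(*p)->left;
                else if (ins->bytenr > (*p)->bytenr)
                        p = &(*p)->right;
                else
                        return *p;      /* duplicate: caller merges */
        }
        *p = ins;
        return NULL;
}

int main(void)
{
        struct node *root = NULL;
        struct node a = { .bytenr = 4096 };
        struct node b = { .bytenr = 4096 };

        printf("first:  %p\n", (void *)tree_insert(&root, &a)); /* NULL */
        printf("second: %p\n", (void *)tree_insert(&root, &b)); /* &a  */
        return 0;
}
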
@@ -203,12 +229,12 @@ again:
        }
        if (entry && return_bigger) {
                if (cmp > 0) {
-                       n = rb_next(&entry->rb_node);
+                       n = rb_next(&entry->href_node);
                        if (!n)
                                n = rb_first(root);
-                       entry = rb_entry(n, struct btrfs_delayed_ref_node,
-                                        rb_node);
-                       bytenr = entry->bytenr;
+                       entry = rb_entry(n, struct btrfs_delayed_ref_head,
+                                        href_node);
+                       bytenr = entry->node.bytenr;
                        return_bigger = 0;
                        goto again;
                }
@@ -243,33 +269,38 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 
 static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
+                                   struct btrfs_delayed_ref_head *head,
                                    struct btrfs_delayed_ref_node *ref)
 {
-       rb_erase(&ref->rb_node, &delayed_refs->root);
+       if (btrfs_delayed_ref_is_head(ref)) {
+               head = btrfs_delayed_node_to_head(ref);
+               rb_erase(&head->href_node, &delayed_refs->href_root);
+       } else {
+               assert_spin_locked(&head->lock);
+               rb_erase(&ref->rb_node, &head->ref_root);
+       }
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
-       delayed_refs->num_entries--;
+       atomic_dec(&delayed_refs->num_entries);
        if (trans->delayed_ref_updates)
                trans->delayed_ref_updates--;
 }
 
 static int merge_ref(struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_root *delayed_refs,
+                    struct btrfs_delayed_ref_head *head,
                     struct btrfs_delayed_ref_node *ref, u64 seq)
 {
        struct rb_node *node;
-       int merged = 0;
        int mod = 0;
        int done = 0;
 
-       node = rb_prev(&ref->rb_node);
-       while (node) {
+       node = rb_next(&ref->rb_node);
+       while (!done && node) {
                struct btrfs_delayed_ref_node *next;
 
                next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_prev(node);
-               if (next->bytenr != ref->bytenr)
-                       break;
+               node = rb_next(node);
                if (seq && next->seq >= seq)
                        break;
                if (comp_entry(ref, next, 0))
@@ -289,12 +320,11 @@ static int merge_ref(struct btrfs_trans_handle *trans,
                        mod = -next->ref_mod;
                }
 
-               merged++;
-               drop_delayed_ref(trans, delayed_refs, next);
+               drop_delayed_ref(trans, delayed_refs, head, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
-                       drop_delayed_ref(trans, delayed_refs, ref);
-                       break;
+                       drop_delayed_ref(trans, delayed_refs, head, ref);
+                       done = 1;
                } else {
                        /*
                         * You can't have multiples of the same ref on a tree
@@ -303,13 +333,8 @@ static int merge_ref(struct btrfs_trans_handle *trans,
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }
-
-               if (done)
-                       break;
-               node = rb_prev(&ref->rb_node);
        }
-
-       return merged;
+       return done;
 }
 
 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
@@ -320,6 +345,14 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
        struct rb_node *node;
        u64 seq = 0;
 
+       assert_spin_locked(&head->lock);
+       /*
+        * We don't have too many refs to merge in the case of delayed
+        * data refs.
+        */
+       if (head->is_data)
+               return;
+
        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;
@@ -330,22 +363,19 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);
 
-       node = rb_prev(&head->node.rb_node);
+       node = rb_first(&head->ref_root);
        while (node) {
                struct btrfs_delayed_ref_node *ref;
 
                ref = rb_entry(node, struct btrfs_delayed_ref_node,
                               rb_node);
-               if (ref->bytenr != head->node.bytenr)
-                       break;
-
                /* We can't merge refs that are outside of our seq count */
                if (seq && ref->seq >= seq)
                        break;
-               if (merge_ref(trans, delayed_refs, ref, seq))
-                       node = rb_prev(&head->node.rb_node);
+               if (merge_ref(trans, delayed_refs, head, ref, seq))
+                       node = rb_first(&head->ref_root);
                else
-                       node = rb_prev(node);
+                       node = rb_next(&ref->rb_node);
        }
 }
 
@@ -373,71 +403,52 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
-                          struct list_head *cluster, u64 start)
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans)
 {
-       int count = 0;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct rb_node *node;
-       struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *head;
+       u64 start;
+       bool loop = false;
 
        delayed_refs = &trans->transaction->delayed_refs;
-       if (start == 0) {
-               node = rb_first(&delayed_refs->root);
-       } else {
-               ref = NULL;
-               find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
-               if (ref) {
-                       node = &ref->rb_node;
-               } else
-                       node = rb_first(&delayed_refs->root);
-       }
+
 again:
-       while (node && count < 32) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               if (btrfs_delayed_ref_is_head(ref)) {
-                       head = btrfs_delayed_node_to_head(ref);
-                       if (list_empty(&head->cluster)) {
-                               list_add_tail(&head->cluster, cluster);
-                               delayed_refs->run_delayed_start =
-                                       head->node.bytenr;
-                               count++;
-
-                               WARN_ON(delayed_refs->num_heads_ready == 0);
-                               delayed_refs->num_heads_ready--;
-                       } else if (count) {
-                               /* the goal of the clustering is to find extents
-                                * that are likely to end up in the same extent
-                                * leaf on disk.  So, we don't want them spread
-                                * all over the tree.  Stop now if we've hit
-                                * a head that was already in use
-                                */
-                               break;
-                       }
-               }
-               node = rb_next(node);
-       }
-       if (count) {
-               return 0;
-       } else if (start) {
-               /*
-                * we've gone to the end of the rbtree without finding any
-                * clusters.  start from the beginning and try again
-                */
+       start = delayed_refs->run_delayed_start;
+       head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
+       if (!head && !loop) {
+               delayed_refs->run_delayed_start = 0;
                start = 0;
-               node = rb_first(&delayed_refs->root);
-               goto again;
+               loop = true;
+               head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
+               if (!head)
+                       return NULL;
+       } else if (!head && loop) {
+               return NULL;
        }
-       return 1;
-}
 
-void btrfs_release_ref_cluster(struct list_head *cluster)
-{
-       struct list_head *pos, *q;
+       while (head->processing) {
+               struct rb_node *node;
+
+               node = rb_next(&head->href_node);
+               if (!node) {
+                       if (loop)
+                               return NULL;
+                       delayed_refs->run_delayed_start = 0;
+                       start = 0;
+                       loop = true;
+                       goto again;
+               }
+               head = rb_entry(node, struct btrfs_delayed_ref_head,
+                               href_node);
+       }
 
-       list_for_each_safe(pos, q, cluster)
-               list_del_init(pos);
+       head->processing = 1;
+       WARN_ON(delayed_refs->num_heads_ready == 0);
+       delayed_refs->num_heads_ready--;
+       delayed_refs->run_delayed_start = head->node.bytenr +
+               head->node.num_bytes;
+       return head;
 }
 
 /*
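
btrfs_select_ref_head() replaces the old cluster gathering: take the
first head at or after run_delayed_start that no worker has claimed,
wrapping around the key space at most once before giving up.  A
userspace model with a sorted array standing in for the rbtree
(hypothetical names, simplified wrap logic):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct head { uint64_t bytenr; int processing; };

static struct head *select_head(struct head *heads, size_t n,
                                uint64_t *run_delayed_start)
{
        uint64_t start = *run_delayed_start;
        int loop = 0;
        size_t i;

        for (i = 0; ; i++) {
                if (i == n) {           /* reached the end of the tree */
                        if (loop)
                                return NULL;
                        loop = 1;       /* wrap around exactly once */
                        start = 0;
                        i = (size_t)-1; /* restart the scan at slot 0 */
                        continue;
                }
                if (heads[i].bytenr < start || heads[i].processing)
                        continue;
                heads[i].processing = 1;        /* claim it */
                *run_delayed_start = heads[i].bytenr + 1;
                return &heads[i];
        }
}

int main(void)
{
        struct head heads[] = { { 100, 1 }, { 200, 0 }, { 300, 0 } };
        uint64_t start = 250;
        struct head *h = select_head(heads, 3, &start);

        printf("picked %llu\n", h ? (unsigned long long)h->bytenr : 0ULL);
        return 0;
}
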
@@ -451,6 +462,7 @@ void btrfs_release_ref_cluster(struct list_head *cluster)
 static noinline void
 update_existing_ref(struct btrfs_trans_handle *trans,
                    struct btrfs_delayed_ref_root *delayed_refs,
+                   struct btrfs_delayed_ref_head *head,
                    struct btrfs_delayed_ref_node *existing,
                    struct btrfs_delayed_ref_node *update)
 {
@@ -463,7 +475,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
                 */
                existing->ref_mod--;
                if (existing->ref_mod == 0)
-                       drop_delayed_ref(trans, delayed_refs, existing);
+                       drop_delayed_ref(trans, delayed_refs, head, existing);
                else
                        WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -533,9 +545,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
                }
        }
        /*
-        * update the reference mod on the head to reflect this new operation
+        * update the reference mod on the head to reflect this new
+        * operation. Only this case needs the lock, because the head
+        * could be under processing right now; for refs we have just
+        * added we know we're fine without it.
         */
+       spin_lock(&existing_ref->lock);
        existing->ref_mod += update->ref_mod;
+       spin_unlock(&existing_ref->lock);
 }
 
 /*
@@ -543,13 +559,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
  * this does all the dirty work in terms of maintaining the correct
  * overall modification count.
  */
-static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
-                                       struct btrfs_trans_handle *trans,
-                                       struct btrfs_delayed_ref_node *ref,
-                                       u64 bytenr, u64 num_bytes,
-                                       int action, int is_data)
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_fs_info *fs_info,
+                    struct btrfs_trans_handle *trans,
+                    struct btrfs_delayed_ref_node *ref, u64 bytenr,
+                    u64 num_bytes, int action, int is_data)
 {
-       struct btrfs_delayed_ref_node *existing;
+       struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
@@ -596,38 +612,43 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
+       head_ref->ref_root = RB_ROOT;
+       head_ref->processing = 0;
 
-       INIT_LIST_HEAD(&head_ref->cluster);
+       spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);
 
        trace_add_delayed_ref_head(ref, head_ref, action);
 
-       existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+       existing = htree_insert(&delayed_refs->href_root,
+                               &head_ref->href_node);
        if (existing) {
-               update_existing_head_ref(existing, ref);
+               update_existing_head_ref(&existing->node, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+               head_ref = existing;
        } else {
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
-               delayed_refs->num_entries++;
+               atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
+       return head_ref;
 }
 
 /*
  * helper to insert a delayed tree ref into the rbtree.
  */
-static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
-                                        struct btrfs_trans_handle *trans,
-                                        struct btrfs_delayed_ref_node *ref,
-                                        u64 bytenr, u64 num_bytes, u64 parent,
-                                        u64 ref_root, int level, int action,
-                                        int for_cow)
+static noinline void
+add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+                    struct btrfs_trans_handle *trans,
+                    struct btrfs_delayed_ref_head *head_ref,
+                    struct btrfs_delayed_ref_node *ref, u64 bytenr,
+                    u64 num_bytes, u64 parent, u64 ref_root, int level,
+                    int action, int for_cow)
 {
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
@@ -663,30 +684,33 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_tree_ref(ref, full_ref, action);
 
-       existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+       spin_lock(&head_ref->lock);
+       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
        if (existing) {
-               update_existing_ref(trans, delayed_refs, existing, ref);
+               update_existing_ref(trans, delayed_refs, head_ref, existing,
+                                   ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
        } else {
-               delayed_refs->num_entries++;
+               atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
+       spin_unlock(&head_ref->lock);
 }
 
 /*
  * helper to insert a delayed data ref into the rbtree.
  */
-static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
-                                        struct btrfs_trans_handle *trans,
-                                        struct btrfs_delayed_ref_node *ref,
-                                        u64 bytenr, u64 num_bytes, u64 parent,
-                                        u64 ref_root, u64 owner, u64 offset,
-                                        int action, int for_cow)
+static noinline void
+add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+                    struct btrfs_trans_handle *trans,
+                    struct btrfs_delayed_ref_head *head_ref,
+                    struct btrfs_delayed_ref_node *ref, u64 bytenr,
+                    u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
+                    u64 offset, int action, int for_cow)
 {
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
@@ -724,19 +748,21 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_data_ref(ref, full_ref, action);
 
-       existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+       spin_lock(&head_ref->lock);
+       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
        if (existing) {
-               update_existing_ref(trans, delayed_refs, existing, ref);
+               update_existing_ref(trans, delayed_refs, head_ref, existing,
+                                   ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
        } else {
-               delayed_refs->num_entries++;
+               atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
+       spin_unlock(&head_ref->lock);
 }
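
A minimal sketch of the insert pattern both add_delayed_tree_ref() and add_delayed_data_ref() now share after this change. tree_insert(), update_existing_ref() and the counters are the names used in the hunks above; types, freeing of the merged ref and error handling are simplified for illustration, so this is not the verbatim kernel code:

static void insert_delayed_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_delayed_ref_root *delayed_refs,
                               struct btrfs_delayed_ref_head *head_ref,
                               struct btrfs_delayed_ref_node *ref)
{
        struct btrfs_delayed_ref_node *existing;

        /* take only this head's lock, not the global delayed_refs->lock */
        spin_lock(&head_ref->lock);
        existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
        if (existing) {
                /* an equivalent ref is already queued; fold this one in */
                update_existing_ref(trans, delayed_refs, head_ref,
                                    existing, ref);
        } else {
                atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
        spin_unlock(&head_ref->lock);
}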
 
 /*
@@ -775,10 +801,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
-                                  num_bytes, action, 0);
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+                                       bytenr, num_bytes, action, 0);
 
-       add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
+       add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, level, action,
                                   for_cow);
        spin_unlock(&delayed_refs->lock);
@@ -823,10 +849,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
-                                  num_bytes, action, 1);
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+                                       bytenr, num_bytes, action, 1);
 
-       add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
+       add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, owner, offset,
                                   action, for_cow);
        spin_unlock(&delayed_refs->lock);
@@ -869,14 +895,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
 {
-       struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_root *delayed_refs;
 
        delayed_refs = &trans->transaction->delayed_refs;
-       ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
-       if (ref)
-               return btrfs_delayed_node_to_head(ref);
-       return NULL;
+       return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0);
 }
 
 void btrfs_delayed_ref_exit(void)
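
btrfs_find_delayed_ref_head() above now searches the new href_root, which contains only head nodes, and returns the head directly. A hedged sketch of that kind of lookup; the real find_ref_head() in delayed-ref.c also takes extra arguments for nearest-match iteration, which are omitted here:

static struct btrfs_delayed_ref_head *
lookup_ref_head(struct rb_root *href_root, u64 bytenr)
{
        struct rb_node *n = href_root->rb_node;
        struct btrfs_delayed_ref_head *head;

        while (n) {
                head = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
                if (bytenr < head->node.bytenr)
                        n = n->rb_left;
                else if (bytenr > head->node.bytenr)
                        n = n->rb_right;
                else
                        return head;
        }
        return NULL;
}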
index 70b962cc177d973688b0d2deb56965bffbc423ae..4ba9b93022ffd4e70e862e67674fdacbfb899e1c 100644 (file)
@@ -81,7 +81,10 @@ struct btrfs_delayed_ref_head {
         */
        struct mutex mutex;
 
-       struct list_head cluster;
+       spinlock_t lock;
+       struct rb_root ref_root;
+
+       struct rb_node href_node;
 
        struct btrfs_delayed_extent_op *extent_op;
        /*
@@ -98,6 +101,7 @@ struct btrfs_delayed_ref_head {
         */
        unsigned int must_insert_reserved:1;
        unsigned int is_data:1;
+       unsigned int processing:1;
 };
 
 struct btrfs_delayed_tree_ref {
@@ -116,7 +120,8 @@ struct btrfs_delayed_data_ref {
 };
 
 struct btrfs_delayed_ref_root {
-       struct rb_root root;
+       /* head ref rbtree */
+       struct rb_root href_root;
 
        /* this spin lock protects the rbtree and the entries inside */
        spinlock_t lock;
@@ -124,7 +129,7 @@ struct btrfs_delayed_ref_root {
        /* how many delayed ref updates we've queued, used by the
         * throttling code
         */
-       unsigned long num_entries;
+       atomic_t num_entries;
 
        /* total number of head nodes in tree */
        unsigned long num_heads;
@@ -132,15 +137,6 @@ struct btrfs_delayed_ref_root {
        /* total number of head nodes ready for processing */
        unsigned long num_heads_ready;
 
-       /*
-        * bumped when someone is making progress on the delayed
-        * refs, so that other procs know they are just adding to
-        * contention instead of helping
-        */
-       atomic_t procs_running_refs;
-       atomic_t ref_seq;
-       wait_queue_head_t wait;
-
        /*
         * set when the tree is flushing before a transaction commit,
         * used by the throttling code to decide if new updates need
@@ -226,9 +222,9 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
        mutex_unlock(&head->mutex);
 }
 
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
-                          struct list_head *cluster, u64 search_start);
-void btrfs_release_ref_cluster(struct list_head *cluster);
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans);
 
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
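
Taken together, the delayed-ref.h changes above split what used to be one big rbtree under one spinlock into two levels: delayed_refs->href_root holds only heads, and each head carries its own ref_root plus a head-local lock. A hedged sketch of how a walker visits everything under the new scheme; real users such as btrfs_destroy_delayed_refs() drop and retake locks as they go, which is omitted here:

static void walk_all_delayed_refs(struct btrfs_delayed_ref_root *delayed_refs)
{
        struct rb_node *hn, *rn;
        struct btrfs_delayed_ref_head *head;
        struct btrfs_delayed_ref_node *ref;

        spin_lock(&delayed_refs->lock);         /* guards href_root */
        for (hn = rb_first(&delayed_refs->href_root); hn; hn = rb_next(hn)) {
                head = rb_entry(hn, struct btrfs_delayed_ref_head, href_node);
                spin_lock(&head->lock);         /* guards this head's refs */
                for (rn = rb_first(&head->ref_root); rn; rn = rb_next(rn)) {
                        ref = rb_entry(rn, struct btrfs_delayed_ref_node,
                                       rb_node);
                        /* ... visit ref ... */
                }
                spin_unlock(&head->lock);
        }
        spin_unlock(&delayed_refs->lock);
}

The point of the split is that ref insertion and processing now contend on per-head locks instead of one filesystem-wide lock, which is also why num_entries becomes an atomic_t: it is updated under many different locks.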
index 2cfc3dfff64f5708f71ec83af691b425b9f01b01..564c92638b20a8d4929a920eb843c4f4fe71745b 100644 (file)
@@ -102,7 +102,8 @@ no_valid_dev_replace_entry_found:
        ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item);
 
        if (item_size != sizeof(struct btrfs_dev_replace_item)) {
-               pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n");
+               btrfs_warn(fs_info,
+                       "dev_replace entry found has unexpected size, ignore entry");
                goto no_valid_dev_replace_entry_found;
        }
 
@@ -145,13 +146,19 @@ no_valid_dev_replace_entry_found:
                if (!dev_replace->srcdev &&
                    !btrfs_test_opt(dev_root, DEGRADED)) {
                        ret = -EIO;
-                       pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
-                               src_devid);
+                       btrfs_warn(fs_info,
+                          "cannot mount because device replace operation is ongoing and");
+                       btrfs_warn(fs_info,
+                          "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
+                          src_devid);
                }
                if (!dev_replace->tgtdev &&
                    !btrfs_test_opt(dev_root, DEGRADED)) {
                        ret = -EIO;
-                       pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
+                       btrfs_warn(fs_info,
+                          "cannot mount because device replace operation is ongoing and");
+                       btrfs_warn(fs_info,
+                          "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
                                BTRFS_DEV_REPLACE_DEVID);
                }
                if (dev_replace->tgtdev) {
@@ -210,7 +217,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
        }
        ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
        if (ret < 0) {
-               pr_warn("btrfs: error %d while searching for dev_replace item!\n",
+               btrfs_warn(fs_info, "error %d while searching for dev_replace item!",
                        ret);
                goto out;
        }
@@ -230,7 +237,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
                 */
                ret = btrfs_del_item(trans, dev_root, path);
                if (ret != 0) {
-                       pr_warn("btrfs: delete too small dev_replace item failed %d!\n",
+                       btrfs_warn(fs_info, "delete too small dev_replace item failed %d!",
                                ret);
                        goto out;
                }
@@ -243,7 +250,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
                ret = btrfs_insert_empty_item(trans, dev_root, path,
                                              &key, sizeof(*ptr));
                if (ret < 0) {
-                       pr_warn("btrfs: insert dev_replace item failed %d!\n",
+                       btrfs_warn(fs_info, "insert dev_replace item failed %d!",
                                ret);
                        goto out;
                }
@@ -305,7 +312,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        struct btrfs_device *src_device = NULL;
 
        if (btrfs_fs_incompat(fs_info, RAID56)) {
-               pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
+               btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
                return -EINVAL;
        }
 
@@ -325,7 +332,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
                                            &tgt_device);
        if (ret) {
-               pr_err("btrfs: target device %s is invalid!\n",
+               btrfs_err(fs_info, "target device %s is invalid!",
                       args->start.tgtdev_name);
                mutex_unlock(&fs_info->volume_mutex);
                return -EINVAL;
@@ -341,7 +348,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        }
 
        if (tgt_device->total_bytes < src_device->total_bytes) {
-               pr_err("btrfs: target device is smaller than source device!\n");
+               btrfs_err(fs_info, "target device is smaller than source device!");
                ret = -EINVAL;
                goto leave_no_lock;
        }
@@ -366,7 +373,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        dev_replace->tgtdev = tgt_device;
 
        printk_in_rcu(KERN_INFO
-                     "btrfs: dev_replace from %s (devid %llu) to %s started\n",
+                     "BTRFS: dev_replace from %s (devid %llu) to %s started\n",
                      src_device->missing ? "<missing disk>" :
                        rcu_str_deref(src_device->name),
                      src_device->devid,
@@ -489,7 +496,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 
        if (scrub_ret) {
                printk_in_rcu(KERN_ERR
-                             "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
+                             "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
                              src_device->missing ? "<missing disk>" :
                                rcu_str_deref(src_device->name),
                              src_device->devid,
@@ -504,7 +511,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        }
 
        printk_in_rcu(KERN_INFO
-                     "btrfs: dev_replace from %s (devid %llu) to %s) finished\n",
+                     "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n",
                      src_device->missing ? "<missing disk>" :
                        rcu_str_deref(src_device->name),
                      src_device->devid,
@@ -699,7 +706,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
                        BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
                dev_replace->time_stopped = get_seconds();
                dev_replace->item_needs_writeback = 1;
-               pr_info("btrfs: suspending dev_replace for unmount\n");
+               btrfs_info(fs_info, "suspending dev_replace for unmount");
                break;
        }
 
@@ -728,8 +735,9 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
                break;
        }
        if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
-               pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n"
-                       "btrfs: you may cancel the operation after 'mount -o degraded'\n");
+               btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
+               btrfs_info(fs_info,
+                       "you may cancel the operation after 'mount -o degraded'");
                btrfs_dev_replace_unlock(dev_replace);
                return 0;
        }
@@ -755,14 +763,14 @@ static int btrfs_dev_replace_kthread(void *data)
                kfree(status_args);
                do_div(progress, 10);
                printk_in_rcu(KERN_INFO
-                             "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
-                             dev_replace->srcdev->missing ? "<missing disk>" :
-                               rcu_str_deref(dev_replace->srcdev->name),
-                             dev_replace->srcdev->devid,
-                             dev_replace->tgtdev ?
-                               rcu_str_deref(dev_replace->tgtdev->name) :
-                               "<missing target disk>",
-                             (unsigned int)progress);
+                       "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
+                       dev_replace->srcdev->missing ? "<missing disk>" :
+                       rcu_str_deref(dev_replace->srcdev->name),
+                       dev_replace->srcdev->devid,
+                       dev_replace->tgtdev ?
+                       rcu_str_deref(dev_replace->tgtdev->name) :
+                       "<missing target disk>",
+                       (unsigned int)progress);
        }
        btrfs_dev_replace_continue_on_mount(fs_info);
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
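
Most of the dev-replace.c churn above converts bare pr_warn()/pr_err()/pr_info() calls to btrfs_warn()/btrfs_err()/btrfs_info(), which tag each message with the filesystem it belongs to. Roughly, the macros have this shape; the real ones live in ctree.h, and the details below are an approximation, not the verbatim definitions:

#define btrfs_crit(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
#define btrfs_err(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_ERR fmt, ##args)
#define btrfs_warn(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
#define btrfs_info(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_INFO fmt, ##args)

btrfs_printk() prefixes "BTRFS" plus an identifier for the filesystem, which is also why the remaining raw printk()s in this pull switch from a lowercase "btrfs: " prefix to "BTRFS: ", keeping the log output uniform and grep-able.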
index c031ea3fd70f70d64452a3529d3ef99c830a57bf..a0691df5dceaa9dfdb3100732bf82ee81793f32c 100644 (file)
@@ -261,7 +261,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
         * see if there is room in the item to insert this
         * name
         */
-       data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item);
+       data_size = sizeof(*di) + name_len;
        leaf = path->nodes[0];
        slot = path->slots[0];
        if (data_size + btrfs_item_size_nr(leaf, slot) +
@@ -459,7 +459,7 @@ int verify_dir_item(struct btrfs_root *root,
        u8 type = btrfs_dir_type(leaf, dir_item);
 
        if (type >= BTRFS_FT_MAX) {
-               printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
+               btrfs_crit(root->fs_info, "invalid dir item type: %d",
                       (int)type);
                return 1;
        }
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_root *root,
                namelen = XATTR_NAME_MAX;
 
        if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
-               printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n",
+               btrfs_crit(root->fs_info, "invalid dir item name len: %u",
                       (unsigned)btrfs_dir_name_len(leaf, dir_item));
                return 1;
        }
@@ -476,7 +476,7 @@ int verify_dir_item(struct btrfs_root *root,
        /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
        if ((btrfs_dir_data_len(leaf, dir_item) +
             btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
-               printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n",
+               btrfs_crit(root->fs_info, "invalid dir item name + data len: %u + %u",
                       (unsigned)btrfs_dir_name_len(leaf, dir_item),
                       (unsigned)btrfs_dir_data_len(leaf, dir_item));
                return 1;
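
The data_size change at the top of this file is small but subtle: btrfs_check_dir_item_collision() is sizing a dir item that will be appended to an existing leaf item sharing the same name hash, so the struct btrfs_item header already exists and must not be counted again. A hedged restatement, using the names from the hunk:

/* payload the colliding name actually adds to the existing item */
data_size = sizeof(*di) + name_len;

/*
 * The old "+ sizeof(struct btrfs_item)" double-counted the item
 * header, over-estimating the space needed and making the room
 * check that follows fail for insertions that would in fact fit.
 */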
index e71039ea66cf9d4bf8a56bdd984bc7e26597aa74..0e69295d0031e558eb3ebb257ff2d4bb2f79772f 100644 (file)
@@ -48,6 +48,7 @@
 #include "rcu-string.h"
 #include "dev-replace.h"
 #include "raid56.h"
+#include "sysfs.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -299,11 +300,11 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                        memcpy(&found, result, csum_size);
 
                        read_extent_buffer(buf, &val, 0, csum_size);
-                       printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
-                                      "failed on %llu wanted %X found %X "
-                                      "level %d\n",
-                                      root->fs_info->sb->s_id, buf->start,
-                                      val, found, btrfs_header_level(buf));
+                       printk_ratelimited(KERN_INFO
+                               "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
+                               "level %d\n",
+                               root->fs_info->sb->s_id, buf->start,
+                               val, found, btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
                                kfree(result);
                        return 1;
@@ -382,13 +383,14 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
                        ret = 1;
 
                if (ret && btrfs_super_generation(disk_sb) < 10) {
-                       printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+                       printk(KERN_WARNING
+                               "BTRFS: super block crcs don't match, older mkfs detected\n");
                        ret = 0;
                }
        }
 
        if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
-               printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+               printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n",
                                csum_type);
                ret = 1;
        }
@@ -464,13 +466,10 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 
 static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 {
-       struct extent_io_tree *tree;
        u64 start = page_offset(page);
        u64 found_start;
        struct extent_buffer *eb;
 
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
-
        eb = (struct extent_buffer *)page->private;
        if (page != eb->pages[0])
                return 0;
@@ -500,8 +499,8 @@ static int check_tree_block_fsid(struct btrfs_root *root,
 }
 
 #define CORRUPT(reason, eb, root, slot)                                \
-       printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
-              "root=%llu, slot=%d\n", reason,                  \
+       btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu,"       \
+                  "root=%llu, slot=%d", reason,                        \
               btrfs_header_bytenr(eb), root->objectid, slot)
 
 static noinline int check_leaf(struct btrfs_root *root,
@@ -569,7 +568,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                                      u64 phy_offset, struct page *page,
                                      u64 start, u64 end, int mirror)
 {
-       struct extent_io_tree *tree;
        u64 found_start;
        int found_level;
        struct extent_buffer *eb;
@@ -580,7 +578,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
        if (!page->private)
                goto out;
 
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
        eb = (struct extent_buffer *)page->private;
 
        /* the pending IO might have been the only thing that kept this buffer
@@ -600,21 +597,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
        found_start = btrfs_header_bytenr(eb);
        if (found_start != eb->start) {
-               printk_ratelimited(KERN_INFO "btrfs bad tree block start "
+               printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
                               "%llu %llu\n",
                               found_start, eb->start);
                ret = -EIO;
                goto err;
        }
        if (check_tree_block_fsid(root, eb)) {
-               printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
+               printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
                               eb->start);
                ret = -EIO;
                goto err;
        }
        found_level = btrfs_header_level(eb);
        if (found_level >= BTRFS_MAX_LEVEL) {
-               btrfs_info(root->fs_info, "bad tree block level %d\n",
+               btrfs_info(root->fs_info, "bad tree block level %d",
                           (int)btrfs_header_level(eb));
                ret = -EIO;
                goto err;
@@ -964,11 +961,9 @@ static int btree_migratepage(struct address_space *mapping,
 static int btree_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
-       struct extent_io_tree *tree;
        struct btrfs_fs_info *fs_info;
        int ret;
 
-       tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
 
                if (wbc->for_kupdate)
@@ -1007,8 +1002,9 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
        extent_invalidatepage(tree, page, offset);
        btree_releasepage(page, GFP_NOFS);
        if (PagePrivate(page)) {
-               printk(KERN_WARNING "btrfs warning page private not zero "
-                      "on page %llu\n", (unsigned long long)page_offset(page));
+               btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
+                          "page private not zero on page %llu",
+                          (unsigned long long)page_offset(page));
                ClearPagePrivate(page);
                set_page_private(page, 0);
                page_cache_release(page);
@@ -1092,21 +1088,13 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize)
 {
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_buffer *eb;
-       eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
-       return eb;
+       return find_extent_buffer(root->fs_info, bytenr);
 }
 
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                 u64 bytenr, u32 blocksize)
 {
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_buffer *eb;
-
-       eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
-                                bytenr, blocksize);
-       return eb;
+       return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
 }
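
The two helpers above reflect extent buffers moving out of the btree inode's io_tree and into a per-filesystem radix tree, matching the new buffer_radix and buffer_lock fields initialized later in open_ctree(). A hedged sketch of the lookup side; the real find_extent_buffer() is in extent_io.c, and the exact index shift is an assumption here:

static struct extent_buffer *eb_lookup(struct btrfs_fs_info *fs_info,
                                       u64 start)
{
        struct extent_buffer *eb;

        rcu_read_lock();
        eb = radix_tree_lookup(&fs_info->buffer_radix,
                               start >> PAGE_CACHE_SHIFT);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
                return eb;
        }
        rcu_read_unlock();
        return NULL;
}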
 
 
@@ -1270,7 +1258,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        struct btrfs_root *root;
        struct btrfs_key key;
        int ret = 0;
-       u64 bytenr;
        uuid_le uuid;
 
        root = btrfs_alloc_root(fs_info);
@@ -1292,7 +1279,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
                goto fail;
        }
 
-       bytenr = leaf->start;
        memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_bytenr(leaf, leaf->start);
        btrfs_set_header_generation(leaf, trans->transid);
@@ -1613,7 +1599,8 @@ again:
        if (ret)
                goto fail;
 
-       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+       ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
+                       location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
        if (ret < 0)
                goto fail;
        if (ret == 0)
@@ -1681,12 +1668,10 @@ static void end_workqueue_fn(struct btrfs_work *work)
 {
        struct bio *bio;
        struct end_io_wq *end_io_wq;
-       struct btrfs_fs_info *fs_info;
        int error;
 
        end_io_wq = container_of(work, struct end_io_wq, work);
        bio = end_io_wq->bio;
-       fs_info = end_io_wq->info;
 
        error = end_io_wq->error;
        bio->bi_private = end_io_wq->private;
@@ -2077,6 +2062,12 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
                for (i = 0; i < ret; i++)
                        btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
+
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+               btrfs_free_log_root_tree(NULL, fs_info);
+               btrfs_destroy_pinned_extent(fs_info->tree_root,
+                                           fs_info->pinned_extents);
+       }
 }
 
 int open_ctree(struct super_block *sb,
@@ -2151,6 +2142,7 @@ int open_ctree(struct super_block *sb,
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2164,6 +2156,7 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->free_chunk_lock);
        spin_lock_init(&fs_info->tree_mod_seq_lock);
        spin_lock_init(&fs_info->super_lock);
+       spin_lock_init(&fs_info->buffer_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->reloc_mutex);
        seqlock_init(&fs_info->profiles_lock);
@@ -2195,7 +2188,7 @@ int open_ctree(struct super_block *sb,
        fs_info->free_chunk_space = 0;
        fs_info->tree_mod_log = RB_ROOT;
        fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
-
+       fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
        /* readahead state */
        INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
        spin_lock_init(&fs_info->reada_lock);
@@ -2334,7 +2327,7 @@ int open_ctree(struct super_block *sb,
         * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
         */
        if (btrfs_check_super_csum(bh->b_data)) {
-               printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+               printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
                err = -EINVAL;
                goto fail_alloc;
        }
@@ -2353,7 +2346,7 @@ int open_ctree(struct super_block *sb,
 
        ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
        if (ret) {
-               printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+               printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
                err = -EINVAL;
                goto fail_alloc;
        }
@@ -2418,7 +2411,7 @@ int open_ctree(struct super_block *sb,
                features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
        if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
-               printk(KERN_ERR "btrfs: has skinny extents\n");
+               printk(KERN_ERR "BTRFS: has skinny extents\n");
 
        /*
         * flag our filesystem as having big metadata blocks if
@@ -2426,7 +2419,7 @@ int open_ctree(struct super_block *sb,
         */
        if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
                if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
-                       printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
+                       printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
                features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
        }
 
@@ -2443,7 +2436,7 @@ int open_ctree(struct super_block *sb,
         */
        if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
            (sectorsize != leafsize)) {
-               printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
+               printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
                                "are not allowed for mixed block groups on %s\n",
                                sb->s_id);
                goto fail_alloc;
@@ -2580,12 +2573,12 @@ int open_ctree(struct super_block *sb,
        sb->s_blocksize_bits = blksize_bits(sectorsize);
 
        if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-               printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
+               printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
 
        if (sectorsize != PAGE_SIZE) {
-               printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+               printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
                       "found on %s\n", (unsigned long)sectorsize, sb->s_id);
                goto fail_sb_buffer;
        }
@@ -2594,7 +2587,7 @@ int open_ctree(struct super_block *sb,
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to read the system "
+               printk(KERN_WARNING "BTRFS: failed to read the system "
                       "array on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
@@ -2611,7 +2604,7 @@ int open_ctree(struct super_block *sb,
                                           blocksize, generation);
        if (!chunk_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
-               printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
@@ -2623,7 +2616,7 @@ int open_ctree(struct super_block *sb,
 
        ret = btrfs_read_chunk_tree(chunk_root);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
@@ -2635,7 +2628,7 @@ int open_ctree(struct super_block *sb,
        btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
        if (!fs_devices->latest_bdev) {
-               printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+               printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
@@ -2650,7 +2643,7 @@ retry_root_backup:
                                          blocksize, generation);
        if (!tree_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
-               printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
 
                goto recovery_tree_root;
@@ -2721,50 +2714,56 @@ retry_root_backup:
 
        ret = btrfs_recover_balance(fs_info);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to recover balance\n");
+               printk(KERN_WARNING "BTRFS: failed to recover balance\n");
                goto fail_block_groups;
        }
 
        ret = btrfs_init_dev_stats(fs_info);
        if (ret) {
-               printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
+               printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
                       ret);
                goto fail_block_groups;
        }
 
        ret = btrfs_init_dev_replace(fs_info);
        if (ret) {
-               pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+               pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
                goto fail_block_groups;
        }
 
        btrfs_close_extra_devices(fs_info, fs_devices, 1);
 
-       ret = btrfs_init_space_info(fs_info);
+       ret = btrfs_sysfs_add_one(fs_info);
        if (ret) {
-               printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+               pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
                goto fail_block_groups;
        }
 
+       ret = btrfs_init_space_info(fs_info);
+       if (ret) {
+               printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
+               goto fail_sysfs;
+       }
+
        ret = btrfs_read_block_groups(extent_root);
        if (ret) {
-               printk(KERN_ERR "Failed to read block groups: %d\n", ret);
-               goto fail_block_groups;
+               printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
+               goto fail_sysfs;
        }
        fs_info->num_tolerated_disk_barrier_failures =
                btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
        if (fs_info->fs_devices->missing_devices >
             fs_info->num_tolerated_disk_barrier_failures &&
            !(sb->s_flags & MS_RDONLY)) {
-               printk(KERN_WARNING
-                      "Btrfs: too many missing devices, writeable mount is not allowed\n");
-               goto fail_block_groups;
+               printk(KERN_WARNING "BTRFS: "
+                       "too many missing devices, writeable mount is not allowed\n");
+               goto fail_sysfs;
        }
 
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (IS_ERR(fs_info->cleaner_kthread))
-               goto fail_block_groups;
+               goto fail_sysfs;
 
        fs_info->transaction_kthread = kthread_run(transaction_kthread,
                                                   tree_root,
@@ -2775,11 +2774,15 @@ retry_root_backup:
        if (!btrfs_test_opt(tree_root, SSD) &&
            !btrfs_test_opt(tree_root, NOSSD) &&
            !fs_info->fs_devices->rotating) {
-               printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
+               printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
                       "mode\n");
                btrfs_set_opt(fs_info->mount_opt, SSD);
        }
 
+       /* Set the real inode map cache flag */
+       if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE))
+               btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE);
+
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
                ret = btrfsic_mount(tree_root, fs_devices,
@@ -2788,7 +2791,7 @@ retry_root_backup:
                                    1 : 0,
                                    fs_info->check_integrity_print_mask);
                if (ret)
-                       printk(KERN_WARNING "btrfs: failed to initialize"
+                       printk(KERN_WARNING "BTRFS: failed to initialize"
                               " integrity check module %s\n", sb->s_id);
        }
 #endif
@@ -2801,7 +2804,7 @@ retry_root_backup:
                u64 bytenr = btrfs_super_log_root(disk_super);
 
                if (fs_devices->rw_devices == 0) {
-                       printk(KERN_WARNING "Btrfs log replay required "
+                       printk(KERN_WARNING "BTRFS: log replay required "
                               "on RO media\n");
                        err = -EIO;
                        goto fail_qgroup;
@@ -2824,7 +2827,7 @@ retry_root_backup:
                                                      generation + 1);
                if (!log_tree_root->node ||
                    !extent_buffer_uptodate(log_tree_root->node)) {
-                       printk(KERN_ERR "btrfs: failed to read log tree\n");
+                       printk(KERN_ERR "BTRFS: failed to read log tree\n");
                        free_extent_buffer(log_tree_root->node);
                        kfree(log_tree_root);
                        goto fail_trans_kthread;
@@ -2858,7 +2861,7 @@ retry_root_backup:
                ret = btrfs_recover_relocation(tree_root);
                if (ret < 0) {
                        printk(KERN_WARNING
-                              "btrfs: failed to recover relocation\n");
+                              "BTRFS: failed to recover relocation\n");
                        err = -EINVAL;
                        goto fail_qgroup;
                }
@@ -2888,14 +2891,14 @@ retry_root_backup:
 
        ret = btrfs_resume_balance_async(fs_info);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to resume balance\n");
+               printk(KERN_WARNING "BTRFS: failed to resume balance\n");
                close_ctree(tree_root);
                return ret;
        }
 
        ret = btrfs_resume_dev_replace_async(fs_info);
        if (ret) {
-               pr_warn("btrfs: failed to resume dev_replace\n");
+               pr_warn("BTRFS: failed to resume dev_replace\n");
                close_ctree(tree_root);
                return ret;
        }
@@ -2903,20 +2906,20 @@ retry_root_backup:
        btrfs_qgroup_rescan_resume(fs_info);
 
        if (create_uuid_tree) {
-               pr_info("btrfs: creating UUID tree\n");
+               pr_info("BTRFS: creating UUID tree\n");
                ret = btrfs_create_uuid_tree(fs_info);
                if (ret) {
-                       pr_warn("btrfs: failed to create the UUID tree %d\n",
+                       pr_warn("BTRFS: failed to create the UUID tree %d\n",
                                ret);
                        close_ctree(tree_root);
                        return ret;
                }
        } else if (check_uuid_tree ||
                   btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
-               pr_info("btrfs: checking UUID tree\n");
+               pr_info("BTRFS: checking UUID tree\n");
                ret = btrfs_check_uuid_tree(fs_info);
                if (ret) {
-                       pr_warn("btrfs: failed to check the UUID tree %d\n",
+                       pr_warn("BTRFS: failed to check the UUID tree %d\n",
                                ret);
                        close_ctree(tree_root);
                        return ret;
@@ -2942,6 +2945,9 @@ fail_cleaner:
         */
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
+fail_sysfs:
+       btrfs_sysfs_remove_one(fs_info);
+
 fail_block_groups:
        btrfs_put_block_group_cache(fs_info);
        btrfs_free_block_groups(fs_info);
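
The new fail_sysfs label follows the usual kernel unwind idiom: cleanup labels run in reverse order of setup, so a failure after btrfs_sysfs_add_one() lands on a label that undoes sysfs first and then falls through to the older teardown. A minimal sketch of the shape, not the real open_ctree():

int setup_sketch(struct btrfs_fs_info *fs_info)
{
        int ret;

        ret = btrfs_sysfs_add_one(fs_info);
        if (ret)
                goto fail_block_groups;         /* sysfs never registered */

        ret = btrfs_init_space_info(fs_info);
        if (ret)
                goto fail_sysfs;                /* must undo sysfs first */

        return 0;

fail_sysfs:
        btrfs_sysfs_remove_one(fs_info);
fail_block_groups:
        /* pre-existing teardown continues here */
        return ret;
}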
@@ -2997,7 +3003,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
                struct btrfs_device *device = (struct btrfs_device *)
                        bh->b_private;
 
-               printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
+               printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
                                          "I/O error on %s\n",
                                          rcu_str_deref(device->name));
                /* note, we don't set_buffer_write_io_error because we have
@@ -3116,7 +3122,7 @@ static int write_dev_supers(struct btrfs_device *device,
                        bh = __getblk(device->bdev, bytenr / 4096,
                                      BTRFS_SUPER_INFO_SIZE);
                        if (!bh) {
-                               printk(KERN_ERR "btrfs: couldn't get super "
+                               printk(KERN_ERR "BTRFS: couldn't get super "
                                       "buffer head for bytenr %Lu\n", bytenr);
                                errors++;
                                continue;
@@ -3137,7 +3143,10 @@ static int write_dev_supers(struct btrfs_device *device,
                 * we fua the first super.  The others we allow
                 * to go down lazy.
                 */
-               ret = btrfsic_submit_bh(WRITE_FUA, bh);
+               if (i == 0)
+                       ret = btrfsic_submit_bh(WRITE_FUA, bh);
+               else
+                       ret = btrfsic_submit_bh(WRITE_SYNC, bh);
                if (ret)
                        errors++;
        }
@@ -3183,7 +3192,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
                wait_for_completion(&device->flush_wait);
 
                if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-                       printk_in_rcu("btrfs: disabling barriers on dev %s\n",
+                       printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
                                      rcu_str_deref(device->name));
                        device->nobarriers = 1;
                } else if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3404,7 +3413,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
                        total_errors++;
        }
        if (total_errors > max_errors) {
-               printk(KERN_ERR "btrfs: %d errors while writing supers\n",
+               btrfs_err(root->fs_info, "%d errors while writing supers",
                       total_errors);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
@@ -3452,10 +3461,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
 
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_free_log(NULL, root);
-               btrfs_free_log_root_tree(NULL, fs_info);
-       }
 
        __btrfs_remove_free_space_cache(root->free_ino_pinned);
        __btrfs_remove_free_space_cache(root->free_ino_ctl);
@@ -3560,14 +3567,12 @@ int close_ctree(struct btrfs_root *root)
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret = btrfs_commit_super(root);
                if (ret)
-                       printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+                       btrfs_err(root->fs_info, "commit super ret %d", ret);
        }
 
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_error_commit_super(root);
 
-       btrfs_put_block_group_cache(fs_info);
-
        kthread_stop(fs_info->transaction_kthread);
        kthread_stop(fs_info->cleaner_kthread);
 
@@ -3577,12 +3582,16 @@ int close_ctree(struct btrfs_root *root)
        btrfs_free_qgroup_config(root->fs_info);
 
        if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
-               printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
+               btrfs_info(root->fs_info, "at unmount delalloc count %lld",
                       percpu_counter_sum(&fs_info->delalloc_bytes));
        }
 
+       btrfs_sysfs_remove_one(fs_info);
+
        del_fs_roots(fs_info);
 
+       btrfs_put_block_group_cache(fs_info);
+
        btrfs_free_block_groups(fs_info);
 
        btrfs_stop_all_workers(fs_info);
@@ -3800,55 +3809,55 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
        delayed_refs = &trans->delayed_refs;
 
        spin_lock(&delayed_refs->lock);
-       if (delayed_refs->num_entries == 0) {
+       if (atomic_read(&delayed_refs->num_entries) == 0) {
                spin_unlock(&delayed_refs->lock);
-               printk(KERN_INFO "delayed_refs has NO entry\n");
+               btrfs_info(root->fs_info, "delayed_refs has NO entry");
                return ret;
        }
 
-       while ((node = rb_first(&delayed_refs->root)) != NULL) {
-               struct btrfs_delayed_ref_head *head = NULL;
+       while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+               struct btrfs_delayed_ref_head *head;
                bool pin_bytes = false;
 
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               atomic_set(&ref->refs, 1);
-               if (btrfs_delayed_ref_is_head(ref)) {
-
-                       head = btrfs_delayed_node_to_head(ref);
-                       if (!mutex_trylock(&head->mutex)) {
-                               atomic_inc(&ref->refs);
-                               spin_unlock(&delayed_refs->lock);
-
-                               /* Need to wait for the delayed ref to run */
-                               mutex_lock(&head->mutex);
-                               mutex_unlock(&head->mutex);
-                               btrfs_put_delayed_ref(ref);
-
-                               spin_lock(&delayed_refs->lock);
-                               continue;
-                       }
-
-                       if (head->must_insert_reserved)
-                               pin_bytes = true;
-                       btrfs_free_delayed_extent_op(head->extent_op);
-                       delayed_refs->num_heads--;
-                       if (list_empty(&head->cluster))
-                               delayed_refs->num_heads_ready--;
-                       list_del_init(&head->cluster);
-               }
+               head = rb_entry(node, struct btrfs_delayed_ref_head,
+                               href_node);
+               if (!mutex_trylock(&head->mutex)) {
+                       atomic_inc(&head->node.refs);
+                       spin_unlock(&delayed_refs->lock);
 
-               ref->in_tree = 0;
-               rb_erase(&ref->rb_node, &delayed_refs->root);
-               delayed_refs->num_entries--;
-               spin_unlock(&delayed_refs->lock);
-               if (head) {
-                       if (pin_bytes)
-                               btrfs_pin_extent(root, ref->bytenr,
-                                                ref->num_bytes, 1);
+                       mutex_lock(&head->mutex);
                        mutex_unlock(&head->mutex);
+                       btrfs_put_delayed_ref(&head->node);
+                       spin_lock(&delayed_refs->lock);
+                       continue;
+               }
+               spin_lock(&head->lock);
+               while ((node = rb_first(&head->ref_root)) != NULL) {
+                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
+                                      rb_node);
+                       ref->in_tree = 0;
+                       rb_erase(&ref->rb_node, &head->ref_root);
+                       atomic_dec(&delayed_refs->num_entries);
+                       btrfs_put_delayed_ref(ref);
+                       cond_resched_lock(&head->lock);
                }
-               btrfs_put_delayed_ref(ref);
+               if (head->must_insert_reserved)
+                       pin_bytes = true;
+               btrfs_free_delayed_extent_op(head->extent_op);
+               delayed_refs->num_heads--;
+               if (head->processing == 0)
+                       delayed_refs->num_heads_ready--;
+               atomic_dec(&delayed_refs->num_entries);
+               head->node.in_tree = 0;
+               rb_erase(&head->href_node, &delayed_refs->href_root);
+               spin_unlock(&head->lock);
+               spin_unlock(&delayed_refs->lock);
+               mutex_unlock(&head->mutex);
 
+               if (pin_bytes)
+                       btrfs_pin_extent(root, head->node.bytenr,
+                                        head->node.num_bytes, 1);
+               btrfs_put_delayed_ref(&head->node);
                cond_resched();
                spin_lock(&delayed_refs->lock);
        }
index 9c01509dd8abfb0fddc5480b7f4cb3b73002aad4..9c9ecc93ae2c3152d85cb2e15eeabd7bec95160f 100644 (file)
@@ -35,6 +35,7 @@
 #include "locking.h"
 #include "free-space-cache.h"
 #include "math.h"
+#include "sysfs.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -441,7 +442,8 @@ next:
                        if (ret)
                                break;
 
-                       if (need_resched()) {
+                       if (need_resched() ||
+                           rwsem_is_contended(&fs_info->extent_commit_sem)) {
                                caching_ctl->progress = last;
                                btrfs_release_path(path);
                                up_read(&fs_info->extent_commit_sem);
@@ -855,12 +857,14 @@ again:
                        btrfs_put_delayed_ref(&head->node);
                        goto search_again;
                }
+               spin_lock(&head->lock);
                if (head->extent_op && head->extent_op->update_flags)
                        extent_flags |= head->extent_op->flags_to_set;
                else
                        BUG_ON(num_refs == 0);
 
                num_refs += head->node.ref_mod;
+               spin_unlock(&head->lock);
                mutex_unlock(&head->mutex);
        }
        spin_unlock(&delayed_refs->lock);
@@ -1070,11 +1074,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
        __le64 lenum;
 
        lenum = cpu_to_le64(root_objectid);
-       high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
+       high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
        lenum = cpu_to_le64(owner);
-       low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+       low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
        lenum = cpu_to_le64(offset);
-       low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+       low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
 
        return ((u64)high_crc << 31) ^ (u64)low_crc;
 }
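
The only change in the hash hunk above is crc32c() becoming btrfs_crc32c(). As far as this series goes, btrfs_crc32c() is a wrapper that routes the checksum through the kernel crypto API rather than calling the library helper directly; the contract is the same, so a behavior-equivalent stand-in would look like the sketch below. Illustrative only, not the real fs/btrfs/hash.c:

#include <linux/crc32c.h>

/* same contract as the old direct call */
u32 btrfs_crc32c(u32 crc, const void *address, unsigned length)
{
        return crc32c(crc, address, length);
}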
@@ -2285,64 +2289,62 @@ static noinline struct btrfs_delayed_ref_node *
 select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
        struct rb_node *node;
-       struct btrfs_delayed_ref_node *ref;
-       int action = BTRFS_ADD_DELAYED_REF;
-again:
+       struct btrfs_delayed_ref_node *ref, *last = NULL;
+
        /*
         * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
         * this prevents ref count from going down to zero when
         * there are still pending delayed refs.
         */
-       node = rb_prev(&head->node.rb_node);
-       while (1) {
-               if (!node)
-                       break;
+       node = rb_first(&head->ref_root);
+       while (node) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node,
                                rb_node);
-               if (ref->bytenr != head->node.bytenr)
-                       break;
-               if (ref->action == action)
+               if (ref->action == BTRFS_ADD_DELAYED_REF)
                        return ref;
-               node = rb_prev(node);
-       }
-       if (action == BTRFS_ADD_DELAYED_REF) {
-               action = BTRFS_DROP_DELAYED_REF;
-               goto again;
+               else if (last == NULL)
+                       last = ref;
+               node = rb_next(node);
        }
-       return NULL;
+       return last;
 }
 
 /*
  * Returns 0 on success or if called with an already aborted transaction.
  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
  */
-static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
-                                      struct btrfs_root *root,
-                                      struct list_head *cluster)
+static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            unsigned long nr)
 {
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *locked_ref = NULL;
        struct btrfs_delayed_extent_op *extent_op;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       ktime_t start = ktime_get();
        int ret;
-       int count = 0;
+       unsigned long count = 0;
+       unsigned long actual_count = 0;
        int must_insert_reserved = 0;
 
        delayed_refs = &trans->transaction->delayed_refs;
        while (1) {
                if (!locked_ref) {
-                       /* pick a new head ref from the cluster list */
-                       if (list_empty(cluster))
+                       if (count >= nr)
                                break;
 
-                       locked_ref = list_entry(cluster->next,
-                                    struct btrfs_delayed_ref_head, cluster);
+                       spin_lock(&delayed_refs->lock);
+                       locked_ref = btrfs_select_ref_head(trans);
+                       if (!locked_ref) {
+                               spin_unlock(&delayed_refs->lock);
+                               break;
+                       }
 
                        /* grab the lock that says we are going to process
                         * all the refs for this head */
                        ret = btrfs_delayed_ref_lock(trans, locked_ref);
-
+                       spin_unlock(&delayed_refs->lock);
                        /*
                         * we may have dropped the spin lock to get the head
                         * mutex lock, and that might have given someone else
@@ -2363,6 +2365,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                 * finish.  If we merged anything we need to re-loop so we can
                 * get a good ref.
                 */
+               spin_lock(&locked_ref->lock);
                btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
                                         locked_ref);
 
@@ -2374,17 +2377,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
                if (ref && ref->seq &&
                    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
-                       /*
-                        * there are still refs with lower seq numbers in the
-                        * process of being added. Don't run this ref yet.
-                        */
-                       list_del_init(&locked_ref->cluster);
+                       spin_unlock(&locked_ref->lock);
                        btrfs_delayed_ref_unlock(locked_ref);
-                       locked_ref = NULL;
+                       spin_lock(&delayed_refs->lock);
+                       locked_ref->processing = 0;
                        delayed_refs->num_heads_ready++;
                        spin_unlock(&delayed_refs->lock);
+                       locked_ref = NULL;
                        cond_resched();
-                       spin_lock(&delayed_refs->lock);
                        continue;
                }
 
@@ -2399,6 +2399,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                locked_ref->extent_op = NULL;
 
                if (!ref) {
                        /* All delayed refs have been processed, go ahead
                         * and send the head node to run_one_delayed_ref,
                         * so that any accounting fixes can happen
@@ -2411,8 +2413,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                        }
 
                        if (extent_op) {
-                               spin_unlock(&delayed_refs->lock);
-
+                               spin_unlock(&locked_ref->lock);
                                ret = run_delayed_extent_op(trans, root,
                                                            ref, extent_op);
                                btrfs_free_delayed_extent_op(extent_op);
@@ -2426,19 +2427,39 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                                         */
                                        if (must_insert_reserved)
                                                locked_ref->must_insert_reserved = 1;
+                                       locked_ref->processing = 0;
                                        btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
-                                       spin_lock(&delayed_refs->lock);
                                        btrfs_delayed_ref_unlock(locked_ref);
                                        return ret;
                                }
+                               continue;
+                       }
 
-                               goto next;
+                       /*
+                        * Need to drop our head ref lock and re-acquire the
+                        * delayed ref lock and then re-check to make sure
+                        * nobody got added.
+                        */
+                       spin_unlock(&locked_ref->lock);
+                       spin_lock(&delayed_refs->lock);
+                       spin_lock(&locked_ref->lock);
+                       if (rb_first(&locked_ref->ref_root)) {
+                               spin_unlock(&locked_ref->lock);
+                               spin_unlock(&delayed_refs->lock);
+                               continue;
                        }
+                       ref->in_tree = 0;
+                       delayed_refs->num_heads--;
+                       rb_erase(&locked_ref->href_node,
+                                &delayed_refs->href_root);
+                       spin_unlock(&delayed_refs->lock);
+               } else {
+                       actual_count++;
+                       ref->in_tree = 0;
+                       rb_erase(&ref->rb_node, &locked_ref->ref_root);
                }
+               atomic_dec(&delayed_refs->num_entries);
 
-               ref->in_tree = 0;
-               rb_erase(&ref->rb_node, &delayed_refs->root);
-               delayed_refs->num_entries--;
                if (!btrfs_delayed_ref_is_head(ref)) {
                        /*
                         * when we play the delayed ref, also correct the
@@ -2455,20 +2476,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                        default:
                                WARN_ON(1);
                        }
-               } else {
-                       list_del_init(&locked_ref->cluster);
                }
-               spin_unlock(&delayed_refs->lock);
+               spin_unlock(&locked_ref->lock);
 
                ret = run_one_delayed_ref(trans, root, ref, extent_op,
                                          must_insert_reserved);
 
                btrfs_free_delayed_extent_op(extent_op);
                if (ret) {
+                       locked_ref->processing = 0;
                        btrfs_delayed_ref_unlock(locked_ref);
                        btrfs_put_delayed_ref(ref);
                        btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
-                       spin_lock(&delayed_refs->lock);
                        return ret;
                }
 
@@ -2484,11 +2503,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                }
                btrfs_put_delayed_ref(ref);
                count++;
-next:
                cond_resched();
+       }
+
+       /*
+        * We don't want to include ref heads, since empty ref heads only do
+        * accounting and no actual extent tree updates; counting them would
+        * drastically skew the runtime average down.
+        */
+       if (actual_count > 0) {
+               u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
+               u64 avg;
+
+               /*
+                * We weigh the current average higher than our current runtime
+                * to avoid large swings in the average.
+                */
                spin_lock(&delayed_refs->lock);
+               avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
+               avg = div64_u64(avg, 4);
+               fs_info->avg_delayed_ref_runtime = avg;
+               spin_unlock(&delayed_refs->lock);
        }
-       return count;
+       return 0;
 }
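
The accounting block above keeps a per-filesystem estimate of how long one delayed ref takes to run, weighting the previous average 3:1 against the newest batch so a single slow batch cannot swing the estimate far. A minimal userspace sketch of that smoothing, with made-up sample values (nothing below is kernel code):

    #include <stdio.h>
    #include <stdint.h>

    /* Weighted moving average: the old estimate counts three times,
     * the new sample once, matching avg = (avg * 3 + runtime) / 4. */
    uint64_t update_avg(uint64_t avg, uint64_t runtime_ns)
    {
            return (avg * 3 + runtime_ns) / 4;
    }

    int main(void)
    {
            uint64_t avg = 1000;    /* hypothetical starting estimate, ns per ref */
            uint64_t samples[] = { 900, 950, 20000, 980 };  /* made-up batch runtimes */

            for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                    avg = update_avg(avg, samples[i]);
                    printf("sample %-6llu -> avg %llu\n",
                           (unsigned long long)samples[i],
                           (unsigned long long)avg);
            }
            return 0;
    }

With the 3:1 weighting, the 20000 ns outlier above pulls the estimate to roughly 5700 ns rather than jumping it all the way to 20000.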
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2570,16 +2607,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
-                     int count)
-{
-       int val = atomic_read(&delayed_refs->ref_seq);
-
-       if (val < seq || val >= seq + count)
-               return 1;
-       return 0;
-}
-
 static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
 {
        u64 num_bytes;
@@ -2596,7 +2623,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
        return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
 }
 
-int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root)
 {
        struct btrfs_block_rsv *global_rsv;
@@ -2625,6 +2652,22 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       u64 num_entries =
+               atomic_read(&trans->transaction->delayed_refs.num_entries);
+       u64 avg_runtime;
+
+       smp_mb();
+       avg_runtime = fs_info->avg_delayed_ref_runtime;
+       if (num_entries * avg_runtime >= NSEC_PER_SEC)
+               return 1;
+
+       return btrfs_check_space_for_delayed_refs(trans, root);
+}
+
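
The new btrfs_should_throttle_delayed_refs asks whether draining the queue at the measured per-ref cost would amount to a second or more of work, and only then falls back to the space check. A hedged sketch of that projection (the constant and arithmetic mirror the hunk; the standalone helper is illustrative):

    #include <stdint.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Throttle when the projected cost of running every queued delayed
     * ref, num_entries * avg_runtime, reaches one second of work. */
    int should_throttle(uint64_t num_entries, uint64_t avg_runtime_ns)
    {
            return num_entries * avg_runtime_ns >= NSEC_PER_SEC;
    }

At an average of 1000 ns per ref, for example, throttling would begin once roughly a million entries are queued.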
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -2640,13 +2683,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 {
        struct rb_node *node;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_delayed_ref_node *ref;
-       struct list_head cluster;
+       struct btrfs_delayed_ref_head *head;
        int ret;
-       u64 delayed_start;
        int run_all = count == (unsigned long)-1;
        int run_most = 0;
-       int loops;
 
        /* We'll clean this up in btrfs_cleanup_transaction */
        if (trans->aborted)
@@ -2658,130 +2698,40 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
 
        delayed_refs = &trans->transaction->delayed_refs;
-       INIT_LIST_HEAD(&cluster);
        if (count == 0) {
-               count = delayed_refs->num_entries * 2;
+               count = atomic_read(&delayed_refs->num_entries) * 2;
                run_most = 1;
        }
 
-       if (!run_all && !run_most) {
-               int old;
-               int seq = atomic_read(&delayed_refs->ref_seq);
-
-progress:
-               old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
-               if (old) {
-                       DEFINE_WAIT(__wait);
-                       if (delayed_refs->flushing ||
-                           !btrfs_should_throttle_delayed_refs(trans, root))
-                               return 0;
-
-                       prepare_to_wait(&delayed_refs->wait, &__wait,
-                                       TASK_UNINTERRUPTIBLE);
-
-                       old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
-                       if (old) {
-                               schedule();
-                               finish_wait(&delayed_refs->wait, &__wait);
-
-                               if (!refs_newer(delayed_refs, seq, 256))
-                                       goto progress;
-                               else
-                                       return 0;
-                       } else {
-                               finish_wait(&delayed_refs->wait, &__wait);
-                               goto again;
-                       }
-               }
-
-       } else {
-               atomic_inc(&delayed_refs->procs_running_refs);
-       }
-
 again:
-       loops = 0;
-       spin_lock(&delayed_refs->lock);
-
 #ifdef SCRAMBLE_DELAYED_REFS
        delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
-
-       while (1) {
-               if (!(run_all || run_most) &&
-                   !btrfs_should_throttle_delayed_refs(trans, root))
-                       break;
-
-               /*
-                * go find something we can process in the rbtree.  We start at
-                * the beginning of the tree, and then build a cluster
-                * of refs to process starting at the first one we are able to
-                * lock
-                */
-               delayed_start = delayed_refs->run_delayed_start;
-               ret = btrfs_find_ref_cluster(trans, &cluster,
-                                            delayed_refs->run_delayed_start);
-               if (ret)
-                       break;
-
-               ret = run_clustered_refs(trans, root, &cluster);
-               if (ret < 0) {
-                       btrfs_release_ref_cluster(&cluster);
-                       spin_unlock(&delayed_refs->lock);
-                       btrfs_abort_transaction(trans, root, ret);
-                       atomic_dec(&delayed_refs->procs_running_refs);
-                       wake_up(&delayed_refs->wait);
-                       return ret;
-               }
-
-               atomic_add(ret, &delayed_refs->ref_seq);
-
-               count -= min_t(unsigned long, ret, count);
-
-               if (count == 0)
-                       break;
-
-               if (delayed_start >= delayed_refs->run_delayed_start) {
-                       if (loops == 0) {
-                               /*
-                                * btrfs_find_ref_cluster looped. let's do one
-                                * more cycle. if we don't run any delayed ref
-                                * during that cycle (because we can't because
-                                * all of them are blocked), bail out.
-                                */
-                               loops = 1;
-                       } else {
-                               /*
-                                * no runnable refs left, stop trying
-                                */
-                               BUG_ON(run_all);
-                               break;
-                       }
-               }
-               if (ret) {
-                       /* refs were run, let's reset staleness detection */
-                       loops = 0;
-               }
+       ret = __btrfs_run_delayed_refs(trans, root, count);
+       if (ret < 0) {
+               btrfs_abort_transaction(trans, root, ret);
+               return ret;
        }
 
        if (run_all) {
-               if (!list_empty(&trans->new_bgs)) {
-                       spin_unlock(&delayed_refs->lock);
+               if (!list_empty(&trans->new_bgs))
                        btrfs_create_pending_block_groups(trans, root);
-                       spin_lock(&delayed_refs->lock);
-               }
 
-               node = rb_first(&delayed_refs->root);
-               if (!node)
+               spin_lock(&delayed_refs->lock);
+               node = rb_first(&delayed_refs->href_root);
+               if (!node) {
+                       spin_unlock(&delayed_refs->lock);
                        goto out;
+               }
                count = (unsigned long)-1;
 
                while (node) {
-                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                                      rb_node);
-                       if (btrfs_delayed_ref_is_head(ref)) {
-                               struct btrfs_delayed_ref_head *head;
+                       head = rb_entry(node, struct btrfs_delayed_ref_head,
+                                       href_node);
+                       if (btrfs_delayed_ref_is_head(&head->node)) {
+                               struct btrfs_delayed_ref_node *ref;
 
-                               head = btrfs_delayed_node_to_head(ref);
+                               ref = &head->node;
                                atomic_inc(&ref->refs);
 
                                spin_unlock(&delayed_refs->lock);
@@ -2795,20 +2745,16 @@ again:
                                btrfs_put_delayed_ref(ref);
                                cond_resched();
                                goto again;
+                       } else {
+                               WARN_ON(1);
                        }
                        node = rb_next(node);
                }
                spin_unlock(&delayed_refs->lock);
-               schedule_timeout(1);
+               cond_resched();
                goto again;
        }
 out:
-       atomic_dec(&delayed_refs->procs_running_refs);
-       smp_mb();
-       if (waitqueue_active(&delayed_refs->wait))
-               wake_up(&delayed_refs->wait);
-
-       spin_unlock(&delayed_refs->lock);
        assert_qgroups_uptodate(trans);
        return 0;
 }
@@ -2850,12 +2796,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        struct rb_node *node;
        int ret = 0;
 
-       ret = -ENOENT;
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
        head = btrfs_find_delayed_ref_head(trans, bytenr);
-       if (!head)
-               goto out;
+       if (!head) {
+               spin_unlock(&delayed_refs->lock);
+               return 0;
+       }
 
        if (!mutex_trylock(&head->mutex)) {
                atomic_inc(&head->node.refs);
@@ -2872,40 +2819,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
                btrfs_put_delayed_ref(&head->node);
                return -EAGAIN;
        }
+       spin_unlock(&delayed_refs->lock);
 
-       node = rb_prev(&head->node.rb_node);
-       if (!node)
-               goto out_unlock;
-
-       ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-
-       if (ref->bytenr != bytenr)
-               goto out_unlock;
-
-       ret = 1;
-       if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
-               goto out_unlock;
+       spin_lock(&head->lock);
+       node = rb_first(&head->ref_root);
+       while (node) {
+               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+               node = rb_next(node);
 
-       data_ref = btrfs_delayed_node_to_data_ref(ref);
+               /* If it's a shared ref we know a cross reference exists */
+               if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
+                       ret = 1;
+                       break;
+               }
 
-       node = rb_prev(node);
-       if (node) {
-               int seq = ref->seq;
+               data_ref = btrfs_delayed_node_to_data_ref(ref);
 
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               if (ref->bytenr == bytenr && ref->seq == seq)
-                       goto out_unlock;
+               /*
+                * If our ref doesn't match the one we're currently looking at
+                * then we have a cross reference.
+                */
+               if (data_ref->root != root->root_key.objectid ||
+                   data_ref->objectid != objectid ||
+                   data_ref->offset != offset) {
+                       ret = 1;
+                       break;
+               }
        }
-
-       if (data_ref->root != root->root_key.objectid ||
-           data_ref->objectid != objectid || data_ref->offset != offset)
-               goto out_unlock;
-
-       ret = 0;
-out_unlock:
+       spin_unlock(&head->lock);
        mutex_unlock(&head->mutex);
-out:
-       spin_unlock(&delayed_refs->lock);
        return ret;
 }
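
Instead of peeking at rb_prev neighbours in the global tree, check_delayed_ref now walks every ref queued under the head: a shared backref, or a data ref whose (root, objectid, offset) triple differs from the caller's, already proves a cross reference. A simplified userspace rendering of that decision (the struct and field names here are hypothetical):

    #include <stdint.h>

    struct data_ref {
            uint64_t root, objectid, offset;
            int shared;     /* a non-data (shared) backref */
    };

    /* Return 1 if any queued ref implies the extent is referenced from
     * somewhere other than (root, objectid, offset). */
    int has_cross_ref(const struct data_ref *refs, int nr,
                      uint64_t root, uint64_t objectid, uint64_t offset)
    {
            for (int i = 0; i < nr; i++) {
                    if (refs[i].shared)
                            return 1;       /* shared ref: cross reference by definition */
                    if (refs[i].root != root ||
                        refs[i].objectid != objectid ||
                        refs[i].offset != offset)
                            return 1;       /* referenced by a different owner */
            }
            return 0;
    }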
 
@@ -3402,6 +3344,23 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
        return readonly;
 }
 
+static const char *alloc_name(u64 flags)
+{
+       switch (flags) {
+       case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
+               return "mixed";
+       case BTRFS_BLOCK_GROUP_METADATA:
+               return "metadata";
+       case BTRFS_BLOCK_GROUP_DATA:
+               return "data";
+       case BTRFS_BLOCK_GROUP_SYSTEM:
+               return "system";
+       default:
+               WARN_ON(1);
+               return "invalid-combination";
+       }
+}
+
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                             u64 total_bytes, u64 bytes_used,
                             struct btrfs_space_info **space_info)
@@ -3439,8 +3398,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                return ret;
        }
 
-       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
                INIT_LIST_HEAD(&found->block_groups[i]);
+               kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
+       }
        init_rwsem(&found->groups_sem);
        spin_lock_init(&found->lock);
        found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -3457,11 +3418,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->chunk_alloc = 0;
        found->flush = 0;
        init_waitqueue_head(&found->wait);
+
+       ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
+                                   info->space_info_kobj, "%s",
+                                   alloc_name(found->flags));
+       if (ret) {
+               kfree(found);
+               return ret;
+       }
+
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
        if (flags & BTRFS_BLOCK_GROUP_DATA)
                info->data_sinfo = found;
-       return 0;
+
+       return ret;
 }
 
 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
@@ -4637,7 +4608,7 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
                             u64 num_bytes)
 {
        struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
-       if (global_rsv->full || global_rsv == block_rsv ||
+       if (global_rsv == block_rsv ||
            block_rsv->space_info != global_rsv->space_info)
                global_rsv = NULL;
        block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
@@ -5916,24 +5887,16 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 {
        struct btrfs_delayed_ref_head *head;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_delayed_ref_node *ref;
-       struct rb_node *node;
        int ret = 0;
 
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
        head = btrfs_find_delayed_ref_head(trans, bytenr);
        if (!head)
-               goto out;
+               goto out_delayed_unlock;
 
-       node = rb_prev(&head->node.rb_node);
-       if (!node)
-               goto out;
-
-       ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-
-       /* there are still entries for this ref, we can't drop it */
-       if (ref->bytenr == bytenr)
+       spin_lock(&head->lock);
+       if (rb_first(&head->ref_root))
                goto out;
 
        if (head->extent_op) {
@@ -5955,19 +5918,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
         * ahead and process it.
         */
        head->node.in_tree = 0;
-       rb_erase(&head->node.rb_node, &delayed_refs->root);
+       rb_erase(&head->href_node, &delayed_refs->href_root);
 
-       delayed_refs->num_entries--;
+       atomic_dec(&delayed_refs->num_entries);
 
        /*
         * we don't take a ref on the node because we're removing it from the
         * tree, so we just steal the ref the tree was holding.
         */
        delayed_refs->num_heads--;
-       if (list_empty(&head->cluster))
+       if (head->processing == 0)
                delayed_refs->num_heads_ready--;
-
-       list_del_init(&head->cluster);
+       head->processing = 0;
+       spin_unlock(&head->lock);
        spin_unlock(&delayed_refs->lock);
 
        BUG_ON(head->extent_op);
@@ -5978,6 +5941,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
        btrfs_put_delayed_ref(&head->node);
        return ret;
 out:
+       spin_unlock(&head->lock);
+
+out_delayed_unlock:
        spin_unlock(&delayed_refs->lock);
        return 0;
 }
@@ -6145,11 +6111,29 @@ int __get_raid_index(u64 flags)
        return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
 }
 
-static int get_block_group_index(struct btrfs_block_group_cache *cache)
+int get_block_group_index(struct btrfs_block_group_cache *cache)
 {
        return __get_raid_index(cache->flags);
 }
 
+static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10]     = "raid10",
+       [BTRFS_RAID_RAID1]      = "raid1",
+       [BTRFS_RAID_DUP]        = "dup",
+       [BTRFS_RAID_RAID0]      = "raid0",
+       [BTRFS_RAID_SINGLE]     = "single",
+       [BTRFS_RAID_RAID5]      = "raid5",
+       [BTRFS_RAID_RAID6]      = "raid6",
+};
+
+static const char *get_raid_name(enum btrfs_raid_types type)
+{
+       if (type >= BTRFS_NR_RAID_TYPES)
+               return NULL;
+
+       return btrfs_raid_type_names[type];
+}
+
 enum btrfs_loop_type {
        LOOP_CACHING_NOWAIT = 0,
        LOOP_CACHING_WAIT = 1,
@@ -6177,7 +6161,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
        struct btrfs_root *root = orig_root->fs_info->extent_root;
        struct btrfs_free_cluster *last_ptr = NULL;
        struct btrfs_block_group_cache *block_group = NULL;
-       struct btrfs_block_group_cache *used_block_group;
        u64 search_start = 0;
        u64 max_extent_size = 0;
        int empty_cluster = 2 * 1024 * 1024;
@@ -6186,7 +6169,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
        int index = __get_raid_index(flags);
        int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
                RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
-       bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
        bool use_cluster = true;
@@ -6239,7 +6221,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
        if (search_start == hint_byte) {
                block_group = btrfs_lookup_block_group(root->fs_info,
                                                       search_start);
-               used_block_group = block_group;
                /*
                 * we don't want to use the block group if it doesn't match our
                 * allocation bits, or if its not cached.
@@ -6276,7 +6257,6 @@ search:
                u64 offset;
                int cached;
 
-               used_block_group = block_group;
                btrfs_get_block_group(block_group);
                search_start = block_group->key.objectid;
 
@@ -6304,7 +6284,6 @@ search:
 have_block_group:
                cached = block_group_cache_done(block_group);
                if (unlikely(!cached)) {
-                       found_uncached_bg = true;
                        ret = cache_block_group(block_group, 0);
                        BUG_ON(ret < 0);
                        ret = 0;
@@ -6320,6 +6299,7 @@ have_block_group:
                 * let's look there
                 */
                if (last_ptr) {
+                       struct btrfs_block_group_cache *used_block_group;
                        unsigned long aligned_cluster;
                        /*
                         * the refill lock keeps out other
@@ -6330,10 +6310,8 @@ have_block_group:
                        if (used_block_group != block_group &&
                            (!used_block_group ||
                             used_block_group->ro ||
-                            !block_group_bits(used_block_group, flags))) {
-                               used_block_group = block_group;
+                            !block_group_bits(used_block_group, flags)))
                                goto refill_cluster;
-                       }
 
                        if (used_block_group != block_group)
                                btrfs_get_block_group(used_block_group);
@@ -6347,17 +6325,19 @@ have_block_group:
                                /* we have a block, we're done */
                                spin_unlock(&last_ptr->refill_lock);
                                trace_btrfs_reserve_extent_cluster(root,
-                                       block_group, search_start, num_bytes);
+                                               used_block_group,
+                                               search_start, num_bytes);
+                               if (used_block_group != block_group) {
+                                       btrfs_put_block_group(block_group);
+                                       block_group = used_block_group;
+                               }
                                goto checks;
                        }
 
                        WARN_ON(last_ptr->block_group != used_block_group);
-                       if (used_block_group != block_group) {
+                       if (used_block_group != block_group)
                                btrfs_put_block_group(used_block_group);
-                               used_block_group = block_group;
-                       }
 refill_cluster:
-                       BUG_ON(used_block_group != block_group);
                        /* If we are on LOOP_NO_EMPTY_SIZE, we can't
                         * set up a new cluster, so let's just skip it
                         * and let the allocator find whatever block
@@ -6476,25 +6456,25 @@ unclustered_alloc:
                        goto loop;
                }
 checks:
-               search_start = stripe_align(root, used_block_group,
+               search_start = stripe_align(root, block_group,
                                            offset, num_bytes);
 
                /* move on to the next group */
                if (search_start + num_bytes >
-                   used_block_group->key.objectid + used_block_group->key.offset) {
-                       btrfs_add_free_space(used_block_group, offset, num_bytes);
+                   block_group->key.objectid + block_group->key.offset) {
+                       btrfs_add_free_space(block_group, offset, num_bytes);
                        goto loop;
                }
 
                if (offset < search_start)
-                       btrfs_add_free_space(used_block_group, offset,
+                       btrfs_add_free_space(block_group, offset,
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
-               ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
+               ret = btrfs_update_reserved_bytes(block_group, num_bytes,
                                                  alloc_type);
                if (ret == -EAGAIN) {
-                       btrfs_add_free_space(used_block_group, offset, num_bytes);
+                       btrfs_add_free_space(block_group, offset, num_bytes);
                        goto loop;
                }
 
@@ -6504,16 +6484,12 @@ checks:
 
                trace_btrfs_reserve_extent(orig_root, block_group,
                                           search_start, num_bytes);
-               if (used_block_group != block_group)
-                       btrfs_put_block_group(used_block_group);
                btrfs_put_block_group(block_group);
                break;
 loop:
                failed_cluster_refill = false;
                failed_alloc = false;
                BUG_ON(index != get_block_group_index(block_group));
-               if (used_block_group != block_group)
-                       btrfs_put_block_group(used_block_group);
                btrfs_put_block_group(block_group);
        }
        up_read(&space_info->groups_sem);
@@ -6584,12 +6560,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
        int index = 0;
 
        spin_lock(&info->lock);
-       printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
+       printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
               info->flags,
               info->total_bytes - info->bytes_used - info->bytes_pinned -
               info->bytes_reserved - info->bytes_readonly,
               (info->full) ? "" : "not ");
-       printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
+       printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
               "reserved=%llu, may_use=%llu, readonly=%llu\n",
               info->total_bytes, info->bytes_used, info->bytes_pinned,
               info->bytes_reserved, info->bytes_may_use,
@@ -6603,7 +6579,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 again:
        list_for_each_entry(cache, &info->block_groups[index], list) {
                spin_lock(&cache->lock);
-               printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
+               printk(KERN_INFO "BTRFS: "
+                          "block group %llu has %llu bytes, "
+                          "%llu used %llu pinned %llu reserved %s\n",
                       cache->key.objectid, cache->key.offset,
                       btrfs_block_group_used(&cache->item), cache->pinned,
                       cache->reserved, cache->ro ? "[readonly]" : "");
@@ -6966,7 +6944,7 @@ again:
                                /*DEFAULT_RATELIMIT_BURST*/ 1);
                if (__ratelimit(&_rs))
                        WARN(1, KERN_DEBUG
-                               "btrfs: block rsv returned %d\n", ret);
+                               "BTRFS: block rsv returned %d\n", ret);
        }
 try_reserve:
        ret = reserve_metadata_bytes(root, block_rsv, blocksize,
@@ -7714,7 +7692,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 
                        btrfs_end_transaction_throttle(trans, tree_root);
                        if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
-                               pr_debug("btrfs: drop snapshot early exit\n");
+                               pr_debug("BTRFS: drop snapshot early exit\n");
                                err = -EAGAIN;
                                goto out_free;
                        }
@@ -7779,7 +7757,7 @@ out:
         */
        if (!for_reloc && root_dropped == false)
                btrfs_add_dead_root(root);
-       if (err)
+       if (err && err != -EAGAIN)
                btrfs_std_error(root->fs_info, err);
        return err;
 }
@@ -8333,6 +8311,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        release_global_block_rsv(info);
 
        while (!list_empty(&info->space_info)) {
+               int i;
+
                space_info = list_entry(info->space_info.next,
                                        struct btrfs_space_info,
                                        list);
@@ -8343,9 +8323,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                                dump_space_info(space_info, 0, 0);
                        }
                }
-               percpu_counter_destroy(&space_info->total_bytes_pinned);
                list_del(&space_info->list);
-               kfree(space_info);
+               for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+                       struct kobject *kobj;
+                       kobj = &space_info->block_group_kobjs[i];
+                       if (kobj->parent) {
+                               kobject_del(kobj);
+                               kobject_put(kobj);
+                       }
+               }
+               kobject_del(&space_info->kobj);
+               kobject_put(&space_info->kobj);
        }
        return 0;
 }
@@ -8356,10 +8344,57 @@ static void __link_block_group(struct btrfs_space_info *space_info,
        int index = get_block_group_index(cache);
 
        down_write(&space_info->groups_sem);
+       if (list_empty(&space_info->block_groups[index])) {
+               struct kobject *kobj = &space_info->block_group_kobjs[index];
+               int ret;
+
+               kobject_get(&space_info->kobj); /* put in release */
+               ret = kobject_add(kobj, &space_info->kobj, "%s",
+                                 get_raid_name(index));
+               if (ret) {
+                       pr_warn("BTRFS: failed to add kobject for block cache, ignoring\n");
+                       kobject_put(&space_info->kobj);
+               }
+       }
        list_add_tail(&cache->list, &space_info->block_groups[index]);
        up_write(&space_info->groups_sem);
 }
 
+static struct btrfs_block_group_cache *
+btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
+{
+       struct btrfs_block_group_cache *cache;
+
+       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       if (!cache)
+               return NULL;
+
+       cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+                                       GFP_NOFS);
+       if (!cache->free_space_ctl) {
+               kfree(cache);
+               return NULL;
+       }
+
+       cache->key.objectid = start;
+       cache->key.offset = size;
+       cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+
+       cache->sectorsize = root->sectorsize;
+       cache->fs_info = root->fs_info;
+       cache->full_stripe_len = btrfs_full_stripe_len(root,
+                                              &root->fs_info->mapping_tree,
+                                              start);
+       atomic_set(&cache->count, 1);
+       spin_lock_init(&cache->lock);
+       INIT_LIST_HEAD(&cache->list);
+       INIT_LIST_HEAD(&cache->cluster_list);
+       INIT_LIST_HEAD(&cache->new_bg_list);
+       btrfs_init_free_space_ctl(cache);
+
+       return cache;
+}
+
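
btrfs_create_block_group_cache folds the allocation and setup that btrfs_read_block_groups and btrfs_make_block_group previously duplicated into a single constructor, including the unwind when the embedded free_space_ctl allocation fails. A minimal sketch of that constructor pattern (the types below are placeholders, not the btrfs structures):

    #include <stdlib.h>

    struct free_space_ctl { int placeholder; };

    struct block_group {
            unsigned long long start, size;
            struct free_space_ctl *ctl;
    };

    /* Allocate the outer object, then the embedded one; if the second
     * allocation fails, unwind the first so the caller only ever sees
     * a fully built object or NULL. */
    struct block_group *block_group_create(unsigned long long start,
                                           unsigned long long size)
    {
            struct block_group *bg = calloc(1, sizeof(*bg));

            if (!bg)
                    return NULL;
            bg->ctl = calloc(1, sizeof(*bg->ctl));
            if (!bg->ctl) {
                    free(bg);
                    return NULL;
            }
            bg->start = start;
            bg->size = size;
            return bg;
    }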
 int btrfs_read_block_groups(struct btrfs_root *root)
 {
        struct btrfs_path *path;
@@ -8395,26 +8430,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                        break;
                if (ret != 0)
                        goto error;
+
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               cache = kzalloc(sizeof(*cache), GFP_NOFS);
+
+               cache = btrfs_create_block_group_cache(root, found_key.objectid,
+                                                      found_key.offset);
                if (!cache) {
                        ret = -ENOMEM;
                        goto error;
                }
-               cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                               GFP_NOFS);
-               if (!cache->free_space_ctl) {
-                       kfree(cache);
-                       ret = -ENOMEM;
-                       goto error;
-               }
-
-               atomic_set(&cache->count, 1);
-               spin_lock_init(&cache->lock);
-               cache->fs_info = info;
-               INIT_LIST_HEAD(&cache->list);
-               INIT_LIST_HEAD(&cache->cluster_list);
 
                if (need_clear) {
                        /*
@@ -8435,16 +8460,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                read_extent_buffer(leaf, &cache->item,
                                   btrfs_item_ptr_offset(leaf, path->slots[0]),
                                   sizeof(cache->item));
-               memcpy(&cache->key, &found_key, sizeof(found_key));
+               cache->flags = btrfs_block_group_flags(&cache->item);
 
                key.objectid = found_key.objectid + found_key.offset;
                btrfs_release_path(path);
-               cache->flags = btrfs_block_group_flags(&cache->item);
-               cache->sectorsize = root->sectorsize;
-               cache->full_stripe_len = btrfs_full_stripe_len(root,
-                                              &root->fs_info->mapping_tree,
-                                              found_key.objectid);
-               btrfs_init_free_space_ctl(cache);
 
                /*
                 * We need to exclude the super stripes now so that the space
@@ -8458,8 +8477,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                         * case.
                         */
                        free_excluded_extents(root, cache);
-                       kfree(cache->free_space_ctl);
-                       kfree(cache);
+                       btrfs_put_block_group(cache);
                        goto error;
                }
 
@@ -8590,38 +8608,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        root->fs_info->last_trans_log_full_commit = trans->transid;
 
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       cache = btrfs_create_block_group_cache(root, chunk_offset, size);
        if (!cache)
                return -ENOMEM;
-       cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
-       if (!cache->free_space_ctl) {
-               kfree(cache);
-               return -ENOMEM;
-       }
-
-       cache->key.objectid = chunk_offset;
-       cache->key.offset = size;
-       cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
-       cache->sectorsize = root->sectorsize;
-       cache->fs_info = root->fs_info;
-       cache->full_stripe_len = btrfs_full_stripe_len(root,
-                                              &root->fs_info->mapping_tree,
-                                              chunk_offset);
-
-       atomic_set(&cache->count, 1);
-       spin_lock_init(&cache->lock);
-       INIT_LIST_HEAD(&cache->list);
-       INIT_LIST_HEAD(&cache->cluster_list);
-       INIT_LIST_HEAD(&cache->new_bg_list);
-
-       btrfs_init_free_space_ctl(cache);
 
        btrfs_set_block_group_used(&cache->item, bytes_used);
        btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
-       cache->flags = type;
        btrfs_set_block_group_flags(&cache->item, type);
 
+       cache->flags = type;
        cache->last_byte_to_unpin = (u64)-1;
        cache->cached = BTRFS_CACHE_FINISHED;
        ret = exclude_super_stripes(root, cache);
@@ -8631,8 +8626,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                 * case.
                 */
                free_excluded_extents(root, cache);
-               kfree(cache->free_space_ctl);
-               kfree(cache);
+               btrfs_put_block_group(cache);
                return ret;
        }
 
@@ -8796,8 +8790,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         * are still on the list after taking the semaphore
         */
        list_del_init(&block_group->list);
-       if (list_empty(&block_group->space_info->block_groups[index]))
+       if (list_empty(&block_group->space_info->block_groups[index])) {
+               kobject_del(&block_group->space_info->block_group_kobjs[index]);
+               kobject_put(&block_group->space_info->block_group_kobjs[index]);
                clear_avail_alloc_bits(root->fs_info, block_group->flags);
+       }
        up_write(&block_group->space_info->groups_sem);
 
        if (block_group->cached == BTRFS_CACHE_STARTED)
index bcb6f1b780d64512868303c04a7939060612e3e3..85bbd01f1271379de6b3bcf41f4a42bd9d30320a 100644 (file)
@@ -59,7 +59,7 @@ void btrfs_leak_debug_check(void)
 
        while (!list_empty(&states)) {
                state = list_entry(states.next, struct extent_state, leak_list);
-               printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+               printk(KERN_ERR "BTRFS: state leak: start %llu end %llu "
                       "state %lu in tree %p refs %d\n",
                       state->start, state->end, state->state, state->tree,
                       atomic_read(&state->refs));
@@ -69,7 +69,7 @@ void btrfs_leak_debug_check(void)
 
        while (!list_empty(&buffers)) {
                eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-               printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+               printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
                       "refs %d\n",
                       eb->start, eb->len, atomic_read(&eb->refs));
                list_del(&eb->leak_list);
@@ -77,16 +77,22 @@ void btrfs_leak_debug_check(void)
        }
 }
 
-#define btrfs_debug_check_extent_io_range(inode, start, end)           \
-       __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end))
+#define btrfs_debug_check_extent_io_range(tree, start, end)            \
+       __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
 static inline void __btrfs_debug_check_extent_io_range(const char *caller,
-               struct inode *inode, u64 start, u64 end)
+               struct extent_io_tree *tree, u64 start, u64 end)
 {
-       u64 isize = i_size_read(inode);
+       struct inode *inode;
+       u64 isize;
+
+       if (!tree->mapping)
+               return;
 
+       inode = tree->mapping->host;
+       isize = i_size_read(inode);
        if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
                printk_ratelimited(KERN_DEBUG
-                   "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
+                   "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
                                caller, btrfs_ino(inode), isize, start, end);
        }
 }
@@ -124,6 +130,8 @@ static noinline void flush_write_bio(void *data);
 static inline struct btrfs_fs_info *
 tree_fs_info(struct extent_io_tree *tree)
 {
+       if (!tree->mapping)
+               return NULL;
        return btrfs_sb(tree->mapping->host->i_sb);
 }
 
@@ -186,11 +194,9 @@ void extent_io_tree_init(struct extent_io_tree *tree,
                         struct address_space *mapping)
 {
        tree->state = RB_ROOT;
-       INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
        tree->ops = NULL;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
-       spin_lock_init(&tree->buffer_lock);
        tree->mapping = mapping;
 }
 
@@ -224,12 +230,20 @@ void free_extent_state(struct extent_state *state)
 }
 
 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
-                                  struct rb_node *node)
+                                  struct rb_node *node,
+                                  struct rb_node ***p_in,
+                                  struct rb_node **parent_in)
 {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct tree_entry *entry;
 
+       if (p_in && parent_in) {
+               p = *p_in;
+               parent = *parent_in;
+               goto do_insert;
+       }
+
        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct tree_entry, rb_node);
@@ -242,35 +256,43 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
                        return parent;
        }
 
+do_insert:
        rb_link_node(node, parent, p);
        rb_insert_color(node, root);
        return NULL;
 }
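
tree_insert can now skip the descent entirely: tree_search_for_insert records the link pointer and parent where the search bottomed out, and insert_state replays them through p_in/parent_in rather than walking the tree a second time. The sketch below shows the same search-once, link-later idiom on a plain unbalanced binary tree (invented names, no rebalancing):

    #include <stddef.h>

    struct node {
            long key;
            struct node *left, *right;
    };

    /* Descend once, remembering both the edge to patch (*linkp) and the
     * would-be parent, the same roles p_ret and parent_ret play above. */
    struct node *search_slot(struct node **root, long key,
                             struct node ***linkp, struct node **parent)
    {
            struct node **p = root;

            *parent = NULL;
            while (*p) {
                    *parent = *p;
                    if (key < (*p)->key)
                            p = &(*p)->left;
                    else if (key > (*p)->key)
                            p = &(*p)->right;
                    else
                            return *p;      /* key already present */
            }
            *linkp = p;
            return NULL;
    }

    /* Link a new node into the slot found earlier; no second walk.
     * A red-black tree would also hand parent to rb_link_node() here. */
    void insert_at(struct node **link, struct node *n)
    {
            n->left = n->right = NULL;
            *link = n;
    }

A caller searches once and, if the key is absent, links the new node straight into the remembered slot; an rb-tree additionally needs the parent for recoloring, which is what rb_link_node() consumes in the hunk above.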
 
 static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
-                                    struct rb_node **prev_ret,
-                                    struct rb_node **next_ret)
+                                     struct rb_node **prev_ret,
+                                     struct rb_node **next_ret,
+                                     struct rb_node ***p_ret,
+                                     struct rb_node **parent_ret)
 {
        struct rb_root *root = &tree->state;
-       struct rb_node *n = root->rb_node;
+       struct rb_node **n = &root->rb_node;
        struct rb_node *prev = NULL;
        struct rb_node *orig_prev = NULL;
        struct tree_entry *entry;
        struct tree_entry *prev_entry = NULL;
 
-       while (n) {
-               entry = rb_entry(n, struct tree_entry, rb_node);
-               prev = n;
+       while (*n) {
+               prev = *n;
+               entry = rb_entry(prev, struct tree_entry, rb_node);
                prev_entry = entry;
 
                if (offset < entry->start)
-                       n = n->rb_left;
+                       n = &(*n)->rb_left;
                else if (offset > entry->end)
-                       n = n->rb_right;
+                       n = &(*n)->rb_right;
                else
-                       return n;
+                       return *n;
        }
 
+       if (p_ret)
+               *p_ret = n;
+       if (parent_ret)
+               *parent_ret = prev;
+
        if (prev_ret) {
                orig_prev = prev;
                while (prev && offset > prev_entry->end) {
@@ -292,18 +314,27 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
        return NULL;
 }
 
-static inline struct rb_node *tree_search(struct extent_io_tree *tree,
-                                         u64 offset)
+static inline struct rb_node *
+tree_search_for_insert(struct extent_io_tree *tree,
+                      u64 offset,
+                      struct rb_node ***p_ret,
+                      struct rb_node **parent_ret)
 {
        struct rb_node *prev = NULL;
        struct rb_node *ret;
 
-       ret = __etree_search(tree, offset, &prev, NULL);
+       ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
        if (!ret)
                return prev;
        return ret;
 }
 
+static inline struct rb_node *tree_search(struct extent_io_tree *tree,
+                                         u64 offset)
+{
+       return tree_search_for_insert(tree, offset, NULL, NULL);
+}
+
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
                     struct extent_state *other)
 {
@@ -385,23 +416,25 @@ static void set_state_bits(struct extent_io_tree *tree,
  */
 static int insert_state(struct extent_io_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
+                       struct rb_node ***p,
+                       struct rb_node **parent,
                        unsigned long *bits)
 {
        struct rb_node *node;
 
        if (end < start)
-               WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
+               WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
                       end, start);
        state->start = start;
        state->end = end;
 
        set_state_bits(tree, state, bits);
 
-       node = tree_insert(&tree->state, end, &state->rb_node);
+       node = tree_insert(&tree->state, end, &state->rb_node, p, parent);
        if (node) {
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
-               printk(KERN_ERR "btrfs found node %llu %llu on insert of "
+               printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
                       "%llu %llu\n",
                       found->start, found->end, start, end);
                return -EEXIST;
@@ -444,7 +477,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
        prealloc->state = orig->state;
        orig->start = split;
 
-       node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+       node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node,
+                          NULL, NULL);
        if (node) {
                free_extent_state(prealloc);
                return -EEXIST;
@@ -542,7 +576,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        int err;
        int clear = 0;
 
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
 
        if (bits & EXTENT_DELALLOC)
                bits |= EXTENT_NORESERVE;
@@ -702,7 +736,7 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        struct extent_state *state;
        struct rb_node *node;
 
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
 
        spin_lock(&tree->lock);
 again:
@@ -783,11 +817,13 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
+       struct rb_node **p;
+       struct rb_node *parent;
        int err = 0;
        u64 last_start;
        u64 last_end;
 
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
 
        bits |= EXTENT_FIRST_DELALLOC;
 again:
@@ -809,14 +845,16 @@ again:
         * this search will find all the extents that end after
         * our range starts.
         */
-       node = tree_search(tree, start);
+       node = tree_search_for_insert(tree, start, &p, &parent);
        if (!node) {
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
-               err = insert_state(tree, prealloc, start, end, &bits);
+               err = insert_state(tree, prealloc, start, end,
+                                  &p, &parent, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
 
+               cache_state(prealloc, cached_state);
                prealloc = NULL;
                goto out;
        }
@@ -919,7 +957,7 @@ hit_next:
                 * the later extent.
                 */
                err = insert_state(tree, prealloc, start, this_end,
-                                  &bits);
+                                  NULL, NULL, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
 
@@ -1005,11 +1043,13 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
+       struct rb_node **p;
+       struct rb_node *parent;
        int err = 0;
        u64 last_start;
        u64 last_end;
 
-       btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+       btrfs_debug_check_extent_io_range(tree, start, end);
 
 again:
        if (!prealloc && (mask & __GFP_WAIT)) {
@@ -1032,17 +1072,19 @@ again:
         * this search will find all the extents that end after
         * our range starts.
         */
-       node = tree_search(tree, start);
+       node = tree_search_for_insert(tree, start, &p, &parent);
        if (!node) {
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc) {
                        err = -ENOMEM;
                        goto out;
                }
-               err = insert_state(tree, prealloc, start, end, &bits);
-               prealloc = NULL;
+               err = insert_state(tree, prealloc, start, end,
+                                  &p, &parent, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
+               cache_state(prealloc, cached_state);
+               prealloc = NULL;
                goto out;
        }
        state = rb_entry(node, struct extent_state, rb_node);
@@ -1135,7 +1177,7 @@ hit_next:
                 * the later extent.
                 */
                err = insert_state(tree, prealloc, start, this_end,
-                                  &bits);
+                                  NULL, NULL, &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
                cache_state(prealloc, cached_state);
@@ -2012,9 +2054,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
                return -EIO;
        }
 
-       printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
-                     "(dev %s sector %llu)\n", page->mapping->host->i_ino,
-                     start, rcu_str_deref(dev->name), sector);
+       printk_ratelimited_in_rcu(KERN_INFO
+                       "BTRFS: read error corrected: ino %lu off %llu "
+                   "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+                   start, rcu_str_deref(dev->name), sector);
 
        bio_put(bio);
        return 0;
@@ -2156,7 +2199,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                        return -EIO;
                }
 
-               if (em->start > start || em->start + em->len < start) {
+               if (em->start > start || em->start + em->len <= start) {
                        free_extent_map(em);
                        em = NULL;
                }
@@ -2333,25 +2376,29 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 static void end_bio_extent_writepage(struct bio *bio, int err)
 {
        struct bio_vec *bvec;
-       struct extent_io_tree *tree;
        u64 start;
        u64 end;
        int i;
 
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
-               tree = &BTRFS_I(page->mapping->host)->io_tree;
 
                /* We always issue full-page reads, but if some block
                 * in a page fails to read, blk_update_request() will
                 * advance bv_offset and adjust bv_len to compensate.
                 * Print a warning for nonzero offsets, and an error
                 * if they don't add up to a full page.  */
-               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
-                       printk("%s page write in btrfs with offset %u and length %u\n",
-                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
-                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
-                              bvec->bv_offset, bvec->bv_len);
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+                       if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+                               btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "partial page write in btrfs with offset %u and length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+                       else
+                               btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "incomplete page write in btrfs with offset %u and "
+                                  "length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+               }
 
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
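
The rewritten writepage completion picks message severity from where the segment ends: a bvec that stops short of the page boundary is a partial write and logged as an error, while one that starts at a nonzero offset but still reaches the boundary is only incomplete and logged as info. A small standalone sketch of that classification (PAGE_SIZE stands in for PAGE_CACHE_SIZE):

    #include <stdio.h>

    #define PAGE_SIZE 4096u

    /* Anything other than a full page gets a message; whether it is an
     * error depends on whether the segment reaches the page boundary. */
    const char *classify(unsigned off, unsigned len)
    {
            if (off == 0 && len == PAGE_SIZE)
                    return "full page";             /* nothing to report */
            if (off + len != PAGE_SIZE)
                    return "partial (error)";       /* stops short of the boundary */
            return "incomplete (info)";             /* nonzero offset, ends on boundary */
    }

    int main(void)
    {
            printf("%s\n", classify(0, 4096));      /* full page */
            printf("%s\n", classify(0, 2048));      /* partial (error) */
            printf("%s\n", classify(2048, 2048));   /* incomplete (info) */
            return 0;
    }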
@@ -2421,11 +2468,17 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                 * advance bv_offset and adjust bv_len to compensate.
                 * Print a warning for nonzero offsets, and an error
                 * if they don't add up to a full page.  */
-               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
-                       printk("%s page read in btrfs with offset %u and length %u\n",
-                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
-                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
-                              bvec->bv_offset, bvec->bv_len);
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+                       if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+                               btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "partial page read in btrfs with offset %u and length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+                       else
+                               btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+                                  "incomplete page read in btrfs with offset %u and "
+                                  "length %u",
+                                       bvec->bv_offset, bvec->bv_len);
+               }
 
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
@@ -3281,8 +3334,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
                        set_range_writeback(tree, cur, cur + iosize - 1);
                        if (!PageWriteback(page)) {
-                               printk(KERN_ERR "btrfs warning page %lu not "
-                                      "writeback, cur %llu end %llu\n",
+                               btrfs_err(BTRFS_I(inode)->root->fs_info,
+                                          "page %lu not writeback, cur %llu end %llu",
                                       page->index, cur, end);
                        }
 
@@ -3438,6 +3491,7 @@ static int write_one_eb(struct extent_buffer *eb,
                        struct extent_page_data *epd)
 {
        struct block_device *bdev = fs_info->fs_devices->latest_bdev;
+       struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
        u64 offset = eb->start;
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
@@ -3455,7 +3509,7 @@ static int write_one_eb(struct extent_buffer *eb,
 
                clear_page_dirty_for_io(p);
                set_page_writeback(p);
-               ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
+               ret = submit_extent_page(rw, tree, p, offset >> 9,
                                         PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
                                         -1, end_bio_extent_buffer_writepage,
                                         0, epd->bio_flags, bio_flags);
@@ -4073,12 +4127,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
-       struct btrfs_file_extent_item *item;
        int end = 0;
        u64 em_start = 0;
        u64 em_len = 0;
        u64 em_end = 0;
-       unsigned long emflags;
 
        if (len == 0)
                return -EINVAL;
@@ -4103,8 +4155,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
        WARN_ON(!ret);
        path->slots[0]--;
-       item = btrfs_item_ptr(path->nodes[0], path->slots[0],
-                             struct btrfs_file_extent_item);
        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
 
@@ -4172,7 +4222,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        offset_in_extent = em_start - em->start;
                em_end = extent_map_end(em);
                em_len = em_end - em_start;
-               emflags = em->flags;
                disko = 0;
                flags = 0;
 
@@ -4324,10 +4373,9 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
        __free_extent_buffer(eb);
 }
 
-static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
-                                                  u64 start,
-                                                  unsigned long len,
-                                                  gfp_t mask)
+static struct extent_buffer *
+__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
+                     unsigned long len, gfp_t mask)
 {
        struct extent_buffer *eb = NULL;
 
@@ -4336,7 +4384,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                return NULL;
        eb->start = start;
        eb->len = len;
-       eb->tree = tree;
+       eb->fs_info = fs_info;
        eb->bflags = 0;
        rwlock_init(&eb->lock);
        atomic_set(&eb->write_locks, 0);
@@ -4468,13 +4516,14 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
        }
 }
 
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
-                                                       u64 start)
+struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
+                                        u64 start)
 {
        struct extent_buffer *eb;
 
        rcu_read_lock();
-       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+       eb = radix_tree_lookup(&fs_info->buffer_radix,
+                              start >> PAGE_CACHE_SHIFT);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
                mark_extent_buffer_accessed(eb);
@@ -4485,7 +4534,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
        return NULL;
 }
 
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                          u64 start, unsigned long len)
 {
        unsigned long num_pages = num_extent_pages(start, len);
@@ -4494,16 +4543,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        struct extent_buffer *eb;
        struct extent_buffer *exists = NULL;
        struct page *p;
-       struct address_space *mapping = tree->mapping;
+       struct address_space *mapping = fs_info->btree_inode->i_mapping;
        int uptodate = 1;
        int ret;
 
-
-       eb = find_extent_buffer(tree, start);
+       eb = find_extent_buffer(fs_info, start);
        if (eb)
                return eb;
 
-       eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
+       eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
        if (!eb)
                return NULL;
 
@@ -4558,12 +4606,13 @@ again:
        if (ret)
                goto free_eb;
 
-       spin_lock(&tree->buffer_lock);
-       ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
-       spin_unlock(&tree->buffer_lock);
+       spin_lock(&fs_info->buffer_lock);
+       ret = radix_tree_insert(&fs_info->buffer_radix,
+                               start >> PAGE_CACHE_SHIFT, eb);
+       spin_unlock(&fs_info->buffer_lock);
        radix_tree_preload_end();
        if (ret == -EEXIST) {
-               exists = find_extent_buffer(tree, start);
+               exists = find_extent_buffer(fs_info, start);
                if (exists)
                        goto free_eb;
                else
@@ -4571,6 +4620,7 @@ again:
        }
        /* add one reference for the tree */
        check_buffer_tree_ref(eb);
+       set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
 
        /*
         * there is a race where release page may have
@@ -4614,17 +4664,17 @@ static int release_extent_buffer(struct extent_buffer *eb)
 {
        WARN_ON(atomic_read(&eb->refs) == 0);
        if (atomic_dec_and_test(&eb->refs)) {
-               if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
-                       spin_unlock(&eb->refs_lock);
-               } else {
-                       struct extent_io_tree *tree = eb->tree;
+               if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
+                       struct btrfs_fs_info *fs_info = eb->fs_info;
 
                        spin_unlock(&eb->refs_lock);
 
-                       spin_lock(&tree->buffer_lock);
-                       radix_tree_delete(&tree->buffer,
+                       spin_lock(&fs_info->buffer_lock);
+                       radix_tree_delete(&fs_info->buffer_radix,
                                          eb->start >> PAGE_CACHE_SHIFT);
-                       spin_unlock(&tree->buffer_lock);
+                       spin_unlock(&fs_info->buffer_lock);
+               } else {
+                       spin_unlock(&eb->refs_lock);
                }
 
                /* Should be safe to release our pages at this point */
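The reordered branches above also change who unlinks the buffer: a hedged userspace model of that handoff, where a single atomic test-and-clear picks the one thread allowed to remove the buffer from the shared radix tree (names are stand-ins, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

struct buf {
	atomic_int  refs;
	atomic_bool in_tree;    /* stands in for EXTENT_BUFFER_IN_TREE */
};

/* returns true when the caller must also unlink buf from the index;
 * the exchange guarantees at most one caller ever sees true */
static bool release_buf(struct buf *b)
{
	if (atomic_fetch_sub(&b->refs, 1) != 1)
		return false;               /* not the last reference */
	return atomic_exchange(&b->in_tree, false);
}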
@@ -5103,12 +5153,12 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        unsigned long src_i;
 
        if (src_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
                       "len %lu dst len %lu\n", src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
                       "len %lu dst len %lu\n", dst_offset, len, dst->len);
                BUG_ON(1);
        }
@@ -5150,12 +5200,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        unsigned long src_i;
 
        if (src_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
                       "len %lu len %lu\n", src_offset, len, dst->len);
                BUG_ON(1);
        }
        if (dst_offset + len > dst->len) {
-               printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+               printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
                       "len %lu len %lu\n", dst_offset, len, dst->len);
                BUG_ON(1);
        }
index 19620c58f096ef2dd8f0a5810ff632c575e0f5ee..58b27e5ab52158a2d03a1bf7d7d612878489a9ac 100644 (file)
@@ -43,6 +43,7 @@
 #define EXTENT_BUFFER_WRITEBACK 7
 #define EXTENT_BUFFER_IOERR 8
 #define EXTENT_BUFFER_DUMMY 9
+#define EXTENT_BUFFER_IN_TREE 10
 
 /* these are flags for extent_clear_unlock_delalloc */
 #define PAGE_UNLOCK            (1 << 0)
@@ -94,12 +95,10 @@ struct extent_io_ops {
 
 struct extent_io_tree {
        struct rb_root state;
-       struct radix_tree_root buffer;
        struct address_space *mapping;
        u64 dirty_bytes;
        int track_uptodate;
        spinlock_t lock;
-       spinlock_t buffer_lock;
        struct extent_io_ops *ops;
 };
 
@@ -130,7 +129,7 @@ struct extent_buffer {
        unsigned long map_start;
        unsigned long map_len;
        unsigned long bflags;
-       struct extent_io_tree *tree;
+       struct btrfs_fs_info *fs_info;
        spinlock_t refs_lock;
        atomic_t refs;
        atomic_t io_pages;
@@ -266,11 +265,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
 void set_page_extent_mapped(struct page *page);
 
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                          u64 start, unsigned long len);
 struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
                                         u64 start);
 void free_extent_buffer(struct extent_buffer *eb);
 void free_extent_buffer_stale(struct extent_buffer *eb);
index a4a7a1a8da95c4c1e7571d99e0d58a7b5209f4ee..996ad56b57db64bbc0516f4674d8ad3fd2d43976 100644 (file)
@@ -79,12 +79,21 @@ void free_extent_map(struct extent_map *em)
        }
 }
 
-static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
-                                  struct rb_node *node)
+/* simple helper to do math around the end of an extent, handling wrap */
+static u64 range_end(u64 start, u64 len)
+{
+       if (start + len < start)
+               return (u64)-1;
+       return start + len;
+}
+
+static int tree_insert(struct rb_root *root, struct extent_map *em)
 {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
-       struct extent_map *entry;
+       struct extent_map *entry = NULL;
+       struct rb_node *orig_parent = NULL;
+       u64 end = range_end(em->start, em->len);
 
        while (*p) {
                parent = *p;
@@ -92,19 +101,37 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
 
                WARN_ON(!entry->in_tree);
 
-               if (offset < entry->start)
+               if (em->start < entry->start)
                        p = &(*p)->rb_left;
-               else if (offset >= extent_map_end(entry))
+               else if (em->start >= extent_map_end(entry))
                        p = &(*p)->rb_right;
                else
-                       return parent;
+                       return -EEXIST;
        }
 
-       entry = rb_entry(node, struct extent_map, rb_node);
-       entry->in_tree = 1;
-       rb_link_node(node, parent, p);
-       rb_insert_color(node, root);
-       return NULL;
+       orig_parent = parent;
+       while (parent && em->start >= extent_map_end(entry)) {
+               parent = rb_next(parent);
+               entry = rb_entry(parent, struct extent_map, rb_node);
+       }
+       if (parent)
+               if (end > entry->start && em->start < extent_map_end(entry))
+                       return -EEXIST;
+
+       parent = orig_parent;
+       entry = rb_entry(parent, struct extent_map, rb_node);
+       while (parent && em->start < entry->start) {
+               parent = rb_prev(parent);
+               entry = rb_entry(parent, struct extent_map, rb_node);
+       }
+       if (parent)
+               if (end > entry->start && em->start < extent_map_end(entry))
+                       return -EEXIST;
+
+       em->in_tree = 1;
+       rb_link_node(&em->rb_node, orig_parent, p);
+       rb_insert_color(&em->rb_node, root);
+       return 0;
 }
 
 /*
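The rewritten tree_insert() above refuses to link an extent map whose range touches either sorted neighbor. The test it applies to both sides reduces to the standard half-open interval overlap check; a compilable sketch, with range_end() mirrored from the helper above:

#include <assert.h>

typedef unsigned long long u64;

/* end of [start, start + len), saturating at the top of the u64 space */
static u64 range_end(u64 start, u64 len)
{
	return (start + len < start) ? (u64)-1 : start + len;
}

/* two half-open ranges overlap iff each one starts before the other ends */
static int ranges_overlap(u64 a_start, u64 a_len, u64 b_start, u64 b_len)
{
	return range_end(a_start, a_len) > b_start &&
	       a_start < range_end(b_start, b_len);
}

int main(void)
{
	assert(ranges_overlap(0, 10, 5, 10));   /* [0,10) and [5,15) meet */
	assert(!ranges_overlap(0, 10, 10, 5));  /* adjacent ranges do not */
	return 0;
}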
@@ -228,7 +255,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
                merge = rb_entry(rb, struct extent_map, rb_node);
        if (rb && mergable_maps(em, merge)) {
                em->len += merge->len;
-               em->block_len += merge->len;
+               em->block_len += merge->block_len;
                rb_erase(&merge->rb_node, &tree->map);
                merge->in_tree = 0;
                em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
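The one-word fix above (merge->block_len instead of merge->len) matters whenever an extent's logical length differs from its on-disk length, compressed extents being the obvious case. Schematically, each counter must grow by its own counterpart:

struct span { unsigned long long len, block_len; };

/* fold src into dst: logical bytes and on-disk bytes are separate sums */
static void merge_into(struct span *dst, const struct span *src)
{
	dst->len       += src->len;        /* logical extent length */
	dst->block_len += src->block_len;  /* on-disk length        */
}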
@@ -310,20 +337,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
                       struct extent_map *em, int modified)
 {
        int ret = 0;
-       struct rb_node *rb;
-       struct extent_map *exist;
 
-       exist = lookup_extent_mapping(tree, em->start, em->len);
-       if (exist) {
-               free_extent_map(exist);
-               ret = -EEXIST;
-               goto out;
-       }
-       rb = tree_insert(&tree->map, em->start, &em->rb_node);
-       if (rb) {
-               ret = -EEXIST;
+       ret = tree_insert(&tree->map, em);
+       if (ret)
                goto out;
-       }
+
        atomic_inc(&em->refs);
 
        em->mod_start = em->start;
@@ -337,14 +355,6 @@ out:
        return ret;
 }
 
-/* simple helper to do math around the end of an extent, handling wrap */
-static u64 range_end(u64 start, u64 len)
-{
-       if (start + len < start)
-               return (u64)-1;
-       return start + len;
-}
-
 static struct extent_map *
 __lookup_extent_mapping(struct extent_map_tree *tree,
                        u64 start, u64 len, int strict)
index 84a46a42d26269b94fbb0a823e1fec43439d5e69..127555b29f587fab26de53b9a52f137f958ce3f5 100644 (file)
@@ -246,8 +246,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
                                                offset + bvec->bv_len - 1,
                                                EXTENT_NODATASUM, GFP_NOFS);
                                } else {
-                                       printk(KERN_INFO "btrfs no csum found "
-                                              "for inode %llu start %llu\n",
+                                       btrfs_info(BTRFS_I(inode)->root->fs_info,
+                                                  "no csum found for inode %llu start %llu",
                                               btrfs_ino(inode), offset);
                                }
                                item = NULL;
index 82d0342763c54d652982d2818ab3b9b7f8ffc1ec..0165b8672f099c49f96400fc0c87cc0b7cfc4532 100644 (file)
@@ -692,7 +692,10 @@ next:
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root, struct inode *inode,
                         struct btrfs_path *path, u64 start, u64 end,
-                        u64 *drop_end, int drop_cache)
+                        u64 *drop_end, int drop_cache,
+                        int replace_extent,
+                        u32 extent_item_size,
+                        int *key_inserted)
 {
        struct extent_buffer *leaf;
        struct btrfs_file_extent_item *fi;
@@ -712,6 +715,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int modify_tree = -1;
        int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
        int found = 0;
+       int leafs_visited = 0;
 
        if (drop_cache)
                btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -733,6 +737,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
                                path->slots[0]--;
                }
                ret = 0;
+               leafs_visited++;
 next_slot:
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -744,6 +749,7 @@ next_slot:
                                ret = 0;
                                break;
                        }
+                       leafs_visited++;
                        leaf = path->nodes[0];
                        recow = 1;
                }
@@ -766,7 +772,8 @@ next_slot:
                                btrfs_file_extent_num_bytes(leaf, fi);
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                        extent_end = key.offset +
-                               btrfs_file_extent_inline_len(leaf, fi);
+                               btrfs_file_extent_inline_len(leaf,
+                                                    path->slots[0], fi);
                } else {
                        WARN_ON(1);
                        extent_end = search_start;
@@ -927,14 +934,44 @@ next_slot:
        }
 
        if (!ret && del_nr > 0) {
+               /*
+                * Set path->slots[0] to first slot, so that after the delete
+                * if items are moved off from our leaf to its immediate left or
+                * right neighbor leafs, we end up with a correct and adjusted
+                * path->slots[0] for our insertion.
+                */
+               path->slots[0] = del_slot;
                ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
                if (ret)
                        btrfs_abort_transaction(trans, root, ret);
+
+               leaf = path->nodes[0];
+               /*
+                * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that
+                * is, its contents got pushed to its neighbors), in which case
+                * it means path->locks[0] == 0
+                */
+               if (!ret && replace_extent && leafs_visited == 1 &&
+                   path->locks[0] &&
+                   btrfs_leaf_free_space(root, leaf) >=
+                   sizeof(struct btrfs_item) + extent_item_size) {
+
+                       key.objectid = ino;
+                       key.type = BTRFS_EXTENT_DATA_KEY;
+                       key.offset = start;
+                       setup_items_for_insert(root, path, &key,
+                                              &extent_item_size,
+                                              extent_item_size,
+                                              sizeof(struct btrfs_item) +
+                                              extent_item_size, 1);
+                       *key_inserted = 1;
+               }
        }
 
+       if (!replace_extent || !(*key_inserted))
+               btrfs_release_path(path);
        if (drop_end)
                *drop_end = found ? min(end, extent_end) : end;
-       btrfs_release_path(path);
        return ret;
 }
 
@@ -949,7 +986,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
        ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
-                                  drop_cache);
+                                  drop_cache, 0, 0, NULL);
        btrfs_free_path(path);
        return ret;
 }
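The three new parameters turn __btrfs_drop_extents() into a drop-and-replace primitive: when the drop touched a single, still-locked leaf with enough free space, the replacement file extent key is inserted in the same pass, saving a second tree search. A hedged sketch of just the gating condition (the struct is a stand-in for btrfs_item):

#include <stdbool.h>
#include <stddef.h>

struct item_hdr { unsigned int offset, size; };

/* in-place insertion is attempted only when one leaf was visited,
 * that leaf is still locked, and it has room for one more item */
static bool can_insert_in_place(int leafs_visited, bool leaf_locked,
				size_t leaf_free_space, size_t item_size)
{
	return leafs_visited == 1 && leaf_locked &&
	       leaf_free_space >= sizeof(struct item_hdr) + item_size;
}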
@@ -1235,29 +1272,18 @@ static int prepare_uptodate_page(struct page *page, u64 pos,
 }
 
 /*
- * this gets pages into the page cache and locks them down, it also properly
- * waits for data=ordered extents to finish before allowing the pages to be
- * modified.
+ * this just gets pages into the page cache and locks them down.
  */
-static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
-                        struct page **pages, size_t num_pages,
-                        loff_t pos, unsigned long first_index,
-                        size_t write_bytes, bool force_uptodate)
+static noinline int prepare_pages(struct inode *inode, struct page **pages,
+                                 size_t num_pages, loff_t pos,
+                                 size_t write_bytes, bool force_uptodate)
 {
-       struct extent_state *cached_state = NULL;
        int i;
        unsigned long index = pos >> PAGE_CACHE_SHIFT;
-       struct inode *inode = file_inode(file);
        gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
        int err = 0;
-       int faili = 0;
-       u64 start_pos;
-       u64 last_pos;
-
-       start_pos = pos & ~((u64)root->sectorsize - 1);
-       last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
+       int faili;
 
-again:
        for (i = 0; i < num_pages; i++) {
                pages[i] = find_or_create_page(inode->i_mapping, index + i,
                                               mask | __GFP_WRITE);
@@ -1280,57 +1306,85 @@ again:
                }
                wait_on_page_writeback(pages[i]);
        }
-       faili = num_pages - 1;
-       err = 0;
+
+       return 0;
+fail:
+       while (faili >= 0) {
+               unlock_page(pages[faili]);
+               page_cache_release(pages[faili]);
+               faili--;
+       }
+       return err;
+
+}
+
+/*
+ * This function locks the extent and properly waits for data=ordered extents
+ * to finish before allowing the pages to be modified, if needed.
+ *
+ * The return value:
+ * 1 - the extent is locked
+ * 0 - the extent is not locked, and everything is OK
+ * -EAGAIN - need to re-prepare the pages
+ * any other value < 0 - something went wrong
+ */
+static noinline int
+lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
+                               size_t num_pages, loff_t pos,
+                               u64 *lockstart, u64 *lockend,
+                               struct extent_state **cached_state)
+{
+       u64 start_pos;
+       u64 last_pos;
+       int i;
+       int ret = 0;
+
+       start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
+       last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+
        if (start_pos < inode->i_size) {
                struct btrfs_ordered_extent *ordered;
                lock_extent_bits(&BTRFS_I(inode)->io_tree,
-                                start_pos, last_pos - 1, 0, &cached_state);
-               ordered = btrfs_lookup_first_ordered_extent(inode,
-                                                           last_pos - 1);
+                                start_pos, last_pos, 0, cached_state);
+               ordered = btrfs_lookup_first_ordered_extent(inode, last_pos);
                if (ordered &&
                    ordered->file_offset + ordered->len > start_pos &&
-                   ordered->file_offset < last_pos) {
+                   ordered->file_offset <= last_pos) {
                        btrfs_put_ordered_extent(ordered);
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-                                            start_pos, last_pos - 1,
-                                            &cached_state, GFP_NOFS);
+                                            start_pos, last_pos,
+                                            cached_state, GFP_NOFS);
                        for (i = 0; i < num_pages; i++) {
                                unlock_page(pages[i]);
                                page_cache_release(pages[i]);
                        }
-                       err = btrfs_wait_ordered_range(inode, start_pos,
-                                                      last_pos - start_pos);
-                       if (err)
-                               goto fail;
-                       goto again;
+                       ret = btrfs_wait_ordered_range(inode, start_pos,
+                                               last_pos - start_pos + 1);
+                       if (ret)
+                               return ret;
+                       else
+                               return -EAGAIN;
                }
                if (ordered)
                        btrfs_put_ordered_extent(ordered);
 
                clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
-                                 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
+                                 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
                                  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
-                                 0, 0, &cached_state, GFP_NOFS);
-               unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-                                    start_pos, last_pos - 1, &cached_state,
-                                    GFP_NOFS);
+                                 0, 0, cached_state, GFP_NOFS);
+               *lockstart = start_pos;
+               *lockend = last_pos;
+               ret = 1;
        }
+
        for (i = 0; i < num_pages; i++) {
                if (clear_page_dirty_for_io(pages[i]))
                        account_page_redirty(pages[i]);
                set_page_extent_mapped(pages[i]);
                WARN_ON(!PageLocked(pages[i]));
        }
-       return 0;
-fail:
-       while (faili >= 0) {
-               unlock_page(pages[faili]);
-               page_cache_release(pages[faili]);
-               faili--;
-       }
-       return err;
 
+       return ret;
 }
 
 static noinline int check_can_nocow(struct inode *inode, loff_t pos,
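A compilable model of the caller contract spelled out in the comment above: -EAGAIN restarts page preparation, a positive return records that the extent range is locked, and any other negative value aborts. All helper names below are stubs for illustration, not kernel functions:

#include <errno.h>
#include <stdbool.h>

static int attempts;

/* stub: fails with -EAGAIN once, then reports "extent locked" (1) */
static int lock_range(void)    { return attempts++ ? 1 : -EAGAIN; }
static void unlock_range(void) { }
static void do_write(void)     { }

static int process_range(void)
{
	bool need_unlock = false;
	int ret;
again:
	ret = lock_range();
	if (ret == -EAGAIN)
		goto again;            /* pages were dropped: prepare again */
	if (ret < 0)
		return ret;            /* any other negative value is fatal */
	if (ret > 0)
		need_unlock = true;    /* the extent range is locked        */

	do_write();
	if (need_unlock)
		unlock_range();
	return 0;
}

int main(void) { return process_range(); }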
@@ -1381,13 +1435,17 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
        struct inode *inode = file_inode(file);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct page **pages = NULL;
+       struct extent_state *cached_state = NULL;
        u64 release_bytes = 0;
+       u64 lockstart;
+       u64 lockend;
        unsigned long first_index;
        size_t num_written = 0;
        int nrptrs;
        int ret = 0;
        bool only_release_metadata = false;
        bool force_page_uptodate = false;
+       bool need_unlock;
 
        nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
                     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1456,18 +1514,31 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                }
 
                release_bytes = reserve_bytes;
-
+               need_unlock = false;
+again:
                /*
                 * This is going to setup the pages array with the number of
                 * pages we want, so we don't really need to worry about the
                 * contents of pages from loop to loop
                 */
-               ret = prepare_pages(root, file, pages, num_pages,
-                                   pos, first_index, write_bytes,
+               ret = prepare_pages(inode, pages, num_pages,
+                                   pos, write_bytes,
                                    force_page_uptodate);
                if (ret)
                        break;
 
+               ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
+                                                     pos, &lockstart, &lockend,
+                                                     &cached_state);
+               if (ret < 0) {
+                       if (ret == -EAGAIN)
+                               goto again;
+                       break;
+               } else if (ret > 0) {
+                       need_unlock = true;
+                       ret = 0;
+               }
+
                copied = btrfs_copy_from_user(pos, num_pages,
                                           write_bytes, pages, i);
 
@@ -1512,19 +1583,21 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                }
 
                release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
-               if (copied > 0) {
+
+               if (copied > 0)
                        ret = btrfs_dirty_pages(root, inode, pages,
                                                dirty_pages, pos, copied,
                                                NULL);
-                       if (ret) {
-                               btrfs_drop_pages(pages, num_pages);
-                               break;
-                       }
+               if (need_unlock)
+                       unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+                                            lockstart, lockend, &cached_state,
+                                            GFP_NOFS);
+               if (ret) {
+                       btrfs_drop_pages(pages, num_pages);
+                       break;
                }
 
                release_bytes = 0;
-               btrfs_drop_pages(pages, num_pages);
-
                if (only_release_metadata && copied > 0) {
                        u64 lockstart = round_down(pos, root->sectorsize);
                        u64 lockend = lockstart +
@@ -1536,6 +1609,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                        only_release_metadata = false;
                }
 
+               btrfs_drop_pages(pages, num_pages);
+
                cond_resched();
 
                balance_dirty_pages_ratelimited(inode->i_mapping);
@@ -1857,12 +1932,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (file->private_data)
                btrfs_ioctl_trans_end(file);
 
+       /*
+        * We use start here because we will need to wait on the IO to complete
+        * in btrfs_sync_log, which could require joining a transaction (for
+        * example checking cross references in the nocow path).  If we use join
+        * here we could get into a situation where we're waiting on IO to
+        * happen that is blocked on a transaction trying to commit.  With start
+        * we inc the extwriter counter, so we wait for all extwriters to exit
+        * before we start blocking join'ers.  This comment is to keep somebody
+        * from thinking they are super smart and changing this to
+        * btrfs_join_transaction *cough*Josef*cough*.
+        */
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                mutex_unlock(&inode->i_mutex);
                goto out;
        }
+       trans->sync = true;
 
        ret = btrfs_log_dentry_safe(trans, root, dentry);
        if (ret < 0) {
@@ -1963,11 +2050,13 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
        struct btrfs_key key;
        int ret;
 
+       if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
+               goto out;
+
        key.objectid = btrfs_ino(inode);
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = offset;
 
-
        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret < 0)
                return ret;
@@ -2064,8 +2153,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        u64 drop_end;
        int ret = 0;
        int err = 0;
+       int rsv_count;
        bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
                          ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+       bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
 
        ret = btrfs_wait_ordered_range(inode, offset, len);
        if (ret)
@@ -2125,7 +2216,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                 * we need to try again.
                 */
                if ((!ordered ||
-                   (ordered->file_offset + ordered->len < lockstart ||
+                   (ordered->file_offset + ordered->len <= lockstart ||
                     ordered->file_offset > lockend)) &&
                     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
                                     lockend, EXTENT_UPTODATE, 0,
@@ -2163,9 +2254,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        /*
         * 1 - update the inode
         * 1 - removing the extents in the range
-        * 1 - adding the hole extent
+        * 1 - adding the hole extent if no_holes isn't set
         */
-       trans = btrfs_start_transaction(root, 3);
+       rsv_count = no_holes ? 2 : 3;
+       trans = btrfs_start_transaction(root, rsv_count);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto out_free;
@@ -2179,7 +2271,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        while (cur_offset < lockend) {
                ret = __btrfs_drop_extents(trans, root, inode, path,
                                           cur_offset, lockend + 1,
-                                          &drop_end, 1);
+                                          &drop_end, 1, 0, 0, NULL);
                if (ret != -ENOSPC)
                        break;
 
@@ -2202,7 +2294,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(root);
 
-               trans = btrfs_start_transaction(root, 3);
+               trans = btrfs_start_transaction(root, rsv_count);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
                        trans = NULL;
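The reservation change above is plain arithmetic; as a sketch, assuming the three-unit breakdown listed in the comment:

#include <stdbool.h>

/* 1 unit to update the inode, 1 to remove the extents in the range,
 * and 1 for the hole extent item that NO_HOLES filesystems never write */
static int punch_hole_rsv_count(bool no_holes)
{
	return no_holes ? 2 : 3;
}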
index 057be95b1e1e5894bdbe3525d0f5f8f2b3860650..73f3de7a083c2aab0b48e0e0a53f61044dba794e 100644 (file)
@@ -347,8 +347,8 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
                        btrfs_readpage(NULL, page);
                        lock_page(page);
                        if (!PageUptodate(page)) {
-                               printk(KERN_ERR "btrfs: error reading free "
-                                      "space cache\n");
+                               btrfs_err(BTRFS_I(inode)->root->fs_info,
+                                          "error reading free space cache");
                                io_ctl_drop_pages(io_ctl);
                                return -EIO;
                        }
@@ -405,7 +405,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
 
        gen = io_ctl->cur;
        if (le64_to_cpu(*gen) != generation) {
-               printk_ratelimited(KERN_ERR "btrfs: space cache generation "
+               printk_ratelimited(KERN_ERR "BTRFS: space cache generation "
                                   "(%Lu) does not match inode (%Lu)\n", *gen,
                                   generation);
                io_ctl_unmap_page(io_ctl);
@@ -463,7 +463,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
                              PAGE_CACHE_SIZE - offset);
        btrfs_csum_final(crc, (char *)&crc);
        if (val != crc) {
-               printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
+               printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free "
                                   "space cache\n");
                io_ctl_unmap_page(io_ctl);
                return -EIO;
@@ -1902,7 +1902,7 @@ out:
        spin_unlock(&ctl->tree_lock);
 
        if (ret) {
-               printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
+               printk(KERN_CRIT "BTRFS: unable to add free space :%d\n", ret);
                ASSERT(ret != -EEXIST);
        }
 
@@ -2011,14 +2011,15 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                if (info->bytes >= bytes && !block_group->ro)
                        count++;
-               printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
-                      info->offset, info->bytes,
+               btrfs_crit(block_group->fs_info,
+                          "entry offset %llu, bytes %llu, bitmap %s",
+                          info->offset, info->bytes,
                       (info->bitmap) ? "yes" : "no");
        }
-       printk(KERN_INFO "block group has cluster?: %s\n",
+       btrfs_info(block_group->fs_info, "block group has cluster?: %s",
               list_empty(&block_group->cluster_list) ? "no" : "yes");
-       printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
-              "\n", count);
+       btrfs_info(block_group->fs_info,
+                  "%d blocks of free space at or bigger than bytes is", count);
 }
 
 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
@@ -2421,7 +2422,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
        struct btrfs_free_space *entry = NULL;
        struct btrfs_free_space *last;
        struct rb_node *node;
-       u64 window_start;
        u64 window_free;
        u64 max_extent;
        u64 total_size = 0;
@@ -2443,7 +2443,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }
 
-       window_start = entry->offset;
        window_free = entry->bytes;
        max_extent = entry->bytes;
        first = entry;
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
new file mode 100644 (file)
index 0000000..85889aa
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <crypto/hash.h>
+#include <linux/err.h>
+#include "hash.h"
+
+static struct crypto_shash *tfm;
+
+int __init btrfs_hash_init(void)
+{
+       tfm = crypto_alloc_shash("crc32c", 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       return 0;
+}
+
+void btrfs_hash_exit(void)
+{
+       crypto_free_shash(tfm);
+}
+
+u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
+{
+       struct {
+               struct shash_desc shash;
+               char ctx[crypto_shash_descsize(tfm)];
+       } desc;
+       int err;
+
+       desc.shash.tfm = tfm;
+       desc.shash.flags = 0;
+       *(u32 *)desc.ctx = crc;
+
+       err = crypto_shash_update(&desc.shash, address, length);
+       BUG_ON(err);
+
+       return *(u32 *)desc.ctx;
+}
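One idiom in btrfs_crc32c() above is worth flagging: the shash descriptor lives on the stack with a context area sized at run time, which relies on the GNU C extension permitting variably sized array members in local structs. A toy userspace model of the seed-in, result-out flow (the checksum update itself is elided; nothing here is crypto API):

#include <stdio.h>
#include <string.h>

static unsigned int ctx_size(void) { return sizeof(unsigned int); }

int main(void)
{
	struct {
		int  kind;              /* stands in for shash_desc    */
		char ctx[ctx_size()];   /* runtime-sized, on the stack */
	} desc;
	unsigned int crc = ~1u;

	memcpy(desc.ctx, &crc, sizeof(crc));   /* seed the running value */
	/* ... the update step would advance desc.ctx here ...           */
	memcpy(&crc, desc.ctx, sizeof(crc));   /* read the result back   */
	printf("%u\n", crc);
	return 0;
}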
index 1d982812ab6761077673985dea55970f418cedf7..118a2316e5d39f51cc6293e912ed9d2598d6afd8 100644 (file)
 #ifndef __HASH__
 #define __HASH__
 
-#include <linux/crc32c.h>
+int __init btrfs_hash_init(void);
+
+void btrfs_hash_exit(void);
+
+u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
+
 static inline u64 btrfs_name_hash(const char *name, int len)
 {
-       return crc32c((u32)~1, name, len);
+       return btrfs_crc32c((u32)~1, name, len);
 }
 
 /*
@@ -31,7 +36,7 @@ static inline u64 btrfs_name_hash(const char *name, int len)
 static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
                                    int len)
 {
-       return (u64) crc32c(parent_objectid, name, len);
+       return (u64) btrfs_crc32c(parent_objectid, name, len);
 }
 
 #endif
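Why btrfs_extref_hash() seeds with the parent objectid deserves a word: the same name under two different directories must land on two different extref hashes. A toy illustration with a stand-in mixer (deliberately not crc32c):

#include <stdint.h>
#include <stdio.h>

/* FNV-style stand-in for crc32c, only to show the effect of the seed */
static uint64_t toy_hash(uint64_t seed, const char *s)
{
	uint64_t h = seed ^ 1469598103934665603ull;
	while (*s)
		h = (h ^ (uint8_t)*s++) * 1099511628211ull;
	return h;
}

int main(void)
{
	/* same name, different parent objectid => different hash */
	printf("%llx\n", (unsigned long long)toy_hash(256, "file"));
	printf("%llx\n", (unsigned long long)toy_hash(257, "file"));
	return 0;
}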
index ec82fae070975fcdb6d60879ba34cc001884afbf..2be38df703c9b095f35ed5e887fa2c37c7fb0e7c 100644 (file)
@@ -91,32 +91,6 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
        return 0;
 }
 
-static struct btrfs_inode_ref *
-btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
-                      struct btrfs_root *root,
-                      struct btrfs_path *path,
-                      const char *name, int name_len,
-                      u64 inode_objectid, u64 ref_objectid, int ins_len,
-                      int cow)
-{
-       int ret;
-       struct btrfs_key key;
-       struct btrfs_inode_ref *ref;
-
-       key.objectid = inode_objectid;
-       key.type = BTRFS_INODE_REF_KEY;
-       key.offset = ref_objectid;
-
-       ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
-       if (ret < 0)
-               return ERR_PTR(ret);
-       if (ret > 0)
-               return NULL;
-       if (!find_name_in_backref(path, name, name_len, &ref))
-               return NULL;
-       return ref;
-}
-
 /* Returns NULL if no extref found */
 struct btrfs_inode_extref *
 btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
@@ -144,45 +118,6 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
        return extref;
 }
 
-int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
-                             struct btrfs_root *root,
-                             struct btrfs_path *path,
-                             const char *name, int name_len,
-                             u64 inode_objectid, u64 ref_objectid, int mod,
-                             u64 *ret_index)
-{
-       struct btrfs_inode_ref *ref;
-       struct btrfs_inode_extref *extref;
-       int ins_len = mod < 0 ? -1 : 0;
-       int cow = mod != 0;
-
-       ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len,
-                                    inode_objectid, ref_objectid, ins_len,
-                                    cow);
-       if (IS_ERR(ref))
-               return PTR_ERR(ref);
-
-       if (ref != NULL) {
-               *ret_index = btrfs_inode_ref_index(path->nodes[0], ref);
-               return 0;
-       }
-
-       btrfs_release_path(path);
-
-       extref = btrfs_lookup_inode_extref(trans, root, path, name,
-                                          name_len, inode_objectid,
-                                          ref_objectid, ins_len, cow);
-       if (IS_ERR(extref))
-               return PTR_ERR(extref);
-
-       if (extref) {
-               *ret_index = btrfs_inode_extref_index(path->nodes[0], extref);
-               return 0;
-       }
-
-       return -ENOENT;
-}
-
 static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  const char *name, int name_len,
index d546d8c3038baa4451aa2f338a0c24592a3ea48f..5c4ab9c18940cc7827a75df6e02a84370cae3edd 100644 (file)
 #include "inode-map.h"
 #include "backref.h"
 #include "hash.h"
+#include "props.h"
 
 struct btrfs_iget_args {
-       u64 ino;
+       struct btrfs_key *location;
        struct btrfs_root *root;
 };
 
@@ -125,13 +126,12 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
  * no overlapping inline items exist in the btree
  */
 static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+                               struct btrfs_path *path, int extent_inserted,
                                struct btrfs_root *root, struct inode *inode,
                                u64 start, size_t size, size_t compressed_size,
                                int compress_type,
                                struct page **compressed_pages)
 {
-       struct btrfs_key key;
-       struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct page *page = NULL;
        char *kaddr;
@@ -140,29 +140,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
        int err = 0;
        int ret;
        size_t cur_size = size;
-       size_t datasize;
        unsigned long offset;
 
        if (compressed_size && compressed_pages)
                cur_size = compressed_size;
 
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
+       inode_add_bytes(inode, size);
 
-       path->leave_spinning = 1;
+       if (!extent_inserted) {
+               struct btrfs_key key;
+               size_t datasize;
 
-       key.objectid = btrfs_ino(inode);
-       key.offset = start;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
-       datasize = btrfs_file_extent_calc_inline_size(cur_size);
+               key.objectid = btrfs_ino(inode);
+               key.offset = start;
+               btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
 
-       inode_add_bytes(inode, size);
-       ret = btrfs_insert_empty_item(trans, root, path, &key,
-                                     datasize);
-       if (ret) {
-               err = ret;
-               goto fail;
+               datasize = btrfs_file_extent_calc_inline_size(cur_size);
+               path->leave_spinning = 1;
+               ret = btrfs_insert_empty_item(trans, root, path, &key,
+                                             datasize);
+               if (ret) {
+                       err = ret;
+                       goto fail;
+               }
        }
        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -203,7 +203,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
                page_cache_release(page);
        }
        btrfs_mark_buffer_dirty(leaf);
-       btrfs_free_path(path);
+       btrfs_release_path(path);
 
        /*
         * we're an inline extent, so nobody can
@@ -219,7 +219,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 
        return ret;
 fail:
-       btrfs_free_path(path);
        return err;
 }
 
@@ -242,6 +241,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
        u64 aligned_end = ALIGN(end, root->sectorsize);
        u64 data_len = inline_len;
        int ret;
+       struct btrfs_path *path;
+       int extent_inserted = 0;
+       u32 extent_item_size;
 
        if (compressed_size)
                data_len = compressed_size;
@@ -256,12 +258,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
                return 1;
        }
 
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
        trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
+       if (IS_ERR(trans)) {
+               btrfs_free_path(path);
                return PTR_ERR(trans);
+       }
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
-       ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
+       if (compressed_size && compressed_pages)
+               extent_item_size = btrfs_file_extent_calc_inline_size(
+                  compressed_size);
+       else
+               extent_item_size = btrfs_file_extent_calc_inline_size(
+                   inline_len);
+
+       ret = __btrfs_drop_extents(trans, root, inode, path,
+                                  start, aligned_end, NULL,
+                                  1, 1, extent_item_size, &extent_inserted);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto out;
@@ -269,7 +286,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
 
        if (isize > actual_end)
                inline_len = min_t(u64, isize, actual_end);
-       ret = insert_inline_extent(trans, root, inode, start,
+       ret = insert_inline_extent(trans, path, extent_inserted,
+                                  root, inode, start,
                                   inline_len, compressed_size,
                                   compress_type, compressed_pages);
        if (ret && ret != -ENOSPC) {
@@ -284,6 +302,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
        btrfs_delalloc_release_metadata(inode, end + 1 - start);
        btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 out:
+       btrfs_free_path(path);
        btrfs_end_transaction(trans, root);
        return ret;
 }
@@ -1262,7 +1281,8 @@ next_slot:
                        nocow = 1;
                } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                        extent_end = found_key.offset +
-                               btrfs_file_extent_inline_len(leaf, fi);
+                               btrfs_file_extent_inline_len(leaf,
+                                                    path->slots[0], fi);
                        extent_end = ALIGN(extent_end, root->sectorsize);
                } else {
                        BUG_ON(1);
@@ -1841,14 +1861,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key ins;
+       int extent_inserted = 0;
        int ret;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       path->leave_spinning = 1;
-
        /*
         * we may be replacing one extent in the tree with another.
         * The new extent is pinned in the extent map, and we don't want
@@ -1858,17 +1877,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
         * the caller is expected to unpin it and allow it to be merged
         * with the others.
         */
-       ret = btrfs_drop_extents(trans, root, inode, file_pos,
-                                file_pos + num_bytes, 0);
+       ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
+                                  file_pos + num_bytes, NULL, 0,
+                                  1, sizeof(*fi), &extent_inserted);
        if (ret)
                goto out;
 
-       ins.objectid = btrfs_ino(inode);
-       ins.offset = file_pos;
-       ins.type = BTRFS_EXTENT_DATA_KEY;
-       ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
-       if (ret)
-               goto out;
+       if (!extent_inserted) {
+               ins.objectid = btrfs_ino(inode);
+               ins.offset = file_pos;
+               ins.type = BTRFS_EXTENT_DATA_KEY;
+
+               path->leave_spinning = 1;
+               ret = btrfs_insert_empty_item(trans, root, path, &ins,
+                                             sizeof(*fi));
+               if (ret)
+                       goto out;
+       }
        leaf = path->nodes[0];
        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);
@@ -2290,7 +2315,7 @@ again:
                u64 extent_len;
                struct btrfs_key found_key;
 
-               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+               ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
                if (ret < 0)
                        goto out_free_path;
 
@@ -2543,12 +2568,6 @@ out_kfree:
        return NULL;
 }
 
-/*
- * helper function for btrfs_finish_ordered_io, this
- * just reads in some of the csum leaves to prime them into ram
- * before we start the transaction.  It limits the amount of btree
- * reads required while inside the transaction.
- */
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -3248,7 +3267,8 @@ out:
  * slot is the slot the inode is in, objectid is the objectid of the inode
  */
 static noinline int acls_after_inode_item(struct extent_buffer *leaf,
-                                         int slot, u64 objectid)
+                                         int slot, u64 objectid,
+                                         int *first_xattr_slot)
 {
        u32 nritems = btrfs_header_nritems(leaf);
        struct btrfs_key found_key;
@@ -3264,6 +3284,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
        }
 
        slot++;
+       *first_xattr_slot = -1;
        while (slot < nritems) {
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -3273,6 +3294,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 
                /* we found an xattr, assume we've got an acl */
                if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
+                       if (*first_xattr_slot == -1)
+                               *first_xattr_slot = slot;
                        if (found_key.offset == xattr_access ||
                            found_key.offset == xattr_default)
                                return 1;
@@ -3301,6 +3324,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
         * something larger than an xattr.  We have to assume the inode
         * has acls
         */
+       if (*first_xattr_slot == -1)
+               *first_xattr_slot = slot;
        return 1;
 }
 
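A simplified, compilable model of the scan above (leaf crossing and the acl name checks are omitted): walk the sorted keys after the inode item, remember the first xattr slot for the later property load, and stop once the keys leave this object or sort past the xattr range.

struct key { unsigned long long objectid; int type; };

#define XATTR_ITEM_KEY 24   /* btrfs key type for xattr items */

static int first_xattr_slot(const struct key *keys, int nritems,
			    int slot, unsigned long long objectid)
{
	int first = -1;

	for (slot++; slot < nritems; slot++) {
		if (keys[slot].objectid != objectid)
			break;                        /* left this inode    */
		if (keys[slot].type == XATTR_ITEM_KEY) {
			if (first == -1)
				first = slot;         /* remember for props */
		} else if (keys[slot].type > XATTR_ITEM_KEY) {
			break;                        /* sorted past xattrs */
		}
	}
	return first;
}

int main(void)
{
	const struct key keys[] = {
		{ 257, 1 }, { 257, 12 }, { 257, 24 }, { 258, 1 },
	};
	return first_xattr_slot(keys, 4, 0, 257) == 2 ? 0 : 1;
}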
@@ -3315,10 +3340,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
        struct btrfs_timespec *tspec;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key location;
+       unsigned long ptr;
        int maybe_acls;
        u32 rdev;
        int ret;
        bool filled = false;
+       int first_xattr_slot;
 
        ret = btrfs_fill_inode(inode, &rdev);
        if (!ret)
@@ -3328,7 +3355,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
        if (!path)
                goto make_bad;
 
-       path->leave_spinning = 1;
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -3338,7 +3364,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
        leaf = path->nodes[0];
 
        if (filled)
-               goto cache_acl;
+               goto cache_index;
 
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
@@ -3381,18 +3407,51 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
        BTRFS_I(inode)->index_cnt = (u64)-1;
        BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
+       path->slots[0]++;
+       if (inode->i_nlink != 1 ||
+           path->slots[0] >= btrfs_header_nritems(leaf))
+               goto cache_acl;
+
+       btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
+       if (location.objectid != btrfs_ino(inode))
+               goto cache_acl;
+
+       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       if (location.type == BTRFS_INODE_REF_KEY) {
+               struct btrfs_inode_ref *ref;
+
+               ref = (struct btrfs_inode_ref *)ptr;
+               BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
+       } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
+               struct btrfs_inode_extref *extref;
+
+               extref = (struct btrfs_inode_extref *)ptr;
+               BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
+                                                                    extref);
+       }
 cache_acl:
        /*
         * try to precache a NULL acl entry for files that don't have
         * any xattrs or acls
         */
        maybe_acls = acls_after_inode_item(leaf, path->slots[0],
-                                          btrfs_ino(inode));
+                                          btrfs_ino(inode), &first_xattr_slot);
+       if (first_xattr_slot != -1) {
+               path->slots[0] = first_xattr_slot;
+               ret = btrfs_load_inode_props(inode, path);
+               if (ret)
+                       btrfs_err(root->fs_info,
+                                 "error loading props for ino %llu (root %llu): %d\n",
+                                 btrfs_ino(inode),
+                                 root->root_key.objectid, ret);
+       }
+       btrfs_free_path(path);
+
        if (!maybe_acls)
                cache_no_acl(inode);
 
-       btrfs_free_path(path);
-
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_mapping->a_ops = &btrfs_aops;
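The cache_index block above leans on btree key ordering: for an inode with a single hard link, the INODE_REF (or INODE_EXTREF) item sorts directly behind the INODE_ITEM, usually in the same leaf, so the dir index can be captured while that leaf is already hot instead of paying a second search at unlink time. A compilable model of the precondition it checks:

struct key { unsigned long long objectid; int type; };

#define INODE_REF_KEY    12
#define INODE_EXTREF_KEY 13

/* true when slot + 1 holds this inode's ref item, so the cached
 * dir index can be read straight off it */
static int ref_follows_inode_item(const struct key *keys, int nritems,
				  int slot, unsigned long long ino,
				  int nlink)
{
	if (nlink != 1 || slot + 1 >= nritems)
		return 0;
	if (keys[slot + 1].objectid != ino)
		return 0;
	return keys[slot + 1].type == INODE_REF_KEY ||
	       keys[slot + 1].type == INODE_EXTREF_KEY;
}

int main(void)
{
	const struct key keys[] = { { 257, 1 }, { 257, 12 } };
	return ref_follows_inode_item(keys, 2, 0, 257, 1) ? 0 : 1;
}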
@@ -3496,7 +3555,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
                goto failed;
        }
 
-       btrfs_unlock_up_safe(path, 1);
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
@@ -3593,6 +3651,24 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                goto err;
        btrfs_release_path(path);
 
+       /*
+        * If we don't have a dir index, we have to get it by looking up
+        * the inode ref; since we then have the inode ref in hand, we can
+        * remove it directly, and there is no need for a delayed deletion.
+        *
+        * But if we do have the dir index, we needn't search the inode ref
+        * to get it. Since the inode ref is close to the inode item, it is
+        * better to delay its deletion and do it when we update the inode
+        * item.
+        */
+       if (BTRFS_I(inode)->dir_index) {
+               ret = btrfs_delayed_delete_inode_ref(inode);
+               if (!ret) {
+                       index = BTRFS_I(inode)->dir_index;
+                       goto skip_backref;
+               }
+       }
+
        ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
                                  dir_ino, &index);
        if (ret) {
@@ -3602,7 +3678,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                btrfs_abort_transaction(trans, root, ret);
                goto err;
        }
-
+skip_backref:
        ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
@@ -3948,7 +4024,7 @@ search_again:
                                    btrfs_file_extent_num_bytes(leaf, fi);
                        } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                                item_end += btrfs_file_extent_inline_len(leaf,
-                                                                        fi);
+                                                        path->slots[0], fi);
                        }
                        item_end--;
                }
@@ -4018,6 +4094,12 @@ search_again:
                                        inode_sub_bytes(inode, item_end + 1 -
                                                        new_size);
                                }
+
+                               /*
+                                * update the ram bytes to properly reflect
+                                * the new size of our item
+                                */
+                               btrfs_set_file_extent_ram_bytes(leaf, fi, size);
                                size =
                                    btrfs_file_extent_calc_inline_size(size);
                                btrfs_truncate_item(root, path, size, 1);
@@ -4203,6 +4285,49 @@ out:
        return ret;
 }
 
+static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
+                            u64 offset, u64 len)
+{
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       /*
+        * Still need to make sure the inode looks like it's been updated so
+        * that any holes get logged if we fsync.
+        */
+       if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
+               BTRFS_I(inode)->last_trans = root->fs_info->generation;
+               BTRFS_I(inode)->last_sub_trans = root->log_transid;
+               BTRFS_I(inode)->last_log_commit = root->last_log_commit;
+               return 0;
+       }
+
+       /*
+        * 1 - for the one we're dropping
+        * 1 - for the one we're adding
+        * 1 - for updating the inode.
+        */
+       trans = btrfs_start_transaction(root, 3);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               btrfs_end_transaction(trans, root);
+               return ret;
+       }
+
+       ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+                                      0, 0, len, 0, len, 0, 0, 0);
+       if (ret)
+               btrfs_abort_transaction(trans, root, ret);
+       else
+               btrfs_update_inode(trans, root, inode);
+       btrfs_end_transaction(trans, root);
+       return ret;
+}
+
 /*
  * This function puts in dummy file extents for the area we're creating a hole
  * for.  So if we are truncating this file to a larger size we need to insert
@@ -4211,7 +4336,6 @@ out:
  */
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_map *em = NULL;
@@ -4266,31 +4390,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                        struct extent_map *hole_em;
                        hole_size = last_byte - cur_offset;
 
-                       trans = btrfs_start_transaction(root, 3);
-                       if (IS_ERR(trans)) {
-                               err = PTR_ERR(trans);
-                               break;
-                       }
-
-                       err = btrfs_drop_extents(trans, root, inode,
-                                                cur_offset,
-                                                cur_offset + hole_size, 1);
-                       if (err) {
-                               btrfs_abort_transaction(trans, root, err);
-                               btrfs_end_transaction(trans, root);
-                               break;
-                       }
-
-                       err = btrfs_insert_file_extent(trans, root,
-                                       btrfs_ino(inode), cur_offset, 0,
-                                       0, hole_size, 0, hole_size,
-                                       0, 0, 0);
-                       if (err) {
-                               btrfs_abort_transaction(trans, root, err);
-                               btrfs_end_transaction(trans, root);
+                       err = maybe_insert_hole(root, inode, cur_offset,
+                                               hole_size);
+                       if (err)
                                break;
-                       }
-
                        btrfs_drop_extent_cache(inode, cur_offset,
                                                cur_offset + hole_size - 1, 0);
                        hole_em = alloc_extent_map();
@@ -4309,7 +4412,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                        hole_em->ram_bytes = hole_size;
                        hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
                        hole_em->compress_type = BTRFS_COMPRESS_NONE;
-                       hole_em->generation = trans->transid;
+                       hole_em->generation = root->fs_info->generation;
 
                        while (1) {
                                write_lock(&em_tree->lock);
@@ -4322,17 +4425,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                                        hole_size - 1, 0);
                        }
                        free_extent_map(hole_em);
-next:
-                       btrfs_update_inode(trans, root, inode);
-                       btrfs_end_transaction(trans, root);
                }
+next:
                free_extent_map(em);
                em = NULL;
                cur_offset = last_byte;
                if (cur_offset >= block_end)
                        break;
        }
-
        free_extent_map(em);
        unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
                             GFP_NOFS);
@@ -4474,6 +4574,64 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
        return err;
 }
 
+/*
+ * While truncating the inode pages during eviction, the VFS calls
+ * btrfs_invalidatepage() against each page of the inode. This is slow because
+ * those calls result in a huge number of calls to lock_extent_bits() and
+ * clear_extent_bit(), which keep merging and splitting extent_state
+ * structures over and over, wasting lots of time.
+ *
+ * Therefore, if the inode is being evicted, let btrfs_invalidatepage() skip
+ * all those expensive per-page operations and do only the ordered io
+ * finishing, while we release the extent_map and extent_state structures
+ * here, without the excessive merging and splitting.
+ */
+static void evict_inode_truncate_pages(struct inode *inode)
+{
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
+       struct rb_node *node;
+
+       ASSERT(inode->i_state & I_FREEING);
+       truncate_inode_pages(&inode->i_data, 0);
+
+       write_lock(&map_tree->lock);
+       while (!RB_EMPTY_ROOT(&map_tree->map)) {
+               struct extent_map *em;
+
+               node = rb_first(&map_tree->map);
+               em = rb_entry(node, struct extent_map, rb_node);
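+               /*
+                * Clear the pinned/logging bits first so that removing a
+                * still-pinned mapping below doesn't trigger a warning.
+                */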
+               clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+               clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
+               remove_extent_mapping(map_tree, em);
+               free_extent_map(em);
+       }
+       write_unlock(&map_tree->lock);
+
+       spin_lock(&io_tree->lock);
+       while (!RB_EMPTY_ROOT(&io_tree->state)) {
+               struct extent_state *state;
+               struct extent_state *cached_state = NULL;
+
+               node = rb_first(&io_tree->state);
+               state = rb_entry(node, struct extent_state, rb_node);
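+               /*
+                * Grab a reference so the state can't be freed while we drop
+                * the tree lock to lock and clear its range below.
+                */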
+               atomic_inc(&state->refs);
+               spin_unlock(&io_tree->lock);
+
+               lock_extent_bits(io_tree, state->start, state->end,
+                                0, &cached_state);
+               clear_extent_bit(io_tree, state->start, state->end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+               free_extent_state(state);
+
+               spin_lock(&io_tree->lock);
+       }
+       spin_unlock(&io_tree->lock);
+}
+
 void btrfs_evict_inode(struct inode *inode)
 {
        struct btrfs_trans_handle *trans;
@@ -4484,7 +4642,8 @@ void btrfs_evict_inode(struct inode *inode)
 
        trace_btrfs_inode_evict(inode);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       evict_inode_truncate_pages(inode);
+
        if (inode->i_nlink &&
            ((btrfs_root_refs(&root->root_item) != 0 &&
              root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@ -4659,9 +4818,9 @@ static int fixup_tree_root_location(struct btrfs_root *root,
        }
 
        err = -ENOENT;
-       ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
-                                 BTRFS_I(dir)->root->root_key.objectid,
-                                 location->objectid);
+       ret = btrfs_find_item(root->fs_info->tree_root, path,
+                               BTRFS_I(dir)->root->root_key.objectid,
+                               location->objectid, BTRFS_ROOT_REF_KEY, NULL);
        if (ret) {
                if (ret < 0)
                        err = ret;
@@ -4822,7 +4981,9 @@ again:
 static int btrfs_init_locked_inode(struct inode *inode, void *p)
 {
        struct btrfs_iget_args *args = p;
-       inode->i_ino = args->ino;
+       inode->i_ino = args->location->objectid;
+       memcpy(&BTRFS_I(inode)->location, args->location,
+              sizeof(*args->location));
        BTRFS_I(inode)->root = args->root;
        return 0;
 }
@@ -4830,19 +4991,19 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
 static int btrfs_find_actor(struct inode *inode, void *opaque)
 {
        struct btrfs_iget_args *args = opaque;
-       return args->ino == btrfs_ino(inode) &&
+       return args->location->objectid == BTRFS_I(inode)->location.objectid &&
                args->root == BTRFS_I(inode)->root;
 }
 
 static struct inode *btrfs_iget_locked(struct super_block *s,
-                                      u64 objectid,
+                                      struct btrfs_key *location,
                                       struct btrfs_root *root)
 {
        struct inode *inode;
        struct btrfs_iget_args args;
-       unsigned long hashval = btrfs_inode_hash(objectid, root);
+       unsigned long hashval = btrfs_inode_hash(location->objectid, root);
 
-       args.ino = objectid;
+       args.location = location;
        args.root = root;
 
        inode = iget5_locked(s, hashval, btrfs_find_actor,
@@ -4859,13 +5020,11 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 {
        struct inode *inode;
 
-       inode = btrfs_iget_locked(s, location->objectid, root);
+       inode = btrfs_iget_locked(s, location, root);
        if (!inode)
                return ERR_PTR(-ENOMEM);
 
        if (inode->i_state & I_NEW) {
-               BTRFS_I(inode)->root = root;
-               memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
                btrfs_read_locked_inode(inode);
                if (!is_bad_inode(inode)) {
                        inode_tree_add(inode);
@@ -4921,7 +5080,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
                return ERR_PTR(ret);
 
        if (location.objectid == 0)
-               return NULL;
+               return ERR_PTR(-ENOENT);
 
        if (location.type == BTRFS_INODE_ITEM_KEY) {
                inode = btrfs_iget(dir->i_sb, &location, root, NULL);
@@ -4985,10 +5144,17 @@ static void btrfs_dentry_release(struct dentry *dentry)
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   unsigned int flags)
 {
-       struct dentry *ret;
+       struct inode *inode;
 
-       ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
-       return ret;
+       inode = btrfs_lookup_dentry(dir, dentry);
+       if (IS_ERR(inode)) {
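+               /*
+                * A lookup miss is not an error: pass a NULL inode to
+                * d_splice_alias() so it creates a negative dentry.
+                */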
+               if (PTR_ERR(inode) == -ENOENT)
+                       inode = NULL;
+               else
+                       return ERR_CAST(inode);
+       }
+
+       return d_splice_alias(inode, dentry);
 }
 
 unsigned char btrfs_filetype_table[] = {
@@ -5358,7 +5524,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        u32 sizes[2];
        unsigned long ptr;
        int ret;
-       int owner;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -5392,6 +5557,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
         * number
         */
        BTRFS_I(inode)->index_cnt = 2;
+       BTRFS_I(inode)->dir_index = *index;
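+       /* Cache the dir index of this new link; unlink uses it to skip the
+        * inode ref lookup. */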
        BTRFS_I(inode)->root = root;
        BTRFS_I(inode)->generation = trans->transid;
        inode->i_generation = BTRFS_I(inode)->generation;
@@ -5404,11 +5570,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
         */
        set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
 
-       if (S_ISDIR(mode))
-               owner = 0;
-       else
-               owner = 1;
-
        key[0].objectid = objectid;
        btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
        key[0].offset = 0;
@@ -5473,6 +5634,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
        btrfs_update_root_times(trans, root);
 
+       ret = btrfs_inode_inherit_props(trans, inode, dir);
+       if (ret)
+               btrfs_err(root->fs_info,
+                         "error inheriting props for ino %llu (root %llu): %d",
+                         btrfs_ino(inode), root->root_key.objectid, ret);
+
        return inode;
 fail:
        if (dir)
@@ -5741,6 +5908,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                goto fail;
        }
 
+       /* There are now several dir indexes for this inode, so clear the cached one. */
+       BTRFS_I(inode)->dir_index = 0ULL;
        inc_nlink(inode);
        inode_inc_iversion(inode);
        inode->i_ctime = CURRENT_TIME;
@@ -6004,7 +6173,7 @@ again:
                       btrfs_file_extent_num_bytes(leaf, item);
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                size_t size;
-               size = btrfs_file_extent_inline_len(leaf, item);
+               size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
                extent_end = ALIGN(extent_start + size, root->sectorsize);
        }
 next:
@@ -6073,7 +6242,7 @@ next:
                        goto out;
                }
 
-               size = btrfs_file_extent_inline_len(leaf, item);
+               size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
                extent_offset = page_offset(page) + pg_offset - extent_start;
                copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
                                size - extent_offset);
@@ -6390,6 +6559,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
        int slot;
        int found_type;
        bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -6433,6 +6603,10 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
        if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
                goto out;
 
+       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+       if (extent_end <= offset)
+               goto out;
+
        disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
        if (disk_bytenr == 0)
                goto out;
@@ -6450,8 +6624,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
        }
 
-       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
-
        if (btrfs_extent_readonly(root, disk_bytenr))
                goto out;
        btrfs_release_path(path);
@@ -6895,8 +7067,8 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
        struct btrfs_dio_private *dip = bio->bi_private;
 
        if (err) {
-               printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
-                     "sector %#Lx len %u err no %d\n",
+               btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
+                         "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
                      btrfs_ino(dip->inode), bio->bi_rw,
                      (unsigned long long)bio->bi_iter.bi_sector,
                      bio->bi_iter.bi_size, err);
@@ -7370,6 +7542,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
        struct extent_state *cached_state = NULL;
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       int inode_evicting = inode->i_state & I_FREEING;
 
        /*
         * we have the page locked, so new writeback can't start,
@@ -7385,17 +7558,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                btrfs_releasepage(page, GFP_NOFS);
                return;
        }
-       lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
+
+       if (!inode_evicting)
+               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+       ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
                /*
                 * IO on this page will never be started, so we need
                 * to account for any ordered extents now
                 */
-               clear_extent_bit(tree, page_start, page_end,
-                                EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
-                                EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
+               if (!inode_evicting)
+                       clear_extent_bit(tree, page_start, page_end,
+                                        EXTENT_DIRTY | EXTENT_DELALLOC |
+                                        EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                        EXTENT_DEFRAG, 1, 0, &cached_state,
+                                        GFP_NOFS);
                /*
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
@@ -7419,14 +7596,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                                btrfs_finish_ordered_io(ordered);
                }
                btrfs_put_ordered_extent(ordered);
-               cached_state = NULL;
-               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+               if (!inode_evicting) {
+                       cached_state = NULL;
+                       lock_extent_bits(tree, page_start, page_end, 0,
+                                        &cached_state);
+               }
+       }
+
+       if (!inode_evicting) {
+               clear_extent_bit(tree, page_start, page_end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+
+               __btrfs_releasepage(page, GFP_NOFS);
        }
-       clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
-                &cached_state, GFP_NOFS);
-       __btrfs_releasepage(page, GFP_NOFS);
 
        ClearPageChecked(page);
        if (PagePrivate(page)) {
@@ -7736,7 +7921,9 @@ out:
  * create a new subvolume directory/inode (helper for the ioctl).
  */
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, u64 new_dirid)
+                            struct btrfs_root *new_root,
+                            struct btrfs_root *parent_root,
+                            u64 new_dirid)
 {
        struct inode *inode;
        int err;
@@ -7754,6 +7941,12 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
        set_nlink(inode, 1);
        btrfs_i_size_write(inode, 0);
 
+       err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
+       if (err)
+               btrfs_err(new_root->fs_info,
+                         "error inheriting subvolume %llu properties: %d",
+                         new_root->root_key.objectid, err);
+
        err = btrfs_update_inode(trans, new_root, inode);
 
        iput(inode);
@@ -7779,6 +7972,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->flags = 0;
        ei->csum_bytes = 0;
        ei->index_cnt = (u64)-1;
+       ei->dir_index = 0;
        ei->last_unlink_trans = 0;
        ei->last_log_commit = 0;
 
@@ -8066,6 +8260,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (ret)
                goto out_fail;
 
+       BTRFS_I(old_inode)->dir_index = 0ULL;
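+       /*
+        * The cached dir index is stale across the rename; it is re-cached
+        * below once the new index is known, if the inode has a single link.
+        */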
        if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                /* force full log commit if subvolume involved. */
                root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -8154,6 +8349,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                goto out_fail;
        }
 
+       if (old_inode->i_nlink == 1)
+               BTRFS_I(old_inode)->dir_index = index;
+
        if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
                struct dentry *parent = new_dentry->d_parent;
                btrfs_log_new_name(trans, old_inode, old_dir, parent);
@@ -8289,7 +8487,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 {
        int ret;
 
-       if (root->fs_info->sb->s_flags & MS_RDONLY)
+       if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
                return -EROFS;
 
        ret = __start_delalloc_inodes(root, delay_iput);
@@ -8315,7 +8513,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
        struct list_head splice;
        int ret;
 
-       if (fs_info->sb->s_flags & MS_RDONLY)
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                return -EROFS;
 
        INIT_LIST_HEAD(&splice);
index ad27dcea319c74558d9f38e4647111bca362654a..b0134892dc70cdf69be04ad44e3e52183a86fcb0 100644 (file)
@@ -56,6 +56,8 @@
 #include "rcu-string.h"
 #include "send.h"
 #include "dev-replace.h"
+#include "props.h"
+#include "sysfs.h"
 
 static int btrfs_clone(struct inode *src, struct inode *inode,
                       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
@@ -190,6 +192,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        unsigned int i_oldflags;
        umode_t mode;
 
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
+
        if (btrfs_root_readonly(root))
                return -EROFS;
 
@@ -200,9 +205,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (ret)
                return ret;
 
-       if (!inode_owner_or_capable(inode))
-               return -EACCES;
-
        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
@@ -280,9 +282,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (flags & FS_NOCOMP_FL) {
                ip->flags &= ~BTRFS_INODE_COMPRESS;
                ip->flags |= BTRFS_INODE_NOCOMPRESS;
+
+               ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
+               if (ret && ret != -ENODATA)
+                       goto out_drop;
        } else if (flags & FS_COMPR_FL) {
+               const char *comp;
+
                ip->flags |= BTRFS_INODE_COMPRESS;
                ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+
+               if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
+                       comp = "lzo";
+               else
+                       comp = "zlib";
+               ret = btrfs_set_prop(inode, "btrfs.compression",
+                                    comp, strlen(comp), 0);
+               if (ret)
+                       goto out_drop;
+
        } else {
                ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
        }
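
With this change, toggling compression through the standard inode flags also
persists the new btrfs.compression property. A minimal userspace sketch of the
flag side (the helper name and error handling are illustrative, not part of
this series):

    #include <sys/ioctl.h>
    #include <linux/fs.h>

    /* Hypothetical helper: enable per-file compression via FS_IOC_SETFLAGS. */
    static int enable_compression(int fd)
    {
            int flags;

            if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
                    return -1;
            flags &= ~FS_NOCOMP_FL;
            flags |= FS_COMPR_FL;   /* btrfs now also stores btrfs.compression */
            return ioctl(fd, FS_IOC_SETFLAGS, &flags);
    }
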
@@ -392,6 +410,7 @@ static noinline int create_subvol(struct inode *dir,
        struct btrfs_root *new_root;
        struct btrfs_block_rsv block_rsv;
        struct timespec cur_time = CURRENT_TIME;
+       struct inode *inode;
        int ret;
        int err;
        u64 objectid;
@@ -417,7 +436,9 @@ static noinline int create_subvol(struct inode *dir,
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               goto out;
+               btrfs_subvolume_release_metadata(root, &block_rsv,
+                                                qgroup_reserved);
+               return ret;
        }
        trans->block_rsv = &block_rsv;
        trans->bytes_reserved = block_rsv.size;
@@ -500,7 +521,7 @@ static noinline int create_subvol(struct inode *dir,
 
        btrfs_record_root_in_trans(trans, new_root);
 
-       ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
+       ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
        if (ret) {
                /* We potentially lose an unused inode item here */
                btrfs_abort_transaction(trans, root, ret);
@@ -542,6 +563,8 @@ static noinline int create_subvol(struct inode *dir,
 fail:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
+       btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
+
        if (async_transid) {
                *async_transid = trans->transid;
                err = btrfs_commit_transaction_async(trans, root, 1);
@@ -553,10 +576,12 @@ fail:
        if (err && !ret)
                ret = err;
 
-       if (!ret)
-               d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
-out:
-       btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
+       if (!ret) {
+               inode = btrfs_lookup_dentry(dir, dentry);
+               if (IS_ERR(inode))
+                       return PTR_ERR(inode);
+               d_instantiate(dentry, inode);
+       }
        return ret;
 }
 
@@ -642,7 +667,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                ret = PTR_ERR(inode);
                goto fail;
        }
-       BUG_ON(!inode);
+
        d_instantiate(dentry, inode);
        ret = 0;
 fail:
@@ -1011,7 +1036,7 @@ out:
 static int cluster_pages_for_defrag(struct inode *inode,
                                    struct page **pages,
                                    unsigned long start_index,
-                                   int num_pages)
+                                   unsigned long num_pages)
 {
        unsigned long file_end;
        u64 isize = i_size_read(inode);
@@ -1169,8 +1194,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
        int defrag_count = 0;
        int compress_type = BTRFS_COMPRESS_ZLIB;
        int extent_thresh = range->extent_thresh;
-       int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
-       int cluster = max_cluster;
+       unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+       unsigned long cluster = max_cluster;
        u64 new_align = ~((u64)128 * 1024 - 1);
        struct page **pages = NULL;
 
@@ -1254,7 +1279,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        break;
 
                if (btrfs_defrag_cancelled(root->fs_info)) {
-                       printk(KERN_DEBUG "btrfs: defrag_file cancelled\n");
+                       printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");
                        ret = -EAGAIN;
                        break;
                }
@@ -1416,20 +1441,20 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                        ret = -EINVAL;
                        goto out_free;
                }
-               printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
+               btrfs_info(root->fs_info, "resizing devid %llu", devid);
        }
 
        device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
        if (!device) {
-               printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
+               btrfs_info(root->fs_info, "resizer unable to find device %llu",
                       devid);
                ret = -ENODEV;
                goto out_free;
        }
 
        if (!device->writeable) {
-               printk(KERN_INFO "btrfs: resizer unable to apply on "
-                      "readonly device %llu\n",
+               btrfs_info(root->fs_info,
+                          "resizer unable to apply on readonly device %llu",
                       devid);
                ret = -EPERM;
                goto out_free;
@@ -1466,6 +1491,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                }
                new_size = old_size - new_size;
        } else if (mod > 0) {
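+               /* Growing: reject a new size that would overflow old + new. */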
+               if (new_size > ULLONG_MAX - old_size) {
+                       ret = -EINVAL;
+                       goto out_free;
+               }
                new_size = old_size + new_size;
        }
 
@@ -1481,7 +1510,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
        do_div(new_size, root->sectorsize);
        new_size *= root->sectorsize;
 
-       printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
+       printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
                      rcu_str_deref(device->name), new_size);
 
        if (new_size > old_size) {
@@ -1542,9 +1571,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 
                src_inode = file_inode(src.file);
                if (src_inode->i_sb != file_inode(file)->i_sb) {
-                       printk(KERN_INFO "btrfs: Snapshot src from "
-                              "another FS\n");
+                       btrfs_info(BTRFS_I(src_inode)->root->fs_info,
+                                  "Snapshot src from another FS");
                        ret = -EINVAL;
+               } else if (!inode_owner_or_capable(src_inode)) {
+                       /*
+                        * Subvolume creation is not restricted, but snapshots
+                        * are limited to subvolumes the caller owns
+                        */
+                       ret = -EPERM;
                } else {
                        ret = btrfs_mksubvol(&file->f_path, name, namelen,
                                             BTRFS_I(src_inode)->root,
@@ -1662,6 +1697,9 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
        u64 flags;
        int ret = 0;
 
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
+
        ret = mnt_want_write_file(file);
        if (ret)
                goto out;
@@ -1686,11 +1724,6 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
                goto out_drop_write;
        }
 
-       if (!inode_owner_or_capable(inode)) {
-               ret = -EACCES;
-               goto out_drop_write;
-       }
-
        down_write(&root->fs_info->subvol_sem);
 
        /* nothing to do */
@@ -1698,12 +1731,28 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
                goto out_drop_sem;
 
        root_flags = btrfs_root_flags(&root->root_item);
-       if (flags & BTRFS_SUBVOL_RDONLY)
+       if (flags & BTRFS_SUBVOL_RDONLY) {
                btrfs_set_root_flags(&root->root_item,
                                     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
-       else
-               btrfs_set_root_flags(&root->root_item,
+       } else {
+               /*
+                * Block RO -> RW transition if this subvolume is involved in
+                * send
+                */
+               spin_lock(&root->root_item_lock);
+               if (root->send_in_progress == 0) {
+                       btrfs_set_root_flags(&root->root_item,
                                     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
+                       spin_unlock(&root->root_item_lock);
+               } else {
+                       spin_unlock(&root->root_item_lock);
+                       btrfs_warn(root->fs_info,
+                       "Attempt to set subvolume %llu read-write during send",
+                                       root->root_key.objectid);
+                       ret = -EPERM;
+                       goto out_drop_sem;
+               }
+       }
 
        trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans)) {
@@ -1910,7 +1959,7 @@ static noinline int search_ioctl(struct inode *inode,
                key.offset = (u64)-1;
                root = btrfs_read_fs_root_no_name(info, &key);
                if (IS_ERR(root)) {
-                       printk(KERN_ERR "could not find root %llu\n",
+                       printk(KERN_ERR "BTRFS: could not find root %llu\n",
                               sk->tree_id);
                        btrfs_free_path(path);
                        return -ENOENT;
@@ -2000,7 +2049,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
        key.offset = (u64)-1;
        root = btrfs_read_fs_root_no_name(info, &key);
        if (IS_ERR(root)) {
-               printk(KERN_ERR "could not find root %llu\n", tree_id);
+               printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id);
                ret = -ENOENT;
                goto out;
        }
@@ -2838,12 +2887,14 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
                 * note the key will change type as we walk through the
                 * tree.
                 */
+               path->leave_spinning = 1;
                ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
                                0, 0);
                if (ret < 0)
                        goto out;
 
                nritems = btrfs_header_nritems(path->nodes[0]);
+process_slot:
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
                        if (ret < 0)
@@ -2870,11 +2921,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
                        u8 comp;
                        u64 endoff;
 
-                       size = btrfs_item_size_nr(leaf, slot);
-                       read_extent_buffer(leaf, buf,
-                                          btrfs_item_ptr_offset(leaf, slot),
-                                          size);
-
                        extent = btrfs_item_ptr(leaf, slot,
                                                struct btrfs_file_extent_item);
                        comp = btrfs_file_extent_compression(leaf, extent);
@@ -2893,11 +2939,20 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
                                datal = btrfs_file_extent_ram_bytes(leaf,
                                                                    extent);
                        }
-                       btrfs_release_path(path);
 
                        if (key.offset + datal <= off ||
-                           key.offset >= off + len - 1)
-                               goto next;
+                           key.offset >= off + len - 1) {
+                               path->slots[0]++;
+                               goto process_slot;
+                       }
+
+                       size = btrfs_item_size_nr(leaf, slot);
+                       read_extent_buffer(leaf, buf,
+                                          btrfs_item_ptr_offset(leaf, slot),
+                                          size);
+
+                       btrfs_release_path(path);
+                       path->leave_spinning = 0;
 
                        memcpy(&new_key, &key, sizeof(new_key));
                        new_key.objectid = btrfs_ino(inode);
@@ -3068,7 +3123,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
                        }
                        ret = btrfs_end_transaction(trans, root);
                }
-next:
                btrfs_release_path(path);
                key.offset++;
        }
@@ -3196,9 +3250,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
        unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
 out_unlock:
-       mutex_unlock(&src->i_mutex);
-       if (!same_inode)
-               mutex_unlock(&inode->i_mutex);
+       if (!same_inode) {
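+               /* Drop both mutexes, releasing the lower-address inode's last. */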
+               if (inode < src) {
+                       mutex_unlock(&src->i_mutex);
+                       mutex_unlock(&inode->i_mutex);
+               } else {
+                       mutex_unlock(&inode->i_mutex);
+                       mutex_unlock(&src->i_mutex);
+               }
+       } else {
+               mutex_unlock(&src->i_mutex);
+       }
 out_fput:
        fdput(src_file);
 out_drop_write:
@@ -3321,8 +3383,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
        if (IS_ERR_OR_NULL(di)) {
                btrfs_free_path(path);
                btrfs_end_transaction(trans, root);
-               printk(KERN_ERR "Umm, you don't have the default dir item, "
-                      "this isn't going to work\n");
+               btrfs_err(new_root->fs_info, "Umm, you don't have the default "
+                          "dir item, this isn't going to work");
                ret = -ENOENT;
                goto out;
        }
@@ -3475,6 +3537,20 @@ out:
        return ret;
 }
 
+static long btrfs_ioctl_global_rsv(struct btrfs_root *root, void __user *arg)
+{
+       struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv;
+       u64 reserved;
+
+       spin_lock(&block_rsv->lock);
+       reserved = block_rsv->reserved;
+       spin_unlock(&block_rsv->lock);
+
+       if (arg && copy_to_user(arg, &reserved, sizeof(reserved)))
+               return -EFAULT;
+       return 0;
+}
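
For context, the new ioctl simply reports the reserved bytes of the global
block reserve. A hedged userspace sketch (assumes BTRFS_IOC_GLOBAL_RSV from
this series' include/uapi/linux/btrfs.h; the mount point is illustrative):

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/btrfs.h>

    int main(void)
    {
            __u64 reserved = 0;
            int fd = open("/mnt/btrfs", O_RDONLY);  /* any fd on the fs */

            if (fd < 0 || ioctl(fd, BTRFS_IOC_GLOBAL_RSV, &reserved) < 0)
                    return 1;
            printf("global reserve: %llu bytes\n", (unsigned long long)reserved);
            return 0;
    }
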
+
 /*
  * there are many ways the trans_start and trans_end ioctls can lead
  * to deadlocks.  They should only be used by applications that
@@ -4303,6 +4379,9 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
        int ret = 0;
        int received_uuid_changed;
 
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
+
        ret = mnt_want_write_file(file);
        if (ret < 0)
                return ret;
@@ -4319,11 +4398,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
                goto out;
        }
 
-       if (!inode_owner_or_capable(inode)) {
-               ret = -EACCES;
-               goto out;
-       }
-
        sa = memdup_user(arg, sizeof(*sa));
        if (IS_ERR(sa)) {
                ret = PTR_ERR(sa);
@@ -4409,8 +4483,8 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
        len = strnlen(label, BTRFS_LABEL_SIZE);
 
        if (len == BTRFS_LABEL_SIZE) {
-               pr_warn("btrfs: label is too long, return the first %zu bytes\n",
-                       --len);
+               btrfs_warn(root->fs_info,
+                       "label is too long, returning the first %zu bytes", --len);
        }
 
        ret = copy_to_user(arg, label, len);
@@ -4433,7 +4507,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
                return -EFAULT;
 
        if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
-               pr_err("btrfs: unable to set label with more than %d bytes\n",
+               btrfs_err(root->fs_info, "unable to set label with more than %d bytes",
                       BTRFS_LABEL_SIZE - 1);
                return -EINVAL;
        }
@@ -4458,6 +4532,166 @@ out_unlock:
        return ret;
 }
 
+#define INIT_FEATURE_FLAGS(suffix) \
+       { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
+         .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
+         .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
+
+static int btrfs_ioctl_get_supported_features(struct file *file,
+                                             void __user *arg)
+{
+       static struct btrfs_ioctl_feature_flags features[3] = {
+               INIT_FEATURE_FLAGS(SUPP),
+               INIT_FEATURE_FLAGS(SAFE_SET),
+               INIT_FEATURE_FLAGS(SAFE_CLEAR)
+       };
+
+       if (copy_to_user(arg, &features, sizeof(features)))
+               return -EFAULT;
+
+       return 0;
+}
+
+static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
+{
+       struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+       struct btrfs_super_block *super_block = root->fs_info->super_copy;
+       struct btrfs_ioctl_feature_flags features;
+
+       features.compat_flags = btrfs_super_compat_flags(super_block);
+       features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block);
+       features.incompat_flags = btrfs_super_incompat_flags(super_block);
+
+       if (copy_to_user(arg, &features, sizeof(features)))
+               return -EFAULT;
+
+       return 0;
+}
+
+static int check_feature_bits(struct btrfs_root *root,
+                             enum btrfs_feature_set set,
+                             u64 change_mask, u64 flags, u64 supported_flags,
+                             u64 safe_set, u64 safe_clear)
+{
+       const char *type = btrfs_feature_set_names[set];
+       char *names;
+       u64 disallowed, unsupported;
+       u64 set_mask = flags & change_mask;
+       u64 clear_mask = ~flags & change_mask;
+
+       unsupported = set_mask & ~supported_flags;
+       if (unsupported) {
+               names = btrfs_printable_features(set, unsupported);
+               if (names) {
+                       btrfs_warn(root->fs_info,
+                          "this kernel does not support the %s feature bit%s",
+                          names, strchr(names, ',') ? "s" : "");
+                       kfree(names);
+               } else
+                       btrfs_warn(root->fs_info,
+                          "this kernel does not support %s bits 0x%llx",
+                          type, unsupported);
+               return -EOPNOTSUPP;
+       }
+
+       disallowed = set_mask & ~safe_set;
+       if (disallowed) {
+               names = btrfs_printable_features(set, disallowed);
+               if (names) {
+                       btrfs_warn(root->fs_info,
+                          "can't set the %s feature bit%s while mounted",
+                          names, strchr(names, ',') ? "s" : "");
+                       kfree(names);
+               } else
+                       btrfs_warn(root->fs_info,
+                          "can't set %s bits 0x%llx while mounted",
+                          type, disallowed);
+               return -EPERM;
+       }
+
+       disallowed = clear_mask & ~safe_clear;
+       if (disallowed) {
+               names = btrfs_printable_features(set, disallowed);
+               if (names) {
+                       btrfs_warn(root->fs_info,
+                          "can't clear the %s feature bit%s while mounted",
+                          names, strchr(names, ',') ? "s" : "");
+                       kfree(names);
+               } else
+                       btrfs_warn(root->fs_info,
+                          "can't clear %s bits 0x%llx while mounted",
+                          type, disallowed);
+               return -EPERM;
+       }
+
+       return 0;
+}
+
+#define check_feature(root, change_mask, flags, mask_base)     \
+check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \
+                  BTRFS_FEATURE_ ## mask_base ## _SUPP,        \
+                  BTRFS_FEATURE_ ## mask_base ## _SAFE_SET,    \
+                  BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
+
+static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
+{
+       struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+       struct btrfs_super_block *super_block = root->fs_info->super_copy;
+       struct btrfs_ioctl_feature_flags flags[2];
+       struct btrfs_trans_handle *trans;
+       u64 newflags;
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (copy_from_user(flags, arg, sizeof(flags)))
+               return -EFAULT;
+
+       /* Nothing to do */
+       if (!flags[0].compat_flags && !flags[0].compat_ro_flags &&
+           !flags[0].incompat_flags)
+               return 0;
+
+       ret = check_feature(root, flags[0].compat_flags,
+                           flags[1].compat_flags, COMPAT);
+       if (ret)
+               return ret;
+
+       ret = check_feature(root, flags[0].compat_ro_flags,
+                           flags[1].compat_ro_flags, COMPAT_RO);
+       if (ret)
+               return ret;
+
+       ret = check_feature(root, flags[0].incompat_flags,
+                           flags[1].incompat_flags, INCOMPAT);
+       if (ret)
+               return ret;
+
+       trans = btrfs_start_transaction(root, 1);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       spin_lock(&root->fs_info->super_lock);
+       newflags = btrfs_super_compat_flags(super_block);
+       newflags |= flags[0].compat_flags & flags[1].compat_flags;
+       newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags);
+       btrfs_set_super_compat_flags(super_block, newflags);
+
+       newflags = btrfs_super_compat_ro_flags(super_block);
+       newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags;
+       newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags);
+       btrfs_set_super_compat_ro_flags(super_block, newflags);
+
+       newflags = btrfs_super_incompat_flags(super_block);
+       newflags |= flags[0].incompat_flags & flags[1].incompat_flags;
+       newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags);
+       btrfs_set_super_incompat_flags(super_block, newflags);
+       spin_unlock(&root->fs_info->super_lock);
+
+       return btrfs_end_transaction(trans, root);
+}
+
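
Taken together, GET_SUPPORTED_FEATURES reports what the kernel can do,
GET_FEATURES what the filesystem has, and SET_FEATURES takes a pair of
btrfs_ioctl_feature_flags: flags[0] is the mask of bits to change and flags[1]
the values to change them to, as checked by check_feature() above. A hedged
sketch (the feature bit is illustrative only):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/btrfs.h>

    /* Sketch: flip one incompat feature bit on a mounted filesystem. */
    static int set_incompat_bit(int fd, __u64 bit)
    {
            struct btrfs_ioctl_feature_flags flags[2];

            memset(flags, 0, sizeof(flags));
            flags[0].incompat_flags = bit;  /* change only this bit */
            flags[1].incompat_flags = bit;  /* ...and set it */
            return ioctl(fd, BTRFS_IOC_SET_FEATURES, flags);
    }
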
 long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
 {
@@ -4523,6 +4757,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_logical_to_ino(root, argp);
        case BTRFS_IOC_SPACE_INFO:
                return btrfs_ioctl_space_info(root, argp);
+       case BTRFS_IOC_GLOBAL_RSV:
+               return btrfs_ioctl_global_rsv(root, argp);
        case BTRFS_IOC_SYNC: {
                int ret;
 
@@ -4576,6 +4812,12 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_set_fslabel(file, argp);
        case BTRFS_IOC_FILE_EXTENT_SAME:
                return btrfs_ioctl_file_extent_same(file, argp);
+       case BTRFS_IOC_GET_SUPPORTED_FEATURES:
+               return btrfs_ioctl_get_supported_features(file, argp);
+       case BTRFS_IOC_GET_FEATURES:
+               return btrfs_ioctl_get_features(file, argp);
+       case BTRFS_IOC_SET_FEATURES:
+               return btrfs_ioctl_set_features(file, argp);
        }
 
        return -ENOTTY;
index b6a6f07c5ce20fe4cecf6917497a6e705be26c9c..b47f669aca758e8ed0f4d3911dfacaa12b3c941f 100644 (file)
@@ -141,7 +141,7 @@ static int lzo_compress_pages(struct list_head *ws,
                ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
                                       &out_len, workspace->mem);
                if (ret != LZO_E_OK) {
-                       printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
+                       printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
                               ret);
                        ret = -1;
                        goto out;
@@ -357,7 +357,7 @@ cont:
                if (need_unmap)
                        kunmap(pages_in[page_in_index - 1]);
                if (ret != LZO_E_OK) {
-                       printk(KERN_WARNING "btrfs decompress failed\n");
+                       printk(KERN_WARNING "BTRFS: decompress failed\n");
                        ret = -1;
                        break;
                }
@@ -401,7 +401,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
        out_len = PAGE_CACHE_SIZE;
        ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
        if (ret != LZO_E_OK) {
-               printk(KERN_WARNING "btrfs decompress failed!\n");
+               printk(KERN_WARNING "BTRFS: decompress failed!\n");
                ret = -1;
                goto out;
        }
index 69582d5b69d1f6064a77a409760a3ba1886b6d92..b16450b840e73be00d42e217e0fcecb2fae1894d 100644 (file)
@@ -336,13 +336,14 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
                      entry->len);
        *file_offset = dec_end;
        if (dec_start > dec_end) {
-               printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
-                      dec_start, dec_end);
+               btrfs_crit(BTRFS_I(inode)->root->fs_info,
+                       "bad ordering dec_start %llu end %llu", dec_start, dec_end);
        }
        to_dec = dec_end - dec_start;
        if (to_dec > entry->bytes_left) {
-               printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
-                      entry->bytes_left, to_dec);
+               btrfs_crit(BTRFS_I(inode)->root->fs_info,
+                       "bad ordered accounting left %llu size %llu",
+                       entry->bytes_left, to_dec);
        }
        entry->bytes_left -= to_dec;
        if (!uptodate)
@@ -401,7 +402,8 @@ have_entry:
        }
 
        if (io_size > entry->bytes_left) {
-               printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+               btrfs_crit(BTRFS_I(inode)->root->fs_info,
+                          "bad ordered accounting left %llu size %llu",
                       entry->bytes_left, io_size);
        }
        entry->bytes_left -= io_size;
@@ -520,7 +522,8 @@ void btrfs_remove_ordered_extent(struct inode *inode,
        spin_lock_irq(&tree->lock);
        node = &entry->rb_node;
        rb_erase(node, &tree->tree);
-       tree->last = NULL;
+       if (tree->last == node)
+               tree->last = NULL;
        set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
        spin_unlock_irq(&tree->lock);
 
index 24cad1695af74790ecc18182db35a0ed5c8cce7a..65793edb38ca881a3f82f49101bc4b079574ed4c 100644 (file)
@@ -69,23 +69,3 @@ out:
        btrfs_free_path(path);
        return ret;
 }
-
-int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
-{
-       struct btrfs_path *path;
-       struct btrfs_key key;
-       int ret;
-
-       key.objectid = BTRFS_ORPHAN_OBJECTID;
-       key.type = BTRFS_ORPHAN_ITEM_KEY;
-       key.offset = offset;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-
-       btrfs_free_path(path);
-       return ret;
-}
index 417053b171817742e64e53e579d1daf8c1a9b7cc..6efd70d3b64f785e23d0bb3323d4905825406c62 100644 (file)
@@ -154,7 +154,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
                            u32 item_size)
 {
        if (!IS_ALIGNED(item_size, sizeof(u64))) {
-               pr_warn("btrfs: uuid item with illegal size %lu!\n",
+               pr_warn("BTRFS: uuid item with illegal size %lu!\n",
                        (unsigned long)item_size);
                return;
        }
@@ -249,7 +249,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
                            BTRFS_FILE_EXTENT_INLINE) {
                                printk(KERN_INFO "\t\tinline extent data "
                                       "size %u\n",
-                                      btrfs_file_extent_inline_len(l, fi));
+                                      btrfs_file_extent_inline_len(l, i, fi));
                                break;
                        }
                        printk(KERN_INFO "\t\textent data disk bytenr %llu "
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
new file mode 100644 (file)
index 0000000..129b1dd
--- /dev/null
@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/hashtable.h>
+#include "props.h"
+#include "btrfs_inode.h"
+#include "hash.h"
+#include "transaction.h"
+#include "xattr.h"
+
+#define BTRFS_PROP_HANDLERS_HT_BITS 8
+static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
+
+struct prop_handler {
+       struct hlist_node node;
+       const char *xattr_name;
+       int (*validate)(const char *value, size_t len);
+       int (*apply)(struct inode *inode, const char *value, size_t len);
+       const char *(*extract)(struct inode *inode);
+       int inheritable;
+};
+
+static int prop_compression_validate(const char *value, size_t len);
+static int prop_compression_apply(struct inode *inode,
+                                 const char *value,
+                                 size_t len);
+static const char *prop_compression_extract(struct inode *inode);
+
+static struct prop_handler prop_handlers[] = {
+       {
+               .xattr_name = XATTR_BTRFS_PREFIX "compression",
+               .validate = prop_compression_validate,
+               .apply = prop_compression_apply,
+               .extract = prop_compression_extract,
+               .inheritable = 1
+       },
+       {
+               .xattr_name = NULL
+       }
+};
+
+void __init btrfs_props_init(void)
+{
+       struct prop_handler *p;
+
+       hash_init(prop_handlers_ht);
+
+       for (p = &prop_handlers[0]; p->xattr_name; p++) {
+               u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
+
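+               /*
+                * This is the same name hash btrfs stores as the key offset
+                * of xattr items, so a handler can later be found directly
+                * from a BTRFS_XATTR_ITEM_KEY's key.offset.
+                */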
+               hash_add(prop_handlers_ht, &p->node, h);
+       }
+}
+
+static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
+{
+       struct hlist_head *h;
+
+       h = &prop_handlers_ht[hash_min(hash, BTRFS_PROP_HANDLERS_HT_BITS)];
+       if (hlist_empty(h))
+               return NULL;
+
+       return h;
+}
+
+static const struct prop_handler *
+find_prop_handler(const char *name,
+                 const struct hlist_head *handlers)
+{
+       struct prop_handler *h;
+
+       if (!handlers) {
+               u64 hash = btrfs_name_hash(name, strlen(name));
+
+               handlers = find_prop_handlers_by_hash(hash);
+               if (!handlers)
+                       return NULL;
+       }
+
+       hlist_for_each_entry(h, handlers, node)
+               if (!strcmp(h->xattr_name, name))
+                       return h;
+
+       return NULL;
+}
+
+static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
+                           struct inode *inode,
+                           const char *name,
+                           const char *value,
+                           size_t value_len,
+                           int flags)
+{
+       const struct prop_handler *handler;
+       int ret;
+
+       if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
+               return -EINVAL;
+
+       handler = find_prop_handler(name, NULL);
+       if (!handler)
+               return -EINVAL;
+
+       if (value_len == 0) {
+               ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
+                                      NULL, 0, flags);
+               if (ret)
+                       return ret;
+
+               ret = handler->apply(inode, NULL, 0);
+               ASSERT(ret == 0);
+
+               return ret;
+       }
+
+       ret = handler->validate(value, value_len);
+       if (ret)
+               return ret;
+       ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
+                              value, value_len, flags);
+       if (ret)
+               return ret;
+       ret = handler->apply(inode, value, value_len);
+       if (ret) {
+               __btrfs_setxattr(trans, inode, handler->xattr_name,
+                                NULL, 0, flags);
+               return ret;
+       }
+
+       set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
+
+       return 0;
+}
+
+int btrfs_set_prop(struct inode *inode,
+                  const char *name,
+                  const char *value,
+                  size_t value_len,
+                  int flags)
+{
+       return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
+}
+
+static int iterate_object_props(struct btrfs_root *root,
+                               struct btrfs_path *path,
+                               u64 objectid,
+                               void (*iterator)(void *,
+                                                const struct prop_handler *,
+                                                const char *,
+                                                size_t),
+                               void *ctx)
+{
+       int ret;
+       char *name_buf = NULL;
+       char *value_buf = NULL;
+       int name_buf_len = 0;
+       int value_buf_len = 0;
+
+       while (1) {
+               struct btrfs_key key;
+               struct btrfs_dir_item *di;
+               struct extent_buffer *leaf;
+               u32 total_len, cur, this_len;
+               int slot;
+               const struct hlist_head *handlers;
+
+               slot = path->slots[0];
+               leaf = path->nodes[0];
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out;
+                       else if (ret > 0)
+                               break;
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid != objectid)
+                       break;
+               if (key.type != BTRFS_XATTR_ITEM_KEY)
+                       break;
+
+               handlers = find_prop_handlers_by_hash(key.offset);
+               if (!handlers)
+                       goto next_slot;
+
+               di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+               cur = 0;
+               total_len = btrfs_item_size_nr(leaf, slot);
+
+               while (cur < total_len) {
+                       u32 name_len = btrfs_dir_name_len(leaf, di);
+                       u32 data_len = btrfs_dir_data_len(leaf, di);
+                       unsigned long name_ptr, data_ptr;
+                       const struct prop_handler *handler;
+
+                       this_len = sizeof(*di) + name_len + data_len;
+                       name_ptr = (unsigned long)(di + 1);
+                       data_ptr = name_ptr + name_len;
+
+                       if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
+                           memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
+                                                name_ptr,
+                                                XATTR_BTRFS_PREFIX_LEN))
+                               goto next_dir_item;
+
+                       if (name_len >= name_buf_len) {
+                               kfree(name_buf);
+                               name_buf_len = name_len + 1;
+                               name_buf = kmalloc(name_buf_len, GFP_NOFS);
+                               if (!name_buf) {
+                                       ret = -ENOMEM;
+                                       goto out;
+                               }
+                       }
+                       read_extent_buffer(leaf, name_buf, name_ptr, name_len);
+                       name_buf[name_len] = '\0';
+
+                       handler = find_prop_handler(name_buf, handlers);
+                       if (!handler)
+                               goto next_dir_item;
+
+                       if (data_len > value_buf_len) {
+                               kfree(value_buf);
+                               value_buf_len = data_len;
+                               value_buf = kmalloc(data_len, GFP_NOFS);
+                               if (!value_buf) {
+                                       ret = -ENOMEM;
+                                       goto out;
+                               }
+                       }
+                       read_extent_buffer(leaf, value_buf, data_ptr, data_len);
+
+                       iterator(ctx, handler, value_buf, data_len);
+next_dir_item:
+                       cur += this_len;
+                       di = (struct btrfs_dir_item *)((char *) di + this_len);
+               }
+
+next_slot:
+               path->slots[0]++;
+       }
+
+       ret = 0;
+out:
+       btrfs_release_path(path);
+       kfree(name_buf);
+       kfree(value_buf);
+
+       return ret;
+}
+
+static void inode_prop_iterator(void *ctx,
+                               const struct prop_handler *handler,
+                               const char *value,
+                               size_t len)
+{
+       struct inode *inode = ctx;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       int ret;
+
+       ret = handler->apply(inode, value, len);
+       if (unlikely(ret))
+               btrfs_warn(root->fs_info,
+                          "error applying prop %s to ino %llu (root %llu): %d",
+                          handler->xattr_name, btrfs_ino(inode),
+                          root->root_key.objectid, ret);
+       else
+               set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
+}
+
+int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       u64 ino = btrfs_ino(inode);
+       int ret;
+
+       ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
+
+       return ret;
+}
+
+static int inherit_props(struct btrfs_trans_handle *trans,
+                        struct inode *inode,
+                        struct inode *parent)
+{
+       const struct prop_handler *h;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       int ret;
+
+       if (!test_bit(BTRFS_INODE_HAS_PROPS,
+                     &BTRFS_I(parent)->runtime_flags))
+               return 0;
+
+       for (h = &prop_handlers[0]; h->xattr_name; h++) {
+               const char *value;
+               u64 num_bytes;
+
+               if (!h->inheritable)
+                       continue;
+
+               value = h->extract(parent);
+               if (!value)
+                       continue;
+
+               num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+               ret = btrfs_block_rsv_add(root, trans->block_rsv,
+                                         num_bytes, BTRFS_RESERVE_NO_FLUSH);
+               if (ret)
+                       goto out;
+               ret = __btrfs_set_prop(trans, inode, h->xattr_name,
+                                      value, strlen(value), 0);
+               btrfs_block_rsv_release(root, trans->block_rsv, num_bytes);
+               if (ret)
+                       goto out;
+       }
+       ret = 0;
+out:
+       return ret;
+}
+
+int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
+                             struct inode *inode,
+                             struct inode *dir)
+{
+       if (!dir)
+               return 0;
+
+       return inherit_props(trans, inode, dir);
+}
+
+int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_root *parent_root)
+{
+       struct btrfs_key key;
+       struct inode *parent_inode, *child_inode;
+       int ret;
+
+       key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+       key.type = BTRFS_INODE_ITEM_KEY;
+       key.offset = 0;
+
+       parent_inode = btrfs_iget(parent_root->fs_info->sb, &key,
+                                 parent_root, NULL);
+       if (IS_ERR(parent_inode))
+               return PTR_ERR(parent_inode);
+
+       child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+       if (IS_ERR(child_inode)) {
+               iput(parent_inode);
+               return PTR_ERR(child_inode);
+       }
+
+       ret = inherit_props(trans, child_inode, parent_inode);
+       iput(child_inode);
+       iput(parent_inode);
+
+       return ret;
+}
+
+static int prop_compression_validate(const char *value, size_t len)
+{
+       if (!strncmp("lzo", value, len))
+               return 0;
+       else if (!strncmp("zlib", value, len))
+               return 0;
+
+       return -EINVAL;
+}
+
+static int prop_compression_apply(struct inode *inode,
+                                 const char *value,
+                                 size_t len)
+{
+       int type;
+
+       if (len == 0) {
+               BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+               BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
+
+               return 0;
+       }
+
+       if (!strncmp("lzo", value, len))
+               type = BTRFS_COMPRESS_LZO;
+       else if (!strncmp("zlib", value, len))
+               type = BTRFS_COMPRESS_ZLIB;
+       else
+               return -EINVAL;
+
+       BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+       BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
+       BTRFS_I(inode)->force_compress = type;
+
+       return 0;
+}
+
+static const char *prop_compression_extract(struct inode *inode)
+{
+       switch (BTRFS_I(inode)->force_compress) {
+       case BTRFS_COMPRESS_ZLIB:
+               return "zlib";
+       case BTRFS_COMPRESS_LZO:
+               return "lzo";
+       }
+
+       return NULL;
+}
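That is the whole of the new props.c: handlers are registered in a hashtable keyed by btrfs_name_hash() of their xattr name, and setting a property validates the value, persists it as an xattr, and applies it to the in-memory inode. Because the names live in the "btrfs." xattr namespace, the mechanism can be exercised with ordinary xattr syscalls; a minimal userspace sketch (only the btrfs.compression name added above is assumed; error handling trimmed):

    #include <stdio.h>
    #include <sys/xattr.h>

    int main(int argc, char **argv)
    {
            const char *path = argc > 1 ? argv[1] : "testfile";
            char buf[16];
            ssize_t len;

            /* lands in btrfs_set_prop() via the btrfs xattr handler */
            if (setxattr(path, "btrfs.compression", "lzo", 3, 0) != 0) {
                    perror("setxattr");
                    return 1;
            }

            /* the value is stored as a plain xattr, so it reads back */
            len = getxattr(path, "btrfs.compression", buf, sizeof(buf) - 1);
            if (len < 0) {
                    perror("getxattr");
                    return 1;
            }
            buf[len] = '\0';
            printf("compression = %s\n", buf);
            return 0;
    }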
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
new file mode 100644 (file)
index 0000000..100f188
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_PROPS_H
+#define __BTRFS_PROPS_H
+
+#include "ctree.h"
+
+void __init btrfs_props_init(void);
+
+int btrfs_set_prop(struct inode *inode,
+                  const char *name,
+                  const char *value,
+                  size_t value_len,
+                  int flags);
+
+int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
+
+int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
+                             struct inode *inode,
+                             struct inode *dir);
+
+int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_root *parent_root);
+
+#endif
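A sketch of the expected wiring for btrfs_props_init(), which this header only declares: the handler table needs filling exactly once, so the module init path is the natural call site (the series calls it from fs/btrfs/super.c's init path):

    /* sketch only, assuming a single call at module init time */
    static int __init init_btrfs_fs(void)
    {
            btrfs_props_init();     /* populate prop_handlers_ht */
            return register_filesystem(&btrfs_fs_type);
    }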
index 4e6ef490619e59b90fb64bf2cca251d6f11eae80..472302a2d745646a2d3a051da57fb95f2865cd0f 100644 (file)
@@ -301,16 +301,16 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 
                        if (btrfs_qgroup_status_version(l, ptr) !=
                            BTRFS_QGROUP_STATUS_VERSION) {
-                               printk(KERN_ERR
-                                "btrfs: old qgroup version, quota disabled\n");
+                               btrfs_err(fs_info,
+                                "old qgroup version, quota disabled");
                                goto out;
                        }
                        if (btrfs_qgroup_status_generation(l, ptr) !=
                            fs_info->generation) {
                                flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-                               printk(KERN_ERR
-                                       "btrfs: qgroup generation mismatch, "
-                                       "marked as inconsistent\n");
+                               btrfs_err(fs_info,
+                                       "qgroup generation mismatch, "
+                                       "marked as inconsistent");
                        }
                        fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
                                                                          ptr);
@@ -325,7 +325,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
                qgroup = find_qgroup_rb(fs_info, found_key.offset);
                if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
                    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
-                       printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
+                       btrfs_err(fs_info, "inconsitent qgroup config");
                        flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                }
                if (!qgroup) {
@@ -396,8 +396,8 @@ next1:
                ret = add_relation_rb(fs_info, found_key.objectid,
                                      found_key.offset);
                if (ret == -ENOENT) {
-                       printk(KERN_WARNING
-                               "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
+                       btrfs_warn(fs_info,
+                               "orphan qgroup relation 0x%llx->0x%llx",
                                found_key.objectid, found_key.offset);
                        ret = 0;        /* ignore the error */
                }
@@ -644,8 +644,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
 
        l = path->nodes[0];
        slot = path->slots[0];
-       qgroup_limit = btrfs_item_ptr(l, path->slots[0],
-                                     struct btrfs_qgroup_limit_item);
+       qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
        btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
        btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
        btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
@@ -687,8 +686,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
 
        l = path->nodes[0];
        slot = path->slots[0];
-       qgroup_info = btrfs_item_ptr(l, path->slots[0],
-                                struct btrfs_qgroup_info_item);
+       qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
        btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
        btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
        btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
@@ -1161,7 +1159,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
                                       limit->rsv_excl);
        if (ret) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-               printk(KERN_INFO "unable to update quota limit for %llu\n",
+               btrfs_info(fs_info, "unable to update quota limit for %llu",
                       qgroupid);
        }
 
@@ -1349,7 +1347,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
                             struct btrfs_delayed_ref_node *node,
                             struct btrfs_delayed_extent_op *extent_op)
 {
-       struct btrfs_key ins;
        struct btrfs_root *quota_root;
        u64 ref_root;
        struct btrfs_qgroup *qgroup;
@@ -1363,10 +1360,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 
        BUG_ON(!fs_info->quota_root);
 
-       ins.objectid = node->bytenr;
-       ins.offset = node->num_bytes;
-       ins.type = BTRFS_EXTENT_ITEM_KEY;
-
        if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
            node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
                struct btrfs_delayed_tree_ref *ref;
@@ -1840,7 +1833,9 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 {
        if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
                return;
-       pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
+       btrfs_err(trans->root->fs_info,
+               "qgroups not uptodate in trans handle %p:  list is%s empty, "
+               "seq is %#x.%x",
                trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
                (u32)(trans->delayed_ref_elem.seq >> 32),
                (u32)trans->delayed_ref_elem.seq);
@@ -1902,9 +1897,17 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
        mutex_unlock(&fs_info->qgroup_rescan_lock);
 
        for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+               u64 num_bytes;
+
                btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
-               if (found.type != BTRFS_EXTENT_ITEM_KEY)
+               if (found.type != BTRFS_EXTENT_ITEM_KEY &&
+                   found.type != BTRFS_METADATA_ITEM_KEY)
                        continue;
+               if (found.type == BTRFS_METADATA_ITEM_KEY)
+                       num_bytes = fs_info->extent_root->leafsize;
+               else
+                       num_bytes = found.offset;
+
                ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
                                           tree_mod_seq_elem.seq, &roots);
                if (ret < 0)
@@ -1949,12 +1952,12 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
                        struct btrfs_qgroup_list *glist;
 
                        qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
-                       qg->rfer += found.offset;
-                       qg->rfer_cmpr += found.offset;
+                       qg->rfer += num_bytes;
+                       qg->rfer_cmpr += num_bytes;
                        WARN_ON(qg->tag >= seq);
                        if (qg->refcnt - seq == roots->nnodes) {
-                               qg->excl += found.offset;
-                               qg->excl_cmpr += found.offset;
+                               qg->excl += num_bytes;
+                               qg->excl_cmpr += num_bytes;
                        }
                        qgroup_dirty(fs_info, qg);
 
@@ -2037,10 +2040,10 @@ out:
        mutex_unlock(&fs_info->qgroup_rescan_lock);
 
        if (err >= 0) {
-               pr_info("btrfs: qgroup scan completed%s\n",
+               btrfs_info(fs_info, "qgroup scan completed%s",
                        err == 2 ? " (inconsistency flag cleared)" : "");
        } else {
-               pr_err("btrfs: qgroup scan failed with %d\n", err);
+               btrfs_err(fs_info, "qgroup scan failed with %d", err);
        }
 
        complete_all(&fs_info->qgroup_rescan_completion);
@@ -2096,7 +2099,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
 
        if (ret) {
 err:
-               pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret);
+               btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
                return ret;
        }
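Most of the qgroup.c hunks above are one conversion pattern: a raw printk() with a hand-rolled "btrfs: " prefix becomes a level-specific helper that takes fs_info, so each message can identify the filesystem it concerns. The in-tree helpers are thin macros over btrfs_printk(); a sketch of their shape (illustrative, not a hunk from this diff):

    /* the btrfs_err()/btrfs_warn()/btrfs_info() family looks like this */
    #define btrfs_err(fs_info, fmt, args...) \
            btrfs_printk(fs_info, KERN_ERR fmt "\n", ##args)

The other substantive change is in qgroup_rescan_leaf(): with skinny metadata, extent sizes can no longer always be read from key.offset, since a BTRFS_METADATA_ITEM_KEY stores the tree level there rather than a byte count, so the rescan now accounts such items at leafsize.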
 
index 1031b69252c5a235046e7732d378e14865e54b50..31c797c48c3ecdf9273473a7c0669adf86f5870a 100644 (file)
@@ -189,8 +189,8 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
                         */
 #ifdef DEBUG
                        if (rec->generation != generation) {
-                               printk(KERN_DEBUG "generation mismatch for "
-                                               "(%llu,%d,%llu) %llu != %llu\n",
+                               btrfs_debug(root->fs_info,
+                                          "generation mismatch for (%llu,%d,%llu) %llu != %llu",
                                       key.objectid, key.type, key.offset,
                                       rec->generation, generation);
                        }
@@ -365,8 +365,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                goto error;
 
        if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
-               printk(KERN_ERR "btrfs readahead: more than %d copies not "
-                               "supported", BTRFS_MAX_MIRRORS);
+               btrfs_err(root->fs_info,
+                          "readahead: more than %d copies not supported",
+                          BTRFS_MAX_MIRRORS);
                goto error;
        }
 
index 429c73c374b84f9bcd468067221bf99e0b9f67db..07b3b36f40ee51657b248112a6d1f028cea9e364 100644 (file)
@@ -94,6 +94,7 @@ struct backref_edge {
 
 #define LOWER  0
 #define UPPER  1
+#define RELOCATION_RESERVED_NODES      256
 
 struct backref_cache {
        /* red black tree of all backref nodes in the cache */
@@ -176,6 +177,8 @@ struct reloc_control {
        u64 merging_rsv_size;
        /* size of relocated tree nodes */
        u64 nodes_relocated;
+       /* reserved size for block group relocation */
+       u64 reserved_bytes;
 
        u64 search_start;
        u64 extents_found;
@@ -184,7 +187,6 @@ struct reloc_control {
        unsigned int create_reloc_tree:1;
        unsigned int merge_reloc_tree:1;
        unsigned int found_file_extent:1;
-       unsigned int commit_transaction:1;
 };
 
 /* stages of data relocation */
@@ -2309,9 +2311,6 @@ void free_reloc_roots(struct list_head *list)
                reloc_root = list_entry(list->next, struct btrfs_root,
                                        root_list);
                __del_reloc_root(reloc_root);
-               free_extent_buffer(reloc_root->node);
-               free_extent_buffer(reloc_root->commit_root);
-               kfree(reloc_root);
        }
 }
 
@@ -2353,10 +2352,9 @@ again:
 
                        ret = merge_reloc_root(rc, root);
                        if (ret) {
-                               __del_reloc_root(reloc_root);
-                               free_extent_buffer(reloc_root->node);
-                               free_extent_buffer(reloc_root->commit_root);
-                               kfree(reloc_root);
+                               if (list_empty(&reloc_root->root_list))
+                                       list_add_tail(&reloc_root->root_list,
+                                                     &reloc_roots);
                                goto out;
                        }
                } else {
@@ -2452,7 +2450,7 @@ static noinline_for_stack
 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
                                     struct reloc_control *rc,
                                     struct backref_node *node,
-                                    struct backref_edge *edges[], int *nr)
+                                    struct backref_edge *edges[])
 {
        struct backref_node *next;
        struct btrfs_root *root;
@@ -2494,7 +2492,6 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
        if (!root)
                return NULL;
 
-       *nr = index;
        next = node;
        /* setup backref node path for btrfs_reloc_cow_block */
        while (1) {
@@ -2590,28 +2587,36 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
        struct btrfs_root *root = rc->extent_root;
        u64 num_bytes;
        int ret;
+       u64 tmp;
 
        num_bytes = calcu_metadata_size(rc, node, 1) * 2;
 
        trans->block_rsv = rc->block_rsv;
-       ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
-                                 BTRFS_RESERVE_FLUSH_ALL);
+       rc->reserved_bytes += num_bytes;
+       ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
+                               BTRFS_RESERVE_FLUSH_ALL);
        if (ret) {
-               if (ret == -EAGAIN)
-                       rc->commit_transaction = 1;
+               if (ret == -EAGAIN) {
+                       tmp = rc->extent_root->nodesize *
+                               RELOCATION_RESERVED_NODES;
+                       while (tmp <= rc->reserved_bytes)
+                               tmp <<= 1;
+                       /*
+                        * Only one thread can access block_rsv at this point,
+                        * so we don't need to hold a lock to protect it.
+                        * We expand the reservation size here to allow enough
+                        * space for relocation, and we will return earlier in
+                        * the enospc case.
+                        */
+                       rc->block_rsv->size = tmp + rc->extent_root->nodesize *
+                                             RELOCATION_RESERVED_NODES;
+               }
                return ret;
        }
 
        return 0;
 }
 
-static void release_metadata_space(struct reloc_control *rc,
-                                  struct backref_node *node)
-{
-       u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
-       btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
-}
-
 /*
  * relocate a block tree, and then update pointers in upper level
  * blocks that reference the block to point to the new location.
@@ -2633,7 +2638,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
        u32 blocksize;
        u64 bytenr;
        u64 generation;
-       int nr;
        int slot;
        int ret;
        int err = 0;
@@ -2646,7 +2650,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                cond_resched();
 
                upper = edge->node[UPPER];
-               root = select_reloc_root(trans, rc, upper, edges, &nr);
+               root = select_reloc_root(trans, rc, upper, edges);
                BUG_ON(!root);
 
                if (upper->eb && !upper->locked) {
@@ -2898,7 +2902,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
                                struct btrfs_path *path)
 {
        struct btrfs_root *root;
-       int release = 0;
        int ret = 0;
 
        if (!node)
@@ -2915,7 +2918,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
                ret = reserve_metadata_space(trans, rc, node);
                if (ret)
                        goto out;
-               release = 1;
        }
 
        if (root) {
@@ -2940,11 +2942,8 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
                ret = do_relocation(trans, rc, node, key, path, 1);
        }
 out:
-       if (ret || node->level == 0 || node->cowonly) {
-               if (release)
-                       release_metadata_space(rc, node);
+       if (ret || node->level == 0 || node->cowonly)
                remove_backref_node(&rc->backref_cache, node);
-       }
        return ret;
 }
 
@@ -3867,29 +3866,20 @@ static noinline_for_stack
 int prepare_to_relocate(struct reloc_control *rc)
 {
        struct btrfs_trans_handle *trans;
-       int ret;
 
        rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
                                              BTRFS_BLOCK_RSV_TEMP);
        if (!rc->block_rsv)
                return -ENOMEM;
 
-       /*
-        * reserve some space for creating reloc trees.
-        * btrfs_init_reloc_root will use them when there
-        * is no reservation in transaction handle.
-        */
-       ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
-                                 rc->extent_root->nodesize * 256,
-                                 BTRFS_RESERVE_FLUSH_ALL);
-       if (ret)
-               return ret;
-
        memset(&rc->cluster, 0, sizeof(rc->cluster));
        rc->search_start = rc->block_group->key.objectid;
        rc->extents_found = 0;
        rc->nodes_relocated = 0;
        rc->merging_rsv_size = 0;
+       rc->reserved_bytes = 0;
+       rc->block_rsv->size = rc->extent_root->nodesize *
+                             RELOCATION_RESERVED_NODES;
 
        rc->create_reloc_tree = 1;
        set_reloc_control(rc);
@@ -3933,6 +3923,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        }
 
        while (1) {
+               rc->reserved_bytes = 0;
+               ret = btrfs_block_rsv_refill(rc->extent_root,
+                                       rc->block_rsv, rc->block_rsv->size,
+                                       BTRFS_RESERVE_FLUSH_ALL);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
                progress++;
                trans = btrfs_start_transaction(rc->extent_root, 0);
                if (IS_ERR(trans)) {
@@ -4011,6 +4009,12 @@ restart:
                if (!RB_EMPTY_ROOT(&blocks)) {
                        ret = relocate_tree_blocks(trans, rc, &blocks);
                        if (ret < 0) {
+                               /*
+                                * if we fail to relocate tree blocks, force an
+                                * update of the backref cache when committing
+                                * the transaction.
+                                */
+                               rc->backref_cache.last_trans = trans->transid - 1;
+
                                if (ret != -EAGAIN) {
                                        err = ret;
                                        break;
@@ -4020,14 +4024,8 @@ restart:
                        }
                }
 
-               if (rc->commit_transaction) {
-                       rc->commit_transaction = 0;
-                       ret = btrfs_commit_transaction(trans, rc->extent_root);
-                       BUG_ON(ret);
-               } else {
-                       btrfs_end_transaction_throttle(trans, rc->extent_root);
-                       btrfs_btree_balance_dirty(rc->extent_root);
-               }
+               btrfs_end_transaction_throttle(trans, rc->extent_root);
+               btrfs_btree_balance_dirty(rc->extent_root);
                trans = NULL;
 
                if (rc->stage == MOVE_DATA_EXTENTS &&
@@ -4247,7 +4245,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                goto out;
        }
 
-       printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
+       btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu",
               rc->block_group->key.objectid, rc->block_group->flags);
 
        ret = btrfs_start_delalloc_roots(fs_info, 0);
@@ -4269,7 +4267,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                if (rc->extents_found == 0)
                        break;
 
-               printk(KERN_INFO "btrfs: found %llu extents\n",
+               btrfs_info(extent_root->fs_info, "found %llu extents",
                        rc->extents_found);
 
                if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
@@ -4285,11 +4283,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                }
        }
 
-       filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
-                                    rc->block_group->key.objectid,
-                                    rc->block_group->key.objectid +
-                                    rc->block_group->key.offset - 1);
-
        WARN_ON(rc->block_group->pinned > 0);
        WARN_ON(rc->block_group->reserved > 0);
        WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
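The relocation hunks above replace the old "commit the transaction on -EAGAIN" fallback with a growing reservation: prepare_to_relocate() now starts block_rsv->size at RELOCATION_RESERVED_NODES (256) nodes' worth, relocate_block_group() refills it at each loop iteration, and on -EAGAIN reserve_metadata_space() doubles the target until it covers what has already been reserved. A standalone sketch of that sizing rule, using hypothetical numbers (16KiB nodes):

    #include <stdint.h>
    #include <stdio.h>

    #define RELOCATION_RESERVED_NODES 256

    /* mirrors the -EAGAIN branch of reserve_metadata_space() above */
    static uint64_t grow_rsv(uint64_t nodesize, uint64_t reserved_bytes)
    {
            uint64_t tmp = nodesize * RELOCATION_RESERVED_NODES;

            while (tmp <= reserved_bytes)
                    tmp <<= 1;
            return tmp + nodesize * RELOCATION_RESERVED_NODES;
    }

    int main(void)
    {
            /* 16KiB nodes: base reservation is 4MiB; once 5MiB is already
             * reserved, the target doubles to 8MiB, plus 4MiB of slack,
             * giving 12582912 bytes */
            printf("%llu\n", (unsigned long long)grow_rsv(16384, 5 << 20));
            return 0;
    }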
index ec71ea44d2b4626c9a2bcc73b5fb94af666eaf5b..1389b69059de3ba33dcccbcb9474fa7d6537afd1 100644 (file)
@@ -44,7 +44,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
        if (!need_reset && btrfs_root_generation(item)
                != btrfs_root_generation_v2(item)) {
                if (btrfs_root_generation_v2(item) != 0) {
-                       printk(KERN_WARNING "btrfs: mismatching "
+                       printk(KERN_WARNING "BTRFS: mismatching "
                                        "generation and generation_v2 "
                                        "found in root item. This root "
                                        "was probably mounted with an "
@@ -154,7 +154,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
 
        if (ret != 0) {
                btrfs_print_leaf(root, path->nodes[0]);
-               printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
+               btrfs_crit(root->fs_info, "unable to update root key %llu %u %llu",
                       key->objectid, key->type, key->offset);
                BUG_ON(1);
        }
@@ -400,21 +400,6 @@ out:
        return err;
 }
 
-int btrfs_find_root_ref(struct btrfs_root *tree_root,
-                  struct btrfs_path *path,
-                  u64 root_id, u64 ref_id)
-{
-       struct btrfs_key key;
-       int ret;
-
-       key.objectid = root_id;
-       key.type = BTRFS_ROOT_REF_KEY;
-       key.offset = ref_id;
-
-       ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
-       return ret;
-}
-
 /*
  * add a btrfs_root_ref item.  type is either BTRFS_ROOT_REF_KEY
  * or BTRFS_ROOT_BACKREF_KEY.
index bb9a928fa3a848c597d842a94fe2e49a48766cf0..efba5d1282ee40addd128cf05427181a0aade83f 100644 (file)
@@ -256,6 +256,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
                            int mirror_num, u64 physical_for_dev_replace);
 static void copy_nocow_pages_worker(struct btrfs_work *work);
+static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -269,6 +271,29 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
        wake_up(&sctx->list_wait);
 }
 
+static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+       while (atomic_read(&fs_info->scrub_pause_req)) {
+               mutex_unlock(&fs_info->scrub_lock);
+               wait_event(fs_info->scrub_pause_wait,
+                  atomic_read(&fs_info->scrub_pause_req) == 0);
+               mutex_lock(&fs_info->scrub_lock);
+       }
+}
+
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+       atomic_inc(&fs_info->scrubs_paused);
+       wake_up(&fs_info->scrub_pause_wait);
+
+       mutex_lock(&fs_info->scrub_lock);
+       __scrub_blocked_if_needed(fs_info);
+       atomic_dec(&fs_info->scrubs_paused);
+       mutex_unlock(&fs_info->scrub_lock);
+
+       wake_up(&fs_info->scrub_pause_wait);
+}
+
 /*
  * used for workers that require transaction commits (i.e., for the
  * NOCOW case)
@@ -480,7 +505,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
         * hold all of the paths here
         */
        for (i = 0; i < ipath->fspath->elem_cnt; ++i)
-               printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+               printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
                        "%s, sector %llu, root %llu, inode %llu, offset %llu, "
                        "length %llu, links %u (path: %s)\n", swarn->errstr,
                        swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -492,7 +517,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
        return 0;
 
 err:
-       printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+       printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
                "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
                "resolving failed with ret=%d\n", swarn->errstr,
                swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -555,7 +580,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
                        ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
                                                        &ref_root, &ref_level);
                        printk_in_rcu(KERN_WARNING
-                               "btrfs: %s at logical %llu on dev %s, "
+                               "BTRFS: %s at logical %llu on dev %s, "
                                "sector %llu: metadata %s (level %d) in tree "
                                "%llu\n", errstr, swarn.logical,
                                rcu_str_deref(dev->name),
@@ -704,13 +729,11 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
        struct scrub_fixup_nodatasum *fixup;
        struct scrub_ctx *sctx;
        struct btrfs_trans_handle *trans = NULL;
-       struct btrfs_fs_info *fs_info;
        struct btrfs_path *path;
        int uncorrectable = 0;
 
        fixup = container_of(work, struct scrub_fixup_nodatasum, work);
        sctx = fixup->sctx;
-       fs_info = fixup->root->fs_info;
 
        path = btrfs_alloc_path();
        if (!path) {
@@ -759,8 +782,8 @@ out:
                btrfs_dev_replace_stats_inc(
                        &sctx->dev_root->fs_info->dev_replace.
                        num_uncorrectable_read_errors);
-               printk_ratelimited_in_rcu(KERN_ERR
-                       "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
+               printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
+                   "unable to fixup (nodatasum) error at logical %llu on dev %s\n",
                        fixup->logical, rcu_str_deref(fixup->dev->name));
        }
 
@@ -1161,7 +1184,7 @@ corrected_error:
                        sctx->stat.corrected_errors++;
                        spin_unlock(&sctx->stat_lock);
                        printk_ratelimited_in_rcu(KERN_ERR
-                               "btrfs: fixed up error at logical %llu on dev %s\n",
+                               "BTRFS: fixed up error at logical %llu on dev %s\n",
                                logical, rcu_str_deref(dev->name));
                }
        } else {
@@ -1170,7 +1193,7 @@ did_not_correct_error:
                sctx->stat.uncorrectable_errors++;
                spin_unlock(&sctx->stat_lock);
                printk_ratelimited_in_rcu(KERN_ERR
-                       "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
+                       "BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
                        logical, rcu_str_deref(dev->name));
        }
 
@@ -1418,8 +1441,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
                int ret;
 
                if (!page_bad->dev->bdev) {
-                       printk_ratelimited(KERN_WARNING
-                               "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
+                       printk_ratelimited(KERN_WARNING "BTRFS: "
+                               "scrub_repair_page_from_good_copy(bdev == NULL) "
+                               "is unexpected!\n");
                        return -EIO;
                }
 
@@ -1877,7 +1901,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
                 * This case is handled correctly (but _very_ slowly).
                 */
                printk_ratelimited(KERN_WARNING
-                       "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
+                       "BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
                bio_endio(sbio->bio, -EIO);
        } else {
                btrfsic_submit_bio(READ, sbio->bio);
@@ -2286,8 +2310,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
        wait_event(sctx->list_wait,
                   atomic_read(&sctx->bios_in_flight) == 0);
-       atomic_inc(&fs_info->scrubs_paused);
-       wake_up(&fs_info->scrub_pause_wait);
+       scrub_blocked_if_needed(fs_info);
 
        /* FIXME it might be better to start readahead at commit root */
        key_start.objectid = logical;
@@ -2311,16 +2334,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        if (!IS_ERR(reada2))
                btrfs_reada_wait(reada2);
 
-       mutex_lock(&fs_info->scrub_lock);
-       while (atomic_read(&fs_info->scrub_pause_req)) {
-               mutex_unlock(&fs_info->scrub_lock);
-               wait_event(fs_info->scrub_pause_wait,
-                  atomic_read(&fs_info->scrub_pause_req) == 0);
-               mutex_lock(&fs_info->scrub_lock);
-       }
-       atomic_dec(&fs_info->scrubs_paused);
-       mutex_unlock(&fs_info->scrub_lock);
-       wake_up(&fs_info->scrub_pause_wait);
 
        /*
         * collect all data csums for the stripe to avoid seeking during
@@ -2357,22 +2370,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                        wait_event(sctx->list_wait,
                                   atomic_read(&sctx->bios_in_flight) == 0);
                        atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
-                       atomic_inc(&fs_info->scrubs_paused);
-                       wake_up(&fs_info->scrub_pause_wait);
-                       mutex_lock(&fs_info->scrub_lock);
-                       while (atomic_read(&fs_info->scrub_pause_req)) {
-                               mutex_unlock(&fs_info->scrub_lock);
-                               wait_event(fs_info->scrub_pause_wait,
-                                  atomic_read(&fs_info->scrub_pause_req) == 0);
-                               mutex_lock(&fs_info->scrub_lock);
-                       }
-                       atomic_dec(&fs_info->scrubs_paused);
-                       mutex_unlock(&fs_info->scrub_lock);
-                       wake_up(&fs_info->scrub_pause_wait);
+                       scrub_blocked_if_needed(fs_info);
                }
 
+               if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+                       key.type = BTRFS_METADATA_ITEM_KEY;
+               else
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
                key.objectid = logical;
-               key.type = BTRFS_EXTENT_ITEM_KEY;
                key.offset = (u64)-1;
 
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -2380,8 +2385,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                        goto out;
 
                if (ret > 0) {
-                       ret = btrfs_previous_item(root, path, 0,
-                                                 BTRFS_EXTENT_ITEM_KEY);
+                       ret = btrfs_previous_extent_item(root, path, 0);
                        if (ret < 0)
                                goto out;
                        if (ret > 0) {
@@ -2439,9 +2443,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
                        if (key.objectid < logical &&
                            (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
-                               printk(KERN_ERR
-                                      "btrfs scrub: tree block %llu spanning "
-                                      "stripes, ignored. logical=%llu\n",
+                               btrfs_err(fs_info,
+                                          "scrub: tree block %llu spanning "
+                                          "stripes, ignored. logical=%llu",
                                       key.objectid, logical);
                                goto next;
                        }
@@ -2683,21 +2687,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                wait_event(sctx->list_wait,
                           atomic_read(&sctx->bios_in_flight) == 0);
                atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
-               atomic_inc(&fs_info->scrubs_paused);
-               wake_up(&fs_info->scrub_pause_wait);
                wait_event(sctx->list_wait,
                           atomic_read(&sctx->workers_pending) == 0);
-
-               mutex_lock(&fs_info->scrub_lock);
-               while (atomic_read(&fs_info->scrub_pause_req)) {
-                       mutex_unlock(&fs_info->scrub_lock);
-                       wait_event(fs_info->scrub_pause_wait,
-                          atomic_read(&fs_info->scrub_pause_req) == 0);
-                       mutex_lock(&fs_info->scrub_lock);
-               }
-               atomic_dec(&fs_info->scrubs_paused);
-               mutex_unlock(&fs_info->scrub_lock);
-               wake_up(&fs_info->scrub_pause_wait);
+               scrub_blocked_if_needed(fs_info);
 
                btrfs_put_block_group(cache);
                if (ret)
@@ -2823,8 +2815,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
         * check some assumptions
         */
        if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
-               printk(KERN_ERR
-                      "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
+               btrfs_err(fs_info,
+                          "scrub: size assumption nodesize == leafsize (%d == %d) fails",
                       fs_info->chunk_root->nodesize,
                       fs_info->chunk_root->leafsize);
                return -EINVAL;
@@ -2836,16 +2828,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
                 * the way scrub is implemented. Do not handle this
                 * situation at all because it won't ever happen.
                 */
-               printk(KERN_ERR
-                      "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
+               btrfs_err(fs_info,
+                          "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
                       fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
                return -EINVAL;
        }
 
        if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
                /* not supported for data w/o checksums */
-               printk(KERN_ERR
-                      "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
+               btrfs_err(fs_info,
+                          "scrub: size assumption sectorsize != PAGE_SIZE "
+                          "(%d != %lu) fails",
                       fs_info->chunk_root->sectorsize, PAGE_SIZE);
                return -EINVAL;
        }
@@ -2858,7 +2851,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
                 * would exhaust the array bounds of pagev member in
                 * struct scrub_block
                 */
-               pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
+               btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize "
+                          "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
                       fs_info->chunk_root->nodesize,
                       SCRUB_MAX_PAGES_PER_BLOCK,
                       fs_info->chunk_root->sectorsize,
@@ -2908,7 +2902,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
        }
        sctx->readonly = readonly;
        dev->scrub_device = sctx;
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
+       /*
+        * by checking @scrub_pause_req here, we can avoid a
+        * race between transaction commit and scrubbing.
+        */
+       __scrub_blocked_if_needed(fs_info);
        atomic_inc(&fs_info->scrubs_running);
        mutex_unlock(&fs_info->scrub_lock);
 
@@ -2917,9 +2917,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
                 * by holding device list mutex, we can
                 * kick off writing super in log tree sync.
                 */
+               mutex_lock(&fs_info->fs_devices->device_list_mutex);
                ret = scrub_supers(sctx, dev);
+               mutex_unlock(&fs_info->fs_devices->device_list_mutex);
        }
-       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
        if (!ret)
                ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -3167,7 +3168,8 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
        ret = iterate_inodes_from_logical(logical, fs_info, path,
                                          record_inode_for_nocow, nocow_ctx);
        if (ret != 0 && ret != -ENOENT) {
-               pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
+               btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, "
+                       "phys %llu, len %llu, mir %u, ret %d",
                        logical, physical_for_dev_replace, len, mirror_num,
                        ret);
                not_written = 1;
@@ -3289,7 +3291,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 again:
                page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
                if (!page) {
-                       pr_err("find_or_create_page() failed\n");
+                       btrfs_err(fs_info, "find_or_create_page() failed");
                        ret = -ENOMEM;
                        goto out;
                }
@@ -3361,7 +3363,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
                return -EIO;
        if (!dev->bdev) {
                printk_ratelimited(KERN_WARNING
-                       "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
+                       "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
                return -EIO;
        }
        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
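The scrub hunks fold three copies of the pause/wait dance into scrub_blocked_if_needed() and its __scrub_blocked_if_needed() core. For orientation, the requesting side of the handshake lives in the transaction commit path; a paraphrase of btrfs_scrub_pause()'s shape (not a hunk from this diff):

    void btrfs_scrub_pause(struct btrfs_root *root)
    {
            struct btrfs_fs_info *fs_info = root->fs_info;

            mutex_lock(&fs_info->scrub_lock);
            atomic_inc(&fs_info->scrub_pause_req);
            /* wait until every running scrub has parked itself */
            while (atomic_read(&fs_info->scrubs_paused) !=
                   atomic_read(&fs_info->scrubs_running)) {
                    mutex_unlock(&fs_info->scrub_lock);
                    wait_event(fs_info->scrub_pause_wait,
                               atomic_read(&fs_info->scrubs_paused) ==
                               atomic_read(&fs_info->scrubs_running));
                    mutex_lock(&fs_info->scrub_lock);
            }
            mutex_unlock(&fs_info->scrub_lock);
    }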
index 945d1db98f26968ec051a6ff6116f971f18e7303..730dce395858a6e4c86a43b4a8fa41779d719e0d 100644 (file)
@@ -88,8 +88,6 @@ struct send_ctx {
        u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
        u64 flags;      /* 'flags' member of btrfs_ioctl_send_args is u64 */
 
-       struct vfsmount *mnt;
-
        struct btrfs_root *send_root;
        struct btrfs_root *parent_root;
        struct clone_root *clone_roots;
@@ -111,6 +109,7 @@ struct send_ctx {
        int cur_inode_deleted;
        u64 cur_inode_size;
        u64 cur_inode_mode;
+       u64 cur_inode_last_extent;
 
        u64 send_progress;
 
@@ -122,6 +121,74 @@ struct send_ctx {
        int name_cache_size;
 
        char *read_buf;
+
+       /*
+        * We process inodes in increasing order, so if, before an
+        * incremental send, we reverse the parent/child relationship of
+        * directories such that a directory with a lower inode number was
+        * the parent of a directory with a higher inode number, and the one
+        * becoming the new parent got renamed too, we can't rename/move the
+        * directory with lower inode number when we finish processing it - we
+        * must process the directory with higher inode number first, then
+        * rename/move it and then rename/move the directory with lower inode
+        * number. Example follows.
+        *
+        * Tree state when the first send was performed:
+        *
+        * .
+        * |-- a                   (ino 257)
+        *     |-- b               (ino 258)
+        *         |
+        *         |
+        *         |-- c           (ino 259)
+        *         |   |-- d       (ino 260)
+        *         |
+        *         |-- c2          (ino 261)
+        *
+        * Tree state when the second (incremental) send is performed:
+        *
+        * .
+        * |-- a                   (ino 257)
+        *     |-- b               (ino 258)
+        *         |-- c2          (ino 261)
+        *             |-- d2      (ino 260)
+        *                 |-- cc  (ino 259)
+        *
+        * The sequence of steps that led to the second state was:
+        *
+        * mv /a/b/c/d /a/b/c2/d2
+        * mv /a/b/c /a/b/c2/d2/cc
+        *
+        * "c" has lower inode number, but we can't move it (2nd mv operation)
+        * before we move "d", which has higher inode number.
+        *
+        * So we just memorize which move/rename operations must be performed
+        * later when their respective parent is processed and moved/renamed.
+        */
+
+       /* Indexed by parent directory inode number. */
+       struct rb_root pending_dir_moves;
+
+       /*
+        * Reverse index, indexed by the inode number of a directory that
+        * is waiting for the move/rename of its immediate parent before its
+        * own move/rename can be performed.
+        */
+       struct rb_root waiting_dir_moves;
+};
+
+struct pending_dir_move {
+       struct rb_node node;
+       struct list_head list;
+       u64 parent_ino;
+       u64 ino;
+       u64 gen;
+       struct list_head update_refs;
+};
+
+struct waiting_dir_move {
+       struct rb_node node;
+       u64 ino;
 };
 
 struct name_cache_entry {
@@ -145,6 +212,15 @@ struct name_cache_entry {
        char name[];
 };
 
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
+
+static int need_send_hole(struct send_ctx *sctx)
+{
+       return (sctx->parent_root && !sctx->cur_inode_new &&
+               !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
+               S_ISREG(sctx->cur_inode_mode));
+}
+
 static void fs_path_reset(struct fs_path *p)
 {
        if (p->reversed) {
@@ -336,16 +412,6 @@ out:
        return ret;
 }
 
-#if 0
-static void fs_path_remove(struct fs_path *p)
-{
-       BUG_ON(p->reversed);
-       while (p->start != p->end && *p->end != '/')
-               p->end--;
-       *p->end = 0;
-}
-#endif
-
 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 {
        int ret;
@@ -436,30 +502,15 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
        return 0;
 }
 
-#if 0
-static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
-{
-       return tlv_put(sctx, attr, &value, sizeof(value));
-}
-
-static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
-{
-       __le16 tmp = cpu_to_le16(value);
-       return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-
-static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
-{
-       __le32 tmp = cpu_to_le32(value);
-       return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-#endif
+#define TLV_PUT_DEFINE_INT(bits) \
+       static int tlv_put_u##bits(struct send_ctx *sctx,               \
+                       u##bits attr, u##bits value)                    \
+       {                                                               \
+               __le##bits __tmp = cpu_to_le##bits(value);              \
+               return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));      \
+       }
 
-static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
-{
-       __le64 tmp = cpu_to_le64(value);
-       return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
+TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
                          const char *str, int len)
@@ -475,17 +526,6 @@ static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
        return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
 }
 
-#if 0
-static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
-                           struct timespec *ts)
-{
-       struct btrfs_timespec bts;
-       bts.sec = cpu_to_le64(ts->tv_sec);
-       bts.nsec = cpu_to_le32(ts->tv_nsec);
-       return tlv_put(sctx, attr, &bts, sizeof(bts));
-}
-#endif
-
 static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
                                  struct extent_buffer *eb,
                                  struct btrfs_timespec *ts)
@@ -533,12 +573,6 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
                if (ret < 0) \
                        goto tlv_put_failure; \
        } while (0)
-#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
-       do { \
-               ret = tlv_put_timespec(sctx, attrtype, ts); \
-               if (ret < 0) \
-                       goto tlv_put_failure; \
-       } while (0)
 #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
        do { \
                ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
@@ -1270,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx,
        if (!backref_ctx->found_itself) {
                /* found a bug in backref code? */
                ret = -EIO;
-               printk(KERN_ERR "btrfs: ERROR did not find backref in "
+               btrfs_err(sctx->send_root->fs_info, "did not find backref in "
                                "send_root. inode=%llu, offset=%llu, "
                                "disk_byte=%llu found extent=%llu\n",
                                ino, data_offset, disk_byte, found_key.objectid);
@@ -1343,7 +1377,7 @@ static int read_symlink(struct btrfs_root *root,
        BUG_ON(compression);
 
        off = btrfs_file_extent_inline_start(ei);
-       len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+       len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
 
        ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
 
@@ -1372,7 +1406,7 @@ static int gen_unique_name(struct send_ctx *sctx,
                return -ENOMEM;
 
        while (1) {
-               len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
+               len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
                                ino, gen, idx);
                if (len >= sizeof(tmp)) {
                        /* should really not happen */
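
The fix above hands snprintf() the full buffer size: snprintf() already
reserves room for the terminating NUL within the size it is given, and
its return value is the length the output would have had without
truncation, so the >= test still catches names that do not fit. A
minimal userspace illustration (hypothetical buffer size and values):

	#include <stdio.h>

	int main(void)
	{
		char tmp[8];
		/* "o123456789-5-0" needs 14 characters plus a NUL */
		int len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				   123456789ULL, 5ULL, 0ULL);

		if (len >= (int)sizeof(tmp))
			printf("truncated: need %d, have %zu\n",
			       len, sizeof(tmp));
		return 0;
	}
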
@@ -1933,6 +1967,7 @@ static void name_cache_free(struct send_ctx *sctx)
  */
 static int __get_cur_name_and_parent(struct send_ctx *sctx,
                                     u64 ino, u64 gen,
+                                    int skip_name_cache,
                                     u64 *parent_ino,
                                     u64 *parent_gen,
                                     struct fs_path *dest)
@@ -1942,6 +1977,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
        struct btrfs_path *path = NULL;
        struct name_cache_entry *nce = NULL;
 
+       if (skip_name_cache)
+               goto get_ref;
        /*
         * First check if we already did a call to this function with the same
         * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
@@ -1986,11 +2023,12 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
                goto out_cache;
        }
 
+get_ref:
        /*
         * Depending on whether the inode was already processed or not, use
         * send_root or parent_root for ref lookup.
         */
-       if (ino < sctx->send_progress)
+       if (ino < sctx->send_progress && !skip_name_cache)
                ret = get_first_ref(sctx->send_root, ino,
                                    parent_ino, parent_gen, dest);
        else
@@ -2014,6 +2052,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
                        goto out;
                ret = 1;
        }
+       if (skip_name_cache)
+               goto out;
 
 out_cache:
        /*
@@ -2081,6 +2121,9 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
        u64 parent_inode = 0;
        u64 parent_gen = 0;
        int stop = 0;
+       u64 start_ino = ino;
+       u64 start_gen = gen;
+       int skip_name_cache = 0;
 
        name = fs_path_alloc();
        if (!name) {
@@ -2088,19 +2131,32 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
                goto out;
        }
 
+       if (is_waiting_for_move(sctx, ino))
+               skip_name_cache = 1;
+
+again:
        dest->reversed = 1;
        fs_path_reset(dest);
 
        while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
                fs_path_reset(name);
 
-               ret = __get_cur_name_and_parent(sctx, ino, gen,
+               ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
                                &parent_inode, &parent_gen, name);
                if (ret < 0)
                        goto out;
                if (ret)
                        stop = 1;
 
+               if (!skip_name_cache &&
+                   is_waiting_for_move(sctx, parent_inode)) {
+                       ino = start_ino;
+                       gen = start_gen;
+                       stop = 0;
+                       skip_name_cache = 1;
+                       goto again;
+               }
+
                ret = fs_path_add_path(dest, name);
                if (ret < 0)
                        goto out;
@@ -2131,7 +2187,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
        char *name = NULL;
        int namelen;
 
-       path = alloc_path_for_send();
+       path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
@@ -2180,12 +2236,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
        TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
                        sctx->send_root->root_item.uuid);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
-                       sctx->send_root->root_item.ctransid);
+                   le64_to_cpu(sctx->send_root->root_item.ctransid));
        if (parent_root) {
                TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
                                sctx->parent_root->root_item.uuid);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-                               sctx->parent_root->root_item.ctransid);
+                           le64_to_cpu(sctx->parent_root->root_item.ctransid));
        }
 
        ret = send_cmd(sctx);
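
The le64_to_cpu() conversions added above matter on big-endian hosts:
root_item.ctransid is stored little-endian on disk, while TLV_PUT_U64
applies cpu_to_le64() to its argument, so passing the raw field would
emit its bytes reversed there (little-endian hosts hide the bug, as
both conversions are no-ops). In sketch form:

	/* wrong on big-endian: the disk-order value is swapped again */
	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
		    sctx->send_root->root_item.ctransid);

	/* right: convert to CPU order; the helper re-encodes it once */
	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
		    le64_to_cpu(sctx->send_root->root_item.ctransid));
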
@@ -2672,10 +2728,349 @@ out:
        return ret;
 }
 
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
+{
+       struct rb_node *n = sctx->waiting_dir_moves.rb_node;
+       struct waiting_dir_move *entry;
+
+       while (n) {
+               entry = rb_entry(n, struct waiting_dir_move, node);
+               if (ino < entry->ino)
+                       n = n->rb_left;
+               else if (ino > entry->ino)
+                       n = n->rb_right;
+               else
+                       return 1;
+       }
+       return 0;
+}
+
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+{
+       struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
+       struct rb_node *parent = NULL;
+       struct waiting_dir_move *entry, *dm;
+
+       dm = kmalloc(sizeof(*dm), GFP_NOFS);
+       if (!dm)
+               return -ENOMEM;
+       dm->ino = ino;
+
+       while (*p) {
+               parent = *p;
+               entry = rb_entry(parent, struct waiting_dir_move, node);
+               if (ino < entry->ino) {
+                       p = &(*p)->rb_left;
+               } else if (ino > entry->ino) {
+                       p = &(*p)->rb_right;
+               } else {
+                       kfree(dm);
+                       return -EEXIST;
+               }
+       }
+
+       rb_link_node(&dm->node, parent, p);
+       rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
+       return 0;
+}
+
+#ifdef CONFIG_BTRFS_ASSERT
+
+static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+{
+       struct rb_node *n = sctx->waiting_dir_moves.rb_node;
+       struct waiting_dir_move *entry;
+
+       while (n) {
+               entry = rb_entry(n, struct waiting_dir_move, node);
+               if (ino < entry->ino) {
+                       n = n->rb_left;
+               } else if (ino > entry->ino) {
+                       n = n->rb_right;
+               } else {
+                       rb_erase(&entry->node, &sctx->waiting_dir_moves);
+                       kfree(entry);
+                       return 0;
+               }
+       }
+       return -ENOENT;
+}
+
+#endif
+
+static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
+{
+       struct rb_node **p = &sctx->pending_dir_moves.rb_node;
+       struct rb_node *parent = NULL;
+       struct pending_dir_move *entry, *pm;
+       struct recorded_ref *cur;
+       int exists = 0;
+       int ret;
+
+       pm = kmalloc(sizeof(*pm), GFP_NOFS);
+       if (!pm)
+               return -ENOMEM;
+       pm->parent_ino = parent_ino;
+       pm->ino = sctx->cur_ino;
+       pm->gen = sctx->cur_inode_gen;
+       INIT_LIST_HEAD(&pm->list);
+       INIT_LIST_HEAD(&pm->update_refs);
+       RB_CLEAR_NODE(&pm->node);
+
+       while (*p) {
+               parent = *p;
+               entry = rb_entry(parent, struct pending_dir_move, node);
+               if (parent_ino < entry->parent_ino) {
+                       p = &(*p)->rb_left;
+               } else if (parent_ino > entry->parent_ino) {
+                       p = &(*p)->rb_right;
+               } else {
+                       exists = 1;
+                       break;
+               }
+       }
+
+       list_for_each_entry(cur, &sctx->deleted_refs, list) {
+               ret = dup_ref(cur, &pm->update_refs);
+               if (ret < 0)
+                       goto out;
+       }
+       list_for_each_entry(cur, &sctx->new_refs, list) {
+               ret = dup_ref(cur, &pm->update_refs);
+               if (ret < 0)
+                       goto out;
+       }
+
+       ret = add_waiting_dir_move(sctx, pm->ino);
+       if (ret)
+               goto out;
+
+       if (exists) {
+               list_add_tail(&pm->list, &entry->list);
+       } else {
+               rb_link_node(&pm->node, parent, p);
+               rb_insert_color(&pm->node, &sctx->pending_dir_moves);
+       }
+       ret = 0;
+out:
+       if (ret) {
+               __free_recorded_refs(&pm->update_refs);
+               kfree(pm);
+       }
+       return ret;
+}
+
+static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
+                                                     u64 parent_ino)
+{
+       struct rb_node *n = sctx->pending_dir_moves.rb_node;
+       struct pending_dir_move *entry;
+
+       while (n) {
+               entry = rb_entry(n, struct pending_dir_move, node);
+               if (parent_ino < entry->parent_ino)
+                       n = n->rb_left;
+               else if (parent_ino > entry->parent_ino)
+                       n = n->rb_right;
+               else
+                       return entry;
+       }
+       return NULL;
+}
+
+static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
+{
+       struct fs_path *from_path = NULL;
+       struct fs_path *to_path = NULL;
+       u64 orig_progress = sctx->send_progress;
+       struct recorded_ref *cur;
+       int ret;
+
+       from_path = fs_path_alloc();
+       if (!from_path)
+               return -ENOMEM;
+
+       sctx->send_progress = pm->ino;
+       ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
+       if (ret < 0)
+               goto out;
+
+       to_path = fs_path_alloc();
+       if (!to_path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       sctx->send_progress = sctx->cur_ino + 1;
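+       /*
+        * del_waiting_dir_move() is compiled only under CONFIG_BTRFS_ASSERT,
+        * in which case ASSERT() evaluates its expression; in other builds
+        * the entry stays queued until the send ioctl's teardown frees it.
+        */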
+       ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0);
+       ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
+       if (ret < 0)
+               goto out;
+
+       ret = send_rename(sctx, from_path, to_path);
+       if (ret < 0)
+               goto out;
+
+       ret = send_utimes(sctx, pm->ino, pm->gen);
+       if (ret < 0)
+               goto out;
+
+       /*
+        * After rename/move, need to update the utimes of both new parent(s)
+        * and old parent(s).
+        */
+       list_for_each_entry(cur, &pm->update_refs, list) {
+               ret = send_utimes(sctx, cur->dir, cur->dir_gen);
+               if (ret < 0)
+                       goto out;
+       }
+
+out:
+       fs_path_free(from_path);
+       fs_path_free(to_path);
+       sctx->send_progress = orig_progress;
+
+       return ret;
+}
+
+static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
+{
+       if (!list_empty(&m->list))
+               list_del(&m->list);
+       if (!RB_EMPTY_NODE(&m->node))
+               rb_erase(&m->node, &sctx->pending_dir_moves);
+       __free_recorded_refs(&m->update_refs);
+       kfree(m);
+}
+
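+/*
+ * If @stack holds [X] and @moves chains sibling moves [A, B] on its
+ * ->list, the splicing below detaches [A, B], queues @moves itself and
+ * then re-appends the siblings, leaving [X, moves, A, B] on the stack.
+ */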
+static void tail_append_pending_moves(struct pending_dir_move *moves,
+                                     struct list_head *stack)
+{
+       if (list_empty(&moves->list)) {
+               list_add_tail(&moves->list, stack);
+       } else {
+               LIST_HEAD(list);
+               list_splice_init(&moves->list, &list);
+               list_add_tail(&moves->list, stack);
+               list_splice_tail(&list, stack);
+       }
+}
+
+static int apply_children_dir_moves(struct send_ctx *sctx)
+{
+       struct pending_dir_move *pm;
+       struct list_head stack;
+       u64 parent_ino = sctx->cur_ino;
+       int ret = 0;
+
+       pm = get_pending_dir_moves(sctx, parent_ino);
+       if (!pm)
+               return 0;
+
+       INIT_LIST_HEAD(&stack);
+       tail_append_pending_moves(pm, &stack);
+
+       while (!list_empty(&stack)) {
+               pm = list_first_entry(&stack, struct pending_dir_move, list);
+               parent_ino = pm->ino;
+               ret = apply_dir_move(sctx, pm);
+               free_pending_move(sctx, pm);
+               if (ret)
+                       goto out;
+               pm = get_pending_dir_moves(sctx, parent_ino);
+               if (pm)
+                       tail_append_pending_moves(pm, &stack);
+       }
+       return 0;
+
+out:
+       while (!list_empty(&stack)) {
+               pm = list_first_entry(&stack, struct pending_dir_move, list);
+               free_pending_move(sctx, pm);
+       }
+       return ret;
+}
+
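+/*
+ * Returns 1 when the current inode's rename must be delayed because
+ * @parent_ref->dir has a higher inode number and either is itself still
+ * waiting to be moved or got a new path between the parent and send
+ * snapshots; returns 0 when the rename can be performed now, and a
+ * negative errno on error.
+ */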
+static int wait_for_parent_move(struct send_ctx *sctx,
+                               struct recorded_ref *parent_ref)
+{
+       int ret;
+       u64 ino = parent_ref->dir;
+       u64 parent_ino_before, parent_ino_after;
+       u64 new_gen, old_gen;
+       struct fs_path *path_before = NULL;
+       struct fs_path *path_after = NULL;
+       int len1, len2;
+
+       if (parent_ref->dir <= sctx->cur_ino)
+               return 0;
+
+       if (is_waiting_for_move(sctx, ino))
+               return 1;
+
+       ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
+                            NULL, NULL, NULL, NULL);
+       if (ret == -ENOENT)
+               return 0;
+       else if (ret < 0)
+               return ret;
+
+       ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen,
+                            NULL, NULL, NULL, NULL);
+       if (ret < 0)
+               return ret;
+
+       if (new_gen != old_gen)
+               return 0;
+
+       path_before = fs_path_alloc();
+       if (!path_before)
+               return -ENOMEM;
+
+       ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
+                           NULL, path_before);
+       if (ret == -ENOENT) {
+               ret = 0;
+               goto out;
+       } else if (ret < 0) {
+               goto out;
+       }
+
+       path_after = fs_path_alloc();
+       if (!path_after) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
+                           NULL, path_after);
+       if (ret == -ENOENT) {
+               ret = 0;
+               goto out;
+       } else if (ret < 0) {
+               goto out;
+       }
+
+       len1 = fs_path_len(path_before);
+       len2 = fs_path_len(path_after);
+       if ((parent_ino_before != parent_ino_after) && (len1 != len2 ||
+            memcmp(path_before->start, path_after->start, len1))) {
+               ret = 1;
+               goto out;
+       }
+       ret = 0;
+
+out:
+       fs_path_free(path_before);
+       fs_path_free(path_after);
+
+       return ret;
+}
+
 /*
  * This does all the move/link/unlink/rmdir magic.
  */
-static int process_recorded_refs(struct send_ctx *sctx)
+static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 {
        int ret = 0;
        struct recorded_ref *cur;
@@ -2824,11 +3219,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                 * dirs, we always have one new and one deleted
                                 * ref. The deleted ref is ignored later.
                                 */
-                               ret = send_rename(sctx, valid_path,
-                                               cur->full_path);
-                               if (ret < 0)
-                                       goto out;
-                               ret = fs_path_copy(valid_path, cur->full_path);
+                               if (wait_for_parent_move(sctx, cur)) {
+                                       ret = add_pending_dir_move(sctx,
+                                                                  cur->dir);
+                                       *pending_move = 1;
+                               } else {
+                                       ret = send_rename(sctx, valid_path,
+                                                         cur->full_path);
+                                       if (!ret)
+                                               ret = fs_path_copy(valid_path,
+                                                              cur->full_path);
+                               }
                                if (ret < 0)
                                        goto out;
                        } else {
@@ -3197,6 +3598,7 @@ static int process_all_refs(struct send_ctx *sctx,
        struct extent_buffer *eb;
        int slot;
        iterate_inode_ref_t cb;
+       int pending_move = 0;
 
        path = alloc_path_for_send();
        if (!path)
@@ -3240,7 +3642,9 @@ static int process_all_refs(struct send_ctx *sctx,
        }
        btrfs_release_path(path);
 
-       ret = process_recorded_refs(sctx);
+       ret = process_recorded_refs(sctx, &pending_move);
+       /* Only applicable to an incremental send. */
+       ASSERT(pending_move == 0);
 
 out:
        btrfs_free_path(path);
@@ -3706,7 +4110,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
        TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
                        clone_root->root->root_item.uuid);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-                       clone_root->root->root_item.ctransid);
+                   le64_to_cpu(clone_root->root->root_item.ctransid));
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
                        clone_root->offset);
@@ -3752,6 +4156,39 @@ out:
        return ret;
 }
 
+static int send_hole(struct send_ctx *sctx, u64 end)
+{
+       struct fs_path *p = NULL;
+       u64 offset = sctx->cur_inode_last_extent;
+       u64 len;
+       int ret = 0;
+
+       p = fs_path_alloc();
+       if (!p)
+               return -ENOMEM;
+       memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
+       while (offset < end) {
+               len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
+
+               ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
+               if (ret < 0)
+                       break;
+               ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+               if (ret < 0)
+                       break;
+               TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+               TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+               TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
+               ret = send_cmd(sctx);
+               if (ret < 0)
+                       break;
+               offset += len;
+       }
+tlv_put_failure:
+       fs_path_free(p);
+       return ret;
+}
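
A hole is thus streamed as ordinary WRITE commands full of zeroes,
chunked by the send read buffer, so the stream format needs no
dedicated punch command. Assuming BTRFS_SEND_READ_SIZE is 48 KiB (an
illustrative value), a 100 KiB hole starting at offset 0 would become:

	WRITE offset=0      len=49152   /* 48 KiB of zeroes */
	WRITE offset=49152  len=49152   /* 48 KiB of zeroes */
	WRITE offset=98304  len=4096    /* remaining 4 KiB  */
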
+
 static int send_write_or_clone(struct send_ctx *sctx,
                               struct btrfs_path *path,
                               struct btrfs_key *key,
@@ -3764,12 +4201,14 @@ static int send_write_or_clone(struct send_ctx *sctx,
        u64 len;
        u32 l;
        u8 type;
+       u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
 
        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
                        struct btrfs_file_extent_item);
        type = btrfs_file_extent_type(path->nodes[0], ei);
        if (type == BTRFS_FILE_EXTENT_INLINE) {
-               len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+               len = btrfs_file_extent_inline_len(path->nodes[0],
+                                                  path->slots[0], ei);
                /*
                 * it is possible the inline item won't cover the whole page,
                 * but there may be items after this page.  Make
@@ -3787,7 +4226,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
                goto out;
        }
 
-       if (clone_root) {
+       if (clone_root && IS_ALIGNED(offset + len, bs)) {
                ret = send_clone(sctx, offset, len, clone_root);
        } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
                ret = send_update_extent(sctx, offset, len);
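
The alignment guard added above makes send fall back to plain writes
when an extent does not end on a block boundary: the clone ioctl used
on the receiving side rejects ranges that are not block aligned unless
they reach EOF. For reference, the predicate is the standard kernel
macro:

	/* include/linux/kernel.h: x is a multiple of power-of-two a */
	#define IS_ALIGNED(x, a)	(((x) & ((typeof(x))(a) - 1)) == 0)

With a 4096-byte block size an extent ending at offset 8192 may be
cloned, while one ending at 8200 is written out instead.
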
@@ -3979,6 +4418,101 @@ out:
        return ret;
 }
 
+static int get_last_extent(struct send_ctx *sctx, u64 offset)
+{
+       struct btrfs_path *path;
+       struct btrfs_root *root = sctx->send_root;
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_key key;
+       u64 extent_end;
+       u8 type;
+       int ret;
+
+       path = alloc_path_for_send();
+       if (!path)
+               return -ENOMEM;
+
+       sctx->cur_inode_last_extent = 0;
+
+       key.objectid = sctx->cur_ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = offset;
+       ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
+       if (ret < 0)
+               goto out;
+       ret = 0;
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+       if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
+               goto out;
+
+       fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                           struct btrfs_file_extent_item);
+       type = btrfs_file_extent_type(path->nodes[0], fi);
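+       /*
+        * For hole detection an inline extent counts as covering its whole
+        * block, so round its end up: e.g. a 300-byte inline extent at
+        * offset 0 with a 4096-byte sector size yields
+        * extent_end = ALIGN(0 + 300, 4096) = 4096.
+        */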
+       if (type == BTRFS_FILE_EXTENT_INLINE) {
+               u64 size = btrfs_file_extent_inline_len(path->nodes[0],
+                                                       path->slots[0], fi);
+               extent_end = ALIGN(key.offset + size,
+                                  sctx->send_root->sectorsize);
+       } else {
+               extent_end = key.offset +
+                       btrfs_file_extent_num_bytes(path->nodes[0], fi);
+       }
+       sctx->cur_inode_last_extent = extent_end;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
+                          struct btrfs_key *key)
+{
+       struct btrfs_file_extent_item *fi;
+       u64 extent_end;
+       u8 type;
+       int ret = 0;
+
+       if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
+               return 0;
+
+       if (sctx->cur_inode_last_extent == (u64)-1) {
+               ret = get_last_extent(sctx, key->offset - 1);
+               if (ret)
+                       return ret;
+       }
+
+       fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                           struct btrfs_file_extent_item);
+       type = btrfs_file_extent_type(path->nodes[0], fi);
+       if (type == BTRFS_FILE_EXTENT_INLINE) {
+               u64 size = btrfs_file_extent_inline_len(path->nodes[0],
+                                                       path->slots[0], fi);
+               extent_end = ALIGN(key->offset + size,
+                                  sctx->send_root->sectorsize);
+       } else {
+               extent_end = key->offset +
+                       btrfs_file_extent_num_bytes(path->nodes[0], fi);
+       }
+
+       if (path->slots[0] == 0 &&
+           sctx->cur_inode_last_extent < key->offset) {
+               /*
+                * We might have skipped entire leaves that contained only
+                * file extent items for our current inode. These leaves have
+                * a generation number smaller (older) than the one in the
+                * current leaf and the leaf our last extent came from, and
+                * are located between these two leaves.
+                */
+               ret = get_last_extent(sctx, key->offset - 1);
+               if (ret)
+                       return ret;
+       }
+
+       if (sctx->cur_inode_last_extent < key->offset)
+               ret = send_hole(sctx, key->offset);
+       sctx->cur_inode_last_extent = extent_end;
+       return ret;
+}
+
 static int process_extent(struct send_ctx *sctx,
                          struct btrfs_path *path,
                          struct btrfs_key *key)
@@ -3995,7 +4529,7 @@ static int process_extent(struct send_ctx *sctx,
                        goto out;
                if (ret) {
                        ret = 0;
-                       goto out;
+                       goto out_hole;
                }
        } else {
                struct btrfs_file_extent_item *ei;
@@ -4031,7 +4565,10 @@ static int process_extent(struct send_ctx *sctx,
                goto out;
 
        ret = send_write_or_clone(sctx, path, key, found_clone);
-
+       if (ret)
+               goto out;
+out_hole:
+       ret = maybe_send_hole(sctx, path, key);
 out:
        return ret;
 }
@@ -4054,17 +4591,25 @@ static int process_all_extents(struct send_ctx *sctx)
        key.objectid = sctx->cmp_key->objectid;
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = 0;
-       while (1) {
-               ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
-               if (ret < 0)
-                       goto out;
-               if (ret) {
-                       ret = 0;
-                       goto out;
-               }
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
 
+       while (1) {
                eb = path->nodes[0];
                slot = path->slots[0];
+
+               if (slot >= btrfs_header_nritems(eb)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0) {
+                               goto out;
+                       } else if (ret > 0) {
+                               ret = 0;
+                               break;
+                       }
+                       continue;
+               }
+
                btrfs_item_key_to_cpu(eb, &found_key, slot);
 
                if (found_key.objectid != key.objectid ||
@@ -4077,8 +4622,7 @@ static int process_all_extents(struct send_ctx *sctx)
                if (ret < 0)
                        goto out;
 
-               btrfs_release_path(path);
-               key.offset = found_key.offset + 1;
+               path->slots[0]++;
        }
 
 out:
@@ -4086,7 +4630,9 @@ out:
        return ret;
 }
 
-static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
+static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
+                                          int *pending_move,
+                                          int *refs_processed)
 {
        int ret = 0;
 
@@ -4098,17 +4644,11 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
        if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
                goto out;
 
-       ret = process_recorded_refs(sctx);
+       ret = process_recorded_refs(sctx, pending_move);
        if (ret < 0)
                goto out;
 
-       /*
-        * We have processed the refs and thus need to advance send_progress.
-        * Now, calls to get_cur_xxx will take the updated refs of the current
-        * inode into account.
-        */
-       sctx->send_progress = sctx->cur_ino + 1;
-
+       *refs_processed = 1;
 out:
        return ret;
 }
@@ -4124,11 +4664,29 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
        u64 right_gid;
        int need_chmod = 0;
        int need_chown = 0;
+       int pending_move = 0;
+       int refs_processed = 0;
 
-       ret = process_recorded_refs_if_needed(sctx, at_end);
+       ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
+                                             &refs_processed);
        if (ret < 0)
                goto out;
 
+       /*
+        * We have processed the refs and thus need to advance send_progress.
+        * Now, calls to get_cur_xxx will take the updated refs of the current
+        * inode into account.
+        *
+        * On the other hand, if our current inode is a directory and couldn't
+        * be moved/renamed because its parent was renamed/moved too and it has
+        * a higher inode number, we can only move/rename our current inode
+        * after we moved/renamed its parent. Therefore in this case operate on
+        * the old path (pre move/rename) of our current inode, and the
+        * move/rename will be performed later.
+        */
+       if (refs_processed && !pending_move)
+               sctx->send_progress = sctx->cur_ino + 1;
+
        if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
                goto out;
        if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
@@ -4157,6 +4715,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
        }
 
        if (S_ISREG(sctx->cur_inode_mode)) {
+               if (need_send_hole(sctx)) {
+                       if (sctx->cur_inode_last_extent == (u64)-1) {
+                               ret = get_last_extent(sctx, (u64)-1);
+                               if (ret)
+                                       goto out;
+                       }
+                       if (sctx->cur_inode_last_extent <
+                           sctx->cur_inode_size) {
+                               ret = send_hole(sctx, sctx->cur_inode_size);
+                               if (ret)
+                                       goto out;
+                       }
+               }
                ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
                                sctx->cur_inode_size);
                if (ret < 0)
@@ -4177,9 +4748,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
        }
 
        /*
-        * Need to send that every time, no matter if it actually changed
-        * between the two trees as we have done changes to the inode before.
+        * If other directory inodes depended on our current directory
+        * inode's move/rename, now do their move/rename operations.
+        */
+       if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
+               ret = apply_children_dir_moves(sctx);
+               if (ret)
+                       goto out;
+       }
+
+       /*
+        * The inode's utimes need to be sent every time, no matter
+        * whether they actually changed between the two trees, as we
+        * have done changes to the inode before.
         */
+       sctx->send_progress = sctx->cur_ino + 1;
        ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
        if (ret < 0)
                goto out;
@@ -4200,6 +4783,7 @@ static int changed_inode(struct send_ctx *sctx,
 
        sctx->cur_ino = key->objectid;
        sctx->cur_inode_new_gen = 0;
+       sctx->cur_inode_last_extent = (u64)-1;
 
        /*
         * Set send_progress to current inode. This will tell all get_cur_xxx
@@ -4480,14 +5064,18 @@ static int changed_cb(struct btrfs_root *left_root,
        struct send_ctx *sctx = ctx;
 
        if (result == BTRFS_COMPARE_TREE_SAME) {
-               if (key->type != BTRFS_INODE_REF_KEY &&
-                   key->type != BTRFS_INODE_EXTREF_KEY)
-                       return 0;
-               ret = compare_refs(sctx, left_path, key);
-               if (!ret)
+               if (key->type == BTRFS_INODE_REF_KEY ||
+                   key->type == BTRFS_INODE_EXTREF_KEY) {
+                       ret = compare_refs(sctx, left_path, key);
+                       if (!ret)
+                               return 0;
+                       if (ret < 0)
+                               return ret;
+               } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
+                       return maybe_send_hole(sctx, left_path, key);
+               } else {
                        return 0;
-               if (ret < 0)
-                       return ret;
+               }
                result = BTRFS_COMPARE_TREE_CHANGED;
                ret = 0;
        }
@@ -4522,7 +5110,6 @@ out:
 static int full_send_tree(struct send_ctx *sctx)
 {
        int ret;
-       struct btrfs_trans_handle *trans = NULL;
        struct btrfs_root *send_root = sctx->send_root;
        struct btrfs_key key;
        struct btrfs_key found_key;
@@ -4544,19 +5131,6 @@ static int full_send_tree(struct send_ctx *sctx)
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
 
-join_trans:
-       /*
-        * We need to make sure the transaction does not get committed
-        * while we do anything on commit roots. Join a transaction to prevent
-        * this.
-        */
-       trans = btrfs_join_transaction(send_root);
-       if (IS_ERR(trans)) {
-               ret = PTR_ERR(trans);
-               trans = NULL;
-               goto out;
-       }
-
        /*
         * Make sure the tree has not changed after re-joining. We detect this
         * by comparing start_ctransid and ctransid. They should always match.
@@ -4566,7 +5140,7 @@ join_trans:
        spin_unlock(&send_root->root_item_lock);
 
        if (ctransid != start_ctransid) {
-               WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
+               WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
                                     "send was modified in between. This is "
                                     "probably a bug.\n");
                ret = -EIO;
@@ -4580,19 +5154,6 @@ join_trans:
                goto out_finish;
 
        while (1) {
-               /*
-                * When someone want to commit while we iterate, end the
-                * joined transaction and rejoin.
-                */
-               if (btrfs_should_end_transaction(trans, send_root)) {
-                       ret = btrfs_end_transaction(trans, send_root);
-                       trans = NULL;
-                       if (ret < 0)
-                               goto out;
-                       btrfs_release_path(path);
-                       goto join_trans;
-               }
-
                eb = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(eb, &found_key, slot);
@@ -4620,12 +5181,6 @@ out_finish:
 
 out:
        btrfs_free_path(path);
-       if (trans) {
-               if (!ret)
-                       ret = btrfs_end_transaction(trans, send_root);
-               else
-                       btrfs_end_transaction(trans, send_root);
-       }
        return ret;
 }
 
@@ -4662,6 +5217,21 @@ out:
        return ret;
 }
 
+static void btrfs_root_dec_send_in_progress(struct btrfs_root *root)
+{
+       spin_lock(&root->root_item_lock);
+       root->send_in_progress--;
+       /*
+        * Not much left to do, we don't know why it's unbalanced and
+        * can't blindly reset it to 0.
+        */
+       if (root->send_in_progress < 0)
+               btrfs_err(root->fs_info,
+                       "send_in_progres unbalanced %d root %llu\n",
+                       root->send_in_progress, root->root_key.objectid);
+       spin_unlock(&root->root_item_lock);
+}
+
 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 {
        int ret = 0;
@@ -4673,6 +5243,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
        struct send_ctx *sctx = NULL;
        u32 i;
        u64 *clone_sources_tmp = NULL;
+       int clone_sources_to_rollback = 0;
+       int sort_clone_roots = 0;
+       int index;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -4680,6 +5253,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
        send_root = BTRFS_I(file_inode(mnt_file))->root;
        fs_info = send_root->fs_info;
 
+       /*
+        * The subvolume must remain read-only during send, protect against
+        * making it RW.
+        */
+       spin_lock(&send_root->root_item_lock);
+       send_root->send_in_progress++;
+       spin_unlock(&send_root->root_item_lock);
+
        /*
         * This is done when we lookup the root, it should already be complete
         * by the time we get here.
@@ -4687,32 +5268,12 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
        WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
 
        /*
-        * If we just created this root we need to make sure that the orphan
-        * cleanup has been done and committed since we search the commit root,
-        * so check its commit root transid with our otransid and if they match
-        * commit the transaction to make sure everything is updated.
+        * Userspace tools do the checks and warn the user if it's
+        * not RO.
         */
-       down_read(&send_root->fs_info->extent_commit_sem);
-       if (btrfs_header_generation(send_root->commit_root) ==
-           btrfs_root_otransid(&send_root->root_item)) {
-               struct btrfs_trans_handle *trans;
-
-               up_read(&send_root->fs_info->extent_commit_sem);
-
-               trans = btrfs_attach_transaction_barrier(send_root);
-               if (IS_ERR(trans)) {
-                       if (PTR_ERR(trans) != -ENOENT) {
-                               ret = PTR_ERR(trans);
-                               goto out;
-                       }
-                       /* ENOENT means theres no transaction */
-               } else {
-                       ret = btrfs_commit_transaction(trans, send_root);
-                       if (ret)
-                               goto out;
-               }
-       } else {
-               up_read(&send_root->fs_info->extent_commit_sem);
+       if (!btrfs_root_readonly(send_root)) {
+               ret = -EPERM;
+               goto out;
        }
 
        arg = memdup_user(arg_, sizeof(*arg));
@@ -4753,8 +5314,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                goto out;
        }
 
-       sctx->mnt = mnt_file->f_path.mnt;
-
        sctx->send_root = send_root;
        sctx->clone_roots_cnt = arg->clone_sources_count;
 
@@ -4771,6 +5330,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                goto out;
        }
 
+       sctx->pending_dir_moves = RB_ROOT;
+       sctx->waiting_dir_moves = RB_ROOT;
+
        sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
                        (arg->clone_sources_count + 1));
        if (!sctx->clone_roots) {
@@ -4798,11 +5360,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                        key.objectid = clone_sources_tmp[i];
                        key.type = BTRFS_ROOT_ITEM_KEY;
                        key.offset = (u64)-1;
+
+                       index = srcu_read_lock(&fs_info->subvol_srcu);
+
                        clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
                        if (IS_ERR(clone_root)) {
+                               srcu_read_unlock(&fs_info->subvol_srcu, index);
                                ret = PTR_ERR(clone_root);
                                goto out;
                        }
+                       clone_sources_to_rollback = i + 1;
+                       spin_lock(&clone_root->root_item_lock);
+                       clone_root->send_in_progress++;
+                       if (!btrfs_root_readonly(clone_root)) {
+                               spin_unlock(&clone_root->root_item_lock);
+                               srcu_read_unlock(&fs_info->subvol_srcu, index);
+                               ret = -EPERM;
+                               goto out;
+                       }
+                       spin_unlock(&clone_root->root_item_lock);
+                       srcu_read_unlock(&fs_info->subvol_srcu, index);
+
                        sctx->clone_roots[i].root = clone_root;
                }
                vfree(clone_sources_tmp);
@@ -4813,11 +5391,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                key.objectid = arg->parent_root;
                key.type = BTRFS_ROOT_ITEM_KEY;
                key.offset = (u64)-1;
+
+               index = srcu_read_lock(&fs_info->subvol_srcu);
+
                sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
                if (IS_ERR(sctx->parent_root)) {
+                       srcu_read_unlock(&fs_info->subvol_srcu, index);
                        ret = PTR_ERR(sctx->parent_root);
                        goto out;
                }
+
+               spin_lock(&sctx->parent_root->root_item_lock);
+               sctx->parent_root->send_in_progress++;
+               if (!btrfs_root_readonly(sctx->parent_root)) {
+                       spin_unlock(&sctx->parent_root->root_item_lock);
+                       srcu_read_unlock(&fs_info->subvol_srcu, index);
+                       ret = -EPERM;
+                       goto out;
+               }
+               spin_unlock(&sctx->parent_root->root_item_lock);
+
+               srcu_read_unlock(&fs_info->subvol_srcu, index);
        }
 
        /*
@@ -4831,6 +5425,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
        sort(sctx->clone_roots, sctx->clone_roots_cnt,
                        sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
                        NULL);
+       sort_clone_roots = 1;
 
        ret = send_subvol(sctx);
        if (ret < 0)
@@ -4846,6 +5441,48 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
        }
 
 out:
+       WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
+       while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
+               struct rb_node *n;
+               struct pending_dir_move *pm;
+
+               n = rb_first(&sctx->pending_dir_moves);
+               pm = rb_entry(n, struct pending_dir_move, node);
+               while (!list_empty(&pm->list)) {
+                       struct pending_dir_move *pm2;
+
+                       pm2 = list_first_entry(&pm->list,
+                                              struct pending_dir_move, list);
+                       free_pending_move(sctx, pm2);
+               }
+               free_pending_move(sctx, pm);
+       }
+
+       WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
+       while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
+               struct rb_node *n;
+               struct waiting_dir_move *dm;
+
+               n = rb_first(&sctx->waiting_dir_moves);
+               dm = rb_entry(n, struct waiting_dir_move, node);
+               rb_erase(&dm->node, &sctx->waiting_dir_moves);
+               kfree(dm);
+       }
+
+       if (sort_clone_roots) {
+               for (i = 0; i < sctx->clone_roots_cnt; i++)
+                       btrfs_root_dec_send_in_progress(
+                                       sctx->clone_roots[i].root);
+       } else {
+               for (i = 0; sctx && i < clone_sources_to_rollback; i++)
+                       btrfs_root_dec_send_in_progress(
+                                       sctx->clone_roots[i].root);
+
+               btrfs_root_dec_send_in_progress(send_root);
+       }
+       if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
+               btrfs_root_dec_send_in_progress(sctx->parent_root);
+
        kfree(arg);
        vfree(clone_sources_tmp);
 
index d71a11d13dfaa8b3065222c87ce964d8ba28e5b5..c02f63356895ff7fc5b59f36748e7aeb36840e04 100644 (file)
@@ -48,6 +48,8 @@
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "print-tree.h"
+#include "hash.h"
+#include "props.h"
 #include "xattr.h"
 #include "volumes.h"
 #include "export.h"
@@ -152,11 +154,12 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                vaf.fmt = fmt;
                vaf.va = &args;
 
-               printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s (%pV)\n",
+               printk(KERN_CRIT
+                       "BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
                        sb->s_id, function, line, errno, errstr, &vaf);
                va_end(args);
        } else {
-               printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s\n",
+               printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
                        sb->s_id, function, line, errno, errstr);
        }
 
@@ -250,7 +253,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
         */
        if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
                                &root->fs_info->fs_state)) {
-               WARN(1, KERN_DEBUG "btrfs: Transaction aborted (error %d)\n",
+               WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
                                errno);
        }
        trans->aborted = errno;
@@ -294,8 +297,8 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
                panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
                        s_id, function, line, &vaf, errno, errstr);
 
-       printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
-              s_id, function, line, &vaf, errno, errstr);
+       btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
+                  function, line, &vaf, errno, errstr);
        va_end(args);
        /* Caller calls BUG() */
 }
@@ -322,7 +325,9 @@ enum {
        Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
        Opt_check_integrity, Opt_check_integrity_including_extent_data,
        Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
-       Opt_commit_interval,
+       Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
+       Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
+       Opt_datasum, Opt_treelog, Opt_noinode_cache,
        Opt_err,
 };
 
@@ -332,8 +337,11 @@ static match_table_t tokens = {
        {Opt_subvolid, "subvolid=%s"},
        {Opt_device, "device=%s"},
        {Opt_nodatasum, "nodatasum"},
+       {Opt_datasum, "datasum"},
        {Opt_nodatacow, "nodatacow"},
+       {Opt_datacow, "datacow"},
        {Opt_nobarrier, "nobarrier"},
+       {Opt_barrier, "barrier"},
        {Opt_max_inline, "max_inline=%s"},
        {Opt_alloc_start, "alloc_start=%s"},
        {Opt_thread_pool, "thread_pool=%d"},
@@ -344,18 +352,25 @@ static match_table_t tokens = {
        {Opt_ssd, "ssd"},
        {Opt_ssd_spread, "ssd_spread"},
        {Opt_nossd, "nossd"},
+       {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_notreelog, "notreelog"},
+       {Opt_treelog, "treelog"},
        {Opt_flushoncommit, "flushoncommit"},
+       {Opt_noflushoncommit, "noflushoncommit"},
        {Opt_ratio, "metadata_ratio=%d"},
        {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
        {Opt_space_cache, "space_cache"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_enospc_debug, "enospc_debug"},
+       {Opt_noenospc_debug, "noenospc_debug"},
        {Opt_subvolrootid, "subvolrootid=%d"},
        {Opt_defrag, "autodefrag"},
+       {Opt_nodefrag, "noautodefrag"},
        {Opt_inode_cache, "inode_cache"},
+       {Opt_noinode_cache, "noinode_cache"},
        {Opt_no_space_cache, "nospace_cache"},
        {Opt_recovery, "recovery"},
        {Opt_skip_balance, "skip_balance"},
@@ -368,6 +383,20 @@ static match_table_t tokens = {
        {Opt_err, NULL},
 };
 
+#define btrfs_set_and_info(root, opt, fmt, args...)                    \
+{                                                                      \
+       if (!btrfs_test_opt(root, opt))                                 \
+               btrfs_info(root->fs_info, fmt, ##args);                 \
+       btrfs_set_opt(root->fs_info->mount_opt, opt);                   \
+}
+
+#define btrfs_clear_and_info(root, opt, fmt, args...)                  \
+{                                                                      \
+       if (btrfs_test_opt(root, opt))                                  \
+               btrfs_info(root->fs_info, fmt, ##args);                 \
+       btrfs_clear_opt(root->fs_info->mount_opt, opt);                 \
+}
+
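
Note that these helpers expand to a bare brace block, which breaks
compilation when one is used with a trailing semicolon as the unbraced
body of an if/else; the conventional guard is a do/while(0) wrapper,
e.g. for the first macro:

	#define btrfs_set_and_info(root, opt, fmt, args...)		\
	do {								\
		if (!btrfs_test_opt(root, opt))				\
			btrfs_info(root->fs_info, fmt, ##args);		\
		btrfs_set_opt(root->fs_info->mount_opt, opt);		\
	} while (0)
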
 /*
  * Regular mount options parser.  Everything that is needed only when
  * reading in a new superblock is parsed here.
@@ -383,6 +412,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        int ret = 0;
        char *compress_type;
        bool compress_force = false;
+       bool compress = false;
 
        cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
        if (cache_gen)
@@ -409,7 +439,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                token = match_token(p, tokens, args);
                switch (token) {
                case Opt_degraded:
-                       printk(KERN_INFO "btrfs: allowing degraded mounts\n");
+                       btrfs_info(root->fs_info, "allowing degraded mounts");
                        btrfs_set_opt(info->mount_opt, DEGRADED);
                        break;
                case Opt_subvol:
@@ -422,27 +452,45 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                         */
                        break;
                case Opt_nodatasum:
-                       printk(KERN_INFO "btrfs: setting nodatasum\n");
-                       btrfs_set_opt(info->mount_opt, NODATASUM);
+                       btrfs_set_and_info(root, NODATASUM,
+                                          "setting nodatasum");
+                       break;
+               case Opt_datasum:
+                       if (btrfs_test_opt(root, NODATASUM)) {
+                               if (btrfs_test_opt(root, NODATACOW))
+                                       btrfs_info(root->fs_info, "setting datasum, datacow enabled");
+                               else
+                                       btrfs_info(root->fs_info, "setting datasum");
+                       }
+                       btrfs_clear_opt(info->mount_opt, NODATACOW);
+                       btrfs_clear_opt(info->mount_opt, NODATASUM);
                        break;
                case Opt_nodatacow:
-                       if (!btrfs_test_opt(root, COMPRESS) ||
-                               !btrfs_test_opt(root, FORCE_COMPRESS)) {
-                                       printk(KERN_INFO "btrfs: setting nodatacow, compression disabled\n");
-                       } else {
-                               printk(KERN_INFO "btrfs: setting nodatacow\n");
+                       if (!btrfs_test_opt(root, NODATACOW)) {
+                               if (!btrfs_test_opt(root, COMPRESS) ||
+                                   !btrfs_test_opt(root, FORCE_COMPRESS)) {
+                                       btrfs_info(root->fs_info,
+                                                  "setting nodatacow, compression disabled");
+                               } else {
+                                       btrfs_info(root->fs_info, "setting nodatacow");
+                               }
                        }
                        btrfs_clear_opt(info->mount_opt, COMPRESS);
                        btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
                        btrfs_set_opt(info->mount_opt, NODATACOW);
                        btrfs_set_opt(info->mount_opt, NODATASUM);
                        break;
+               case Opt_datacow:
+                       btrfs_clear_and_info(root, NODATACOW,
+                                            "setting datacow");
+                       break;
                case Opt_compress_force:
                case Opt_compress_force_type:
                        compress_force = true;
                        /* Fallthrough */
                case Opt_compress:
                case Opt_compress_type:
+                       compress = true;
                        if (token == Opt_compress ||
                            token == Opt_compress_force ||
                            strcmp(args[0].from, "zlib") == 0) {
@@ -469,34 +517,36 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        }
 
                        if (compress_force) {
-                               btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
-                               pr_info("btrfs: force %s compression\n",
-                                       compress_type);
-                       } else if (btrfs_test_opt(root, COMPRESS)) {
-                               pr_info("btrfs: use %s compression\n",
-                                       compress_type);
+                               btrfs_set_and_info(root, FORCE_COMPRESS,
+                                                  "force %s compression",
+                                                  compress_type);
+                       } else if (compress) {
+                               if (!btrfs_test_opt(root, COMPRESS))
+                                       btrfs_info(root->fs_info,
+                                                  "btrfs: use %s compression\n",
+                                                  compress_type);
                        }
                        break;
                case Opt_ssd:
-                       printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
-                       btrfs_set_opt(info->mount_opt, SSD);
+                       btrfs_set_and_info(root, SSD,
+                                          "use ssd allocation scheme");
                        break;
                case Opt_ssd_spread:
-                       printk(KERN_INFO "btrfs: use spread ssd "
-                              "allocation scheme\n");
-                       btrfs_set_opt(info->mount_opt, SSD);
-                       btrfs_set_opt(info->mount_opt, SSD_SPREAD);
+                       btrfs_set_and_info(root, SSD_SPREAD,
+                                          "use spread ssd allocation scheme");
+                       btrfs_set_opt(info->mount_opt, SSD);
                        break;
                case Opt_nossd:
-                       printk(KERN_INFO "btrfs: not using ssd allocation "
-                              "scheme\n");
-                       btrfs_set_opt(info->mount_opt, NOSSD);
+                       btrfs_set_and_info(root, NOSSD,
+                                            "not using ssd allocation scheme");
                        btrfs_clear_opt(info->mount_opt, SSD);
-                       btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
+                       break;
+               case Opt_barrier:
+                       btrfs_clear_and_info(root, NOBARRIER,
+                                            "turning on barriers");
                        break;
                case Opt_nobarrier:
-                       printk(KERN_INFO "btrfs: turning off barriers\n");
-                       btrfs_set_opt(info->mount_opt, NOBARRIER);
+                       btrfs_set_and_info(root, NOBARRIER,
+                                          "turning off barriers");
                        break;
                case Opt_thread_pool:
                        ret = match_int(&args[0], &intarg);
@@ -520,7 +570,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                                info->max_inline,
                                                root->sectorsize);
                                }
-                               printk(KERN_INFO "btrfs: max_inline at %llu\n",
+                               btrfs_info(root->fs_info, "max_inline at %llu",
                                        info->max_inline);
                        } else {
                                ret = -ENOMEM;
@@ -534,24 +584,34 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                info->alloc_start = memparse(num, NULL);
                                mutex_unlock(&info->chunk_mutex);
                                kfree(num);
-                               printk(KERN_INFO
-                                       "btrfs: allocations start at %llu\n",
+                               btrfs_info(root->fs_info, "allocations start at %llu",
                                        info->alloc_start);
                        } else {
                                ret = -ENOMEM;
                                goto out;
                        }
                        break;
+               case Opt_acl:
+                       root->fs_info->sb->s_flags |= MS_POSIXACL;
+                       break;
                case Opt_noacl:
                        root->fs_info->sb->s_flags &= ~MS_POSIXACL;
                        break;
                case Opt_notreelog:
-                       printk(KERN_INFO "btrfs: disabling tree log\n");
-                       btrfs_set_opt(info->mount_opt, NOTREELOG);
+                       btrfs_set_and_info(root, NOTREELOG,
+                                          "disabling tree log");
+                       break;
+               case Opt_treelog:
+                       btrfs_clear_and_info(root, NOTREELOG,
+                                            "enabling tree log");
                        break;
                case Opt_flushoncommit:
-                       printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
-                       btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
+                       btrfs_set_and_info(root, FLUSHONCOMMIT,
+                                          "turning on flush-on-commit");
+                       break;
+               case Opt_noflushoncommit:
+                       btrfs_clear_and_info(root, FLUSHONCOMMIT,
+                                            "turning off flush-on-commit");
                        break;
                case Opt_ratio:
                        ret = match_int(&args[0], &intarg);
@@ -559,7 +619,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                goto out;
                        } else if (intarg >= 0) {
                                info->metadata_ratio = intarg;
-                               printk(KERN_INFO "btrfs: metadata ratio %d\n",
+                               btrfs_info(root->fs_info, "metadata ratio %d",
                                       info->metadata_ratio);
                        } else {
                                ret = -EINVAL;
@@ -567,25 +627,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        }
                        break;
                case Opt_discard:
-                       btrfs_set_opt(info->mount_opt, DISCARD);
+                       btrfs_set_and_info(root, DISCARD,
+                                          "turning on discard");
+                       break;
+               case Opt_nodiscard:
+                       btrfs_clear_and_info(root, DISCARD,
+                                            "turning off discard");
                        break;
                case Opt_space_cache:
-                       btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+                       btrfs_set_and_info(root, SPACE_CACHE,
+                                          "enabling disk space caching");
                        break;
                case Opt_rescan_uuid_tree:
                        btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
                        break;
                case Opt_no_space_cache:
-                       printk(KERN_INFO "btrfs: disabling disk space caching\n");
-                       btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
+                       btrfs_clear_and_info(root, SPACE_CACHE,
+                                            "disabling disk space caching");
                        break;
                case Opt_inode_cache:
-                       printk(KERN_INFO "btrfs: enabling inode map caching\n");
-                       btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
+                       btrfs_set_and_info(root, CHANGE_INODE_CACHE,
+                                          "enabling inode map caching");
+                       break;
+               case Opt_noinode_cache:
+                       btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
+                                            "disabling inode map caching");
                        break;
                case Opt_clear_cache:
-                       printk(KERN_INFO "btrfs: force clearing of disk cache\n");
-                       btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+                       btrfs_set_and_info(root, CLEAR_CACHE,
+                                          "force clearing of disk cache");
                        break;
                case Opt_user_subvol_rm_allowed:
                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
@@ -593,12 +663,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_enospc_debug:
                        btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
                        break;
+               case Opt_noenospc_debug:
+                       btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
+                       break;
                case Opt_defrag:
-                       printk(KERN_INFO "btrfs: enabling auto defrag\n");
-                       btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
+                       btrfs_set_and_info(root, AUTO_DEFRAG,
+                                          "enabling auto defrag");
+                       break;
+               case Opt_nodefrag:
+                       btrfs_clear_and_info(root, AUTO_DEFRAG,
+                                            "disabling auto defrag");
                        break;
                case Opt_recovery:
-                       printk(KERN_INFO "btrfs: enabling auto recovery\n");
+                       btrfs_info(root->fs_info, "enabling auto recovery");
                        btrfs_set_opt(info->mount_opt, RECOVERY);
                        break;
                case Opt_skip_balance:
@@ -606,14 +683,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        break;
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
                case Opt_check_integrity_including_extent_data:
-                       printk(KERN_INFO "btrfs: enabling check integrity"
-                              " including extent data\n");
+                       btrfs_info(root->fs_info,
+                                  "enabling check integrity including extent data");
                        btrfs_set_opt(info->mount_opt,
                                      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
                        break;
                case Opt_check_integrity:
-                       printk(KERN_INFO "btrfs: enabling check integrity\n");
+                       btrfs_info(root->fs_info, "enabling check integrity");
                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
                        break;
                case Opt_check_integrity_print_mask:
@@ -622,8 +699,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                goto out;
                        } else if (intarg >= 0) {
                                info->check_integrity_print_mask = intarg;
-                               printk(KERN_INFO "btrfs:"
-                                      " check_integrity_print_mask 0x%x\n",
+                               btrfs_info(root->fs_info, "check_integrity_print_mask 0x%x",
                                       info->check_integrity_print_mask);
                        } else {
                                ret = -EINVAL;
@@ -634,8 +710,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_check_integrity_including_extent_data:
                case Opt_check_integrity:
                case Opt_check_integrity_print_mask:
-                       printk(KERN_ERR "btrfs: support for check_integrity*"
-                              " not compiled in!\n");
+                       btrfs_err(root->fs_info,
+                               "support for check_integrity* not compiled in!");
                        ret = -EINVAL;
                        goto out;
 #endif
@@ -655,28 +731,24 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        intarg = 0;
                        ret = match_int(&args[0], &intarg);
                        if (ret < 0) {
-                               printk(KERN_ERR
-                                       "btrfs: invalid commit interval\n");
+                               btrfs_err(root->fs_info, "invalid commit interval");
                                ret = -EINVAL;
                                goto out;
                        }
                        if (intarg > 0) {
                                if (intarg > 300) {
-                                       printk(KERN_WARNING
-                                           "btrfs: excessive commit interval %d\n",
+                                       btrfs_warn(root->fs_info, "excessive commit interval %d",
                                                        intarg);
                                }
                                info->commit_interval = intarg;
                        } else {
-                               printk(KERN_INFO
-                                   "btrfs: using default commit interval %ds\n",
+                               btrfs_info(root->fs_info, "using default commit interval %ds",
                                    BTRFS_DEFAULT_COMMIT_INTERVAL);
                                info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
                        }
                        break;
                case Opt_err:
-                       printk(KERN_INFO "btrfs: unrecognized mount option "
-                              "'%s'\n", p);
+                       btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
                        ret = -EINVAL;
                        goto out;
                default:
@@ -685,7 +757,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        }
 out:
        if (!ret && btrfs_test_opt(root, SPACE_CACHE))
-               printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+               btrfs_info(root->fs_info, "disk space caching is enabled");
        kfree(orig);
        return ret;
 }
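
The btrfs_set_and_info()/btrfs_clear_and_info() helpers used throughout the
rewritten option parser collapse the old printk-plus-btrfs_set_opt() pairs and
log only when the option actually changes state; btrfs_info() supplies the
"BTRFS (device ...)" prefix and the newline itself, which is why the converted
format strings drop both. The definitions land in fs/btrfs/ctree.h elsewhere
in this series; a sketch consistent with the call sites above:

#define btrfs_set_and_info(root, opt, fmt, args...)                    \
{                                                                      \
        if (!btrfs_test_opt(root, opt))                                \
                btrfs_info(root->fs_info, fmt, ##args);                \
        btrfs_set_opt(root->fs_info->mount_opt, opt);                  \
}

#define btrfs_clear_and_info(root, opt, fmt, args...)                  \
{                                                                      \
        if (btrfs_test_opt(root, opt))                                 \
                btrfs_info(root->fs_info, fmt, ##args);                \
        btrfs_clear_opt(root->fs_info->mount_opt, opt);                \
}
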
@@ -748,7 +820,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                        break;
                case Opt_subvolrootid:
                        printk(KERN_WARNING
-                               "btrfs: 'subvolrootid' mount option is deprecated and has no effect\n");
+                               "BTRFS: 'subvolrootid' mount option is deprecated and has "
+                               "no effect\n");
                        break;
                case Opt_device:
                        device_name = match_strdup(&args[0]);
@@ -877,7 +950,7 @@ static int btrfs_fill_super(struct super_block *sb,
        sb->s_flags |= MS_I_VERSION;
        err = open_ctree(sb, fs_devices, (char *)data);
        if (err) {
-               printk("btrfs: open_ctree failed\n");
+               printk(KERN_ERR "BTRFS: open_ctree failed\n");
                return err;
        }
 
@@ -1115,7 +1188,7 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
                dput(root);
                root = ERR_PTR(-EINVAL);
                deactivate_locked_super(s);
-               printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n",
+               printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
                                subvol_name);
        }
 
@@ -1240,7 +1313,7 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
 
        fs_info->thread_pool_size = new_pool_size;
 
-       printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n",
+       btrfs_info(fs_info, "resize thread pool %d -> %d",
               old_pool_size, new_pool_size);
 
        btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size);
@@ -1346,7 +1419,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
        } else {
                if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
                        btrfs_err(fs_info,
-                               "Remounting read-write after error is not allowed\n");
+                               "Remounting read-write after error is not allowed");
                        ret = -EINVAL;
                        goto restore;
                }
@@ -1358,8 +1431,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                if (fs_info->fs_devices->missing_devices >
                     fs_info->num_tolerated_disk_barrier_failures &&
                    !(*flags & MS_RDONLY)) {
-                       printk(KERN_WARNING
-                              "Btrfs: too many missing devices, writeable remount is not allowed\n");
+                       btrfs_warn(fs_info,
+                               "too many missing devices, writeable remount is not allowed");
                        ret = -EACCES;
                        goto restore;
                }
@@ -1384,16 +1457,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 
                ret = btrfs_resume_dev_replace_async(fs_info);
                if (ret) {
-                       pr_warn("btrfs: failed to resume dev_replace\n");
+                       btrfs_warn(fs_info, "failed to resume dev_replace");
                        goto restore;
                }
 
                if (!fs_info->uuid_root) {
-                       pr_info("btrfs: creating UUID tree\n");
+                       btrfs_info(fs_info, "creating UUID tree");
                        ret = btrfs_create_uuid_tree(fs_info);
                        if (ret) {
-                               pr_warn("btrfs: failed to create the uuid tree"
-                                       "%d\n", ret);
+                               btrfs_warn(fs_info, "failed to create the UUID tree %d", ret);
                                goto restore;
                        }
                }
@@ -1773,7 +1845,7 @@ static int btrfs_interface_init(void)
 static void btrfs_interface_exit(void)
 {
        if (misc_deregister(&btrfs_misc) < 0)
-               printk(KERN_INFO "btrfs: misc_deregister failed for control device\n");
+               printk(KERN_INFO "BTRFS: misc_deregister failed for control device\n");
 }
 
 static void btrfs_print_info(void)
@@ -1818,10 +1890,16 @@ static int __init init_btrfs_fs(void)
 {
        int err;
 
-       err = btrfs_init_sysfs();
+       err = btrfs_hash_init();
        if (err)
                return err;
 
+       btrfs_props_init();
+
+       err = btrfs_init_sysfs();
+       if (err)
+               goto free_hash;
+
        btrfs_init_compress();
 
        err = btrfs_init_cachep();
@@ -1895,6 +1973,8 @@ free_cachep:
 free_compress:
        btrfs_exit_compress();
        btrfs_exit_sysfs();
+free_hash:
+       btrfs_hash_exit();
        return err;
 }
 
@@ -1913,6 +1993,7 @@ static void __exit exit_btrfs_fs(void)
        btrfs_exit_sysfs();
        btrfs_cleanup_fs_uuids();
        btrfs_exit_compress();
+       btrfs_hash_exit();
 }
 
 module_init(init_btrfs_fs)
index 5b326cd60a4aa3c048eefe730312248d73cf47ba..782374d8fd1970ee9d6b4742fc2e213dc4d2e637 100644 (file)
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/kobject.h>
+#include <linux/bug.h>
+#include <linux/genhd.h>
 
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
+#include "sysfs.h"
+#include "volumes.h"
+
+static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
+
+static u64 get_features(struct btrfs_fs_info *fs_info,
+                       enum btrfs_feature_set set)
+{
+       struct btrfs_super_block *disk_super = fs_info->super_copy;
+       if (set == FEAT_COMPAT)
+               return btrfs_super_compat_flags(disk_super);
+       else if (set == FEAT_COMPAT_RO)
+               return btrfs_super_compat_ro_flags(disk_super);
+       else
+               return btrfs_super_incompat_flags(disk_super);
+}
+
+static void set_features(struct btrfs_fs_info *fs_info,
+                        enum btrfs_feature_set set, u64 features)
+{
+       struct btrfs_super_block *disk_super = fs_info->super_copy;
+       if (set == FEAT_COMPAT)
+               btrfs_set_super_compat_flags(disk_super, features);
+       else if (set == FEAT_COMPAT_RO)
+               btrfs_set_super_compat_ro_flags(disk_super, features);
+       else
+               btrfs_set_super_incompat_flags(disk_super, features);
+}
+
+static int can_modify_feature(struct btrfs_feature_attr *fa)
+{
+       int val = 0;
+       u64 set, clear;
+       switch (fa->feature_set) {
+       case FEAT_COMPAT:
+               set = BTRFS_FEATURE_COMPAT_SAFE_SET;
+               clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
+               break;
+       case FEAT_COMPAT_RO:
+               set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
+               clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
+               break;
+       case FEAT_INCOMPAT:
+               set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
+               clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
+               break;
+       default:
+               printk(KERN_WARNING "BTRFS: sysfs: unknown feature set %d\n",
+                               fa->feature_set);
+               return 0;
+       }
+
+       if (set & fa->feature_bit)
+               val |= 1;
+       if (clear & fa->feature_bit)
+               val |= 2;
+
+       return val;
+}
+
+static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
+                                      struct kobj_attribute *a, char *buf)
+{
+       int val = 0;
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+       struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
+       if (fs_info) {
+               u64 features = get_features(fs_info, fa->feature_set);
+               if (features & fa->feature_bit)
+                       val = 1;
+       } else
+               val = can_modify_feature(fa);
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
+                                       struct kobj_attribute *a,
+                                       const char *buf, size_t count)
+{
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
+       struct btrfs_trans_handle *trans;
+       u64 features, set, clear;
+       unsigned long val;
+       int ret;
+
+       fs_info = to_fs_info(kobj);
+       if (!fs_info)
+               return -EPERM;
+
+       ret = kstrtoul(skip_spaces(buf), 0, &val);
+       if (ret)
+               return ret;
+
+       if (fa->feature_set == FEAT_COMPAT) {
+               set = BTRFS_FEATURE_COMPAT_SAFE_SET;
+               clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
+       } else if (fa->feature_set == FEAT_COMPAT_RO) {
+               set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
+               clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
+       } else {
+               set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
+               clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
+       }
+
+       features = get_features(fs_info, fa->feature_set);
+
+       /* Nothing to do */
+       if ((val && (features & fa->feature_bit)) ||
+           (!val && !(features & fa->feature_bit)))
+               return count;
+
+       if ((val && !(set & fa->feature_bit)) ||
+           (!val && !(clear & fa->feature_bit))) {
+               btrfs_info(fs_info,
+                       "%sabling feature %s on mounted fs is not supported.",
+                       val ? "En" : "Dis", fa->kobj_attr.attr.name);
+               return -EPERM;
+       }
+
+       btrfs_info(fs_info, "%s %s feature flag",
+                  val ? "Setting" : "Clearing", fa->kobj_attr.attr.name);
+
+       trans = btrfs_start_transaction(fs_info->fs_root, 0);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       spin_lock(&fs_info->super_lock);
+       features = get_features(fs_info, fa->feature_set);
+       if (val)
+               features |= fa->feature_bit;
+       else
+               features &= ~fa->feature_bit;
+       set_features(fs_info, fa->feature_set, features);
+       spin_unlock(&fs_info->super_lock);
+
+       ret = btrfs_commit_transaction(trans, fs_info->fs_root);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+static umode_t btrfs_feature_visible(struct kobject *kobj,
+                                    struct attribute *attr, int unused)
+{
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+       umode_t mode = attr->mode;
+
+       if (fs_info) {
+               struct btrfs_feature_attr *fa;
+               u64 features;
+
+               fa = attr_to_btrfs_feature_attr(attr);
+               features = get_features(fs_info, fa->feature_set);
+
+               if (can_modify_feature(fa))
+                       mode |= S_IWUSR;
+               else if (!(features & fa->feature_bit))
+                       mode = 0;
+       }
+
+       return mode;
+}
+
+BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF);
+BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL);
+BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS);
+BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO);
+BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA);
+BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
+BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
+BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
+BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+
+static struct attribute *btrfs_supported_feature_attrs[] = {
+       BTRFS_FEAT_ATTR_PTR(mixed_backref),
+       BTRFS_FEAT_ATTR_PTR(default_subvol),
+       BTRFS_FEAT_ATTR_PTR(mixed_groups),
+       BTRFS_FEAT_ATTR_PTR(compress_lzo),
+       BTRFS_FEAT_ATTR_PTR(big_metadata),
+       BTRFS_FEAT_ATTR_PTR(extended_iref),
+       BTRFS_FEAT_ATTR_PTR(raid56),
+       BTRFS_FEAT_ATTR_PTR(skinny_metadata),
+       BTRFS_FEAT_ATTR_PTR(no_holes),
+       NULL
+};
+
+static const struct attribute_group btrfs_feature_attr_group = {
+       .name = "features",
+       .is_visible = btrfs_feature_visible,
+       .attrs = btrfs_supported_feature_attrs,
+};
+
+static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
+{
+       u64 val;
+       if (lock)
+               spin_lock(lock);
+       val = *value_ptr;
+       if (lock)
+               spin_unlock(lock);
+       return snprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+
+static ssize_t global_rsv_size_show(struct kobject *kobj,
+                                   struct kobj_attribute *ka, char *buf)
+{
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
+       struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
+       return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf);
+}
+BTRFS_ATTR(global_rsv_size, 0444, global_rsv_size_show);
+
+static ssize_t global_rsv_reserved_show(struct kobject *kobj,
+                                       struct kobj_attribute *a, char *buf)
+{
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
+       struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
+       return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf);
+}
+BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show);
+
+#define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
+
+static ssize_t raid_bytes_show(struct kobject *kobj,
+                              struct kobj_attribute *attr, char *buf);
+BTRFS_RAID_ATTR(total_bytes, raid_bytes_show);
+BTRFS_RAID_ATTR(used_bytes, raid_bytes_show);
+
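+/*
+ * Each per-RAID-level kobject handed to raid_bytes_show() is an element of
+ * sinfo->block_group_kobjs[], so subtracting the array base from kobj
+ * recovers the BTRFS_RAID_* index that selects the block_groups[] list
+ * to sum.
+ */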
+static ssize_t raid_bytes_show(struct kobject *kobj,
+                              struct kobj_attribute *attr, char *buf)
+{
+       struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
+       struct btrfs_block_group_cache *block_group;
+       int index = kobj - sinfo->block_group_kobjs;
+       u64 val = 0;
+
+       down_read(&sinfo->groups_sem);
+       list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
+               if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes))
+                       val += block_group->key.offset;
+               else
+                       val += btrfs_block_group_used(&block_group->item);
+       }
+       up_read(&sinfo->groups_sem);
+       return snprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+
+static struct attribute *raid_attributes[] = {
+       BTRFS_RAID_ATTR_PTR(total_bytes),
+       BTRFS_RAID_ATTR_PTR(used_bytes),
+       NULL
+};
+
+static void release_raid_kobj(struct kobject *kobj)
+{
+       kobject_put(kobj->parent);
+}
+
+struct kobj_type btrfs_raid_ktype = {
+       .sysfs_ops = &kobj_sysfs_ops,
+       .release = release_raid_kobj,
+       .default_attrs = raid_attributes,
+};
+
+#define SPACE_INFO_ATTR(field)                                         \
+static ssize_t btrfs_space_info_show_##field(struct kobject *kobj,     \
+                                            struct kobj_attribute *a,  \
+                                            char *buf)                 \
+{                                                                      \
+       struct btrfs_space_info *sinfo = to_space_info(kobj);           \
+       return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf);        \
+}                                                                      \
+BTRFS_ATTR(field, 0444, btrfs_space_info_show_##field)
+
+static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
+                                                      struct kobj_attribute *a,
+                                                      char *buf)
+{
+       struct btrfs_space_info *sinfo = to_space_info(kobj);
+       s64 val = percpu_counter_sum(&sinfo->total_bytes_pinned);
+       return snprintf(buf, PAGE_SIZE, "%lld\n", val);
+}
+
+SPACE_INFO_ATTR(flags);
+SPACE_INFO_ATTR(total_bytes);
+SPACE_INFO_ATTR(bytes_used);
+SPACE_INFO_ATTR(bytes_pinned);
+SPACE_INFO_ATTR(bytes_reserved);
+SPACE_INFO_ATTR(bytes_may_use);
+SPACE_INFO_ATTR(disk_used);
+SPACE_INFO_ATTR(disk_total);
+BTRFS_ATTR(total_bytes_pinned, 0444, btrfs_space_info_show_total_bytes_pinned);
+
+static struct attribute *space_info_attrs[] = {
+       BTRFS_ATTR_PTR(flags),
+       BTRFS_ATTR_PTR(total_bytes),
+       BTRFS_ATTR_PTR(bytes_used),
+       BTRFS_ATTR_PTR(bytes_pinned),
+       BTRFS_ATTR_PTR(bytes_reserved),
+       BTRFS_ATTR_PTR(bytes_may_use),
+       BTRFS_ATTR_PTR(disk_used),
+       BTRFS_ATTR_PTR(disk_total),
+       BTRFS_ATTR_PTR(total_bytes_pinned),
+       NULL,
+};
+
+static void space_info_release(struct kobject *kobj)
+{
+       struct btrfs_space_info *sinfo = to_space_info(kobj);
+       percpu_counter_destroy(&sinfo->total_bytes_pinned);
+       kfree(sinfo);
+}
+
+struct kobj_type space_info_ktype = {
+       .sysfs_ops = &kobj_sysfs_ops,
+       .release = space_info_release,
+       .default_attrs = space_info_attrs,
+};
+
+static const struct attribute *allocation_attrs[] = {
+       BTRFS_ATTR_PTR(global_rsv_reserved),
+       BTRFS_ATTR_PTR(global_rsv_size),
+       NULL,
+};
+
+static ssize_t btrfs_label_show(struct kobject *kobj,
+                               struct kobj_attribute *a, char *buf)
+{
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+       return snprintf(buf, PAGE_SIZE, "%s\n", fs_info->super_copy->label);
+}
+
+static ssize_t btrfs_label_store(struct kobject *kobj,
+                                struct kobj_attribute *a,
+                                const char *buf, size_t len)
+{
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *root = fs_info->fs_root;
+       int ret;
+
+       if (len >= BTRFS_LABEL_SIZE) {
+               pr_err("BTRFS: unable to set label with more than %d bytes\n",
+                      BTRFS_LABEL_SIZE - 1);
+               return -EINVAL;
+       }
+
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       spin_lock(&root->fs_info->super_lock);
+       strcpy(fs_info->super_copy->label, buf);
+       spin_unlock(&root->fs_info->super_lock);
+       ret = btrfs_commit_transaction(trans, root);
+
+       if (!ret)
+               return len;
+
+       return ret;
+}
+BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store);
+
+static struct attribute *btrfs_attrs[] = {
+       BTRFS_ATTR_PTR(label),
+       NULL,
+};
+
+static void btrfs_release_super_kobj(struct kobject *kobj)
+{
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+       complete(&fs_info->kobj_unregister);
+}
+
+static struct kobj_type btrfs_ktype = {
+       .sysfs_ops      = &kobj_sysfs_ops,
+       .release        = btrfs_release_super_kobj,
+       .default_attrs  = btrfs_attrs,
+};
+
+static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
+{
+       if (kobj->ktype != &btrfs_ktype)
+               return NULL;
+       return container_of(kobj, struct btrfs_fs_info, super_kobj);
+}
+
+#define NUM_FEATURE_BITS 64
+static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
+static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
+
+static u64 supported_feature_masks[3] = {
+       [FEAT_COMPAT]    = BTRFS_FEATURE_COMPAT_SUPP,
+       [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
+       [FEAT_INCOMPAT]  = BTRFS_FEATURE_INCOMPAT_SUPP,
+};
+
+static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
+{
+       int set;
+
+       for (set = 0; set < FEAT_MAX; set++) {
+               int i;
+               struct attribute *attrs[2];
+               struct attribute_group agroup = {
+                       .name = "features",
+                       .attrs = attrs,
+               };
+               u64 features = get_features(fs_info, set);
+               features &= ~supported_feature_masks[set];
+
+               if (!features)
+                       continue;
+
+               attrs[1] = NULL;
+               for (i = 0; i < NUM_FEATURE_BITS; i++) {
+                       struct btrfs_feature_attr *fa;
+
+                       if (!(features & (1ULL << i)))
+                               continue;
+
+                       fa = &btrfs_feature_attrs[set][i];
+                       attrs[0] = &fa->kobj_attr.attr;
+                       if (add) {
+                               int ret;
+                               ret = sysfs_merge_group(&fs_info->super_kobj,
+                                                       &agroup);
+                               if (ret)
+                                       return ret;
+                       } else
+                               sysfs_unmerge_group(&fs_info->super_kobj,
+                                                   &agroup);
+               }
+
+       }
+       return 0;
+}
+
+static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
+{
+       kobject_del(&fs_info->super_kobj);
+       kobject_put(&fs_info->super_kobj);
+       wait_for_completion(&fs_info->kobj_unregister);
+}
+
+void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
+{
+       if (fs_info->space_info_kobj) {
+               sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
+               kobject_del(fs_info->space_info_kobj);
+               kobject_put(fs_info->space_info_kobj);
+       }
+       kobject_del(fs_info->device_dir_kobj);
+       kobject_put(fs_info->device_dir_kobj);
+       addrm_unknown_feature_attrs(fs_info, false);
+       sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group);
+       __btrfs_sysfs_remove_one(fs_info);
+}
+
+const char * const btrfs_feature_set_names[3] = {
+       [FEAT_COMPAT]    = "compat",
+       [FEAT_COMPAT_RO] = "compat_ro",
+       [FEAT_INCOMPAT]  = "incompat",
+};
+
+char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags)
+{
+       size_t bufsize = 4096; /* safe max, 64 names * 64 bytes */
+       int len = 0;
+       int i;
+       char *str;
+
+       str = kmalloc(bufsize, GFP_KERNEL);
+       if (!str)
+               return str;
+
+       for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
+               const char *name;
+
+               if (!(flags & (1ULL << i)))
+                       continue;
+
+               name = btrfs_feature_attrs[set][i].kobj_attr.attr.name;
+               len += snprintf(str + len, bufsize - len, "%s%s",
+                               len ? "," : "", name);
+       }
+
+       return str;
+}
+
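+/*
+ * Feature bits this kernel knows about keep the attribute names declared
+ * above (e.g. "no_holes"); every other on-disk bit gets a generated
+ * "<set>:<bit>" name such as "incompat:25", which is why the backing name
+ * buffers are 13 bytes ("compat_ro:" plus two digits plus NUL).
+ */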
+static void init_feature_attrs(void)
+{
+       struct btrfs_feature_attr *fa;
+       int set, i;
+
+       BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
+                    ARRAY_SIZE(btrfs_feature_attrs));
+       BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
+                    ARRAY_SIZE(btrfs_feature_attrs[0]));
+
+       memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
+       memset(btrfs_unknown_feature_names, 0,
+              sizeof(btrfs_unknown_feature_names));
+
+       for (i = 0; btrfs_supported_feature_attrs[i]; i++) {
+               struct btrfs_feature_attr *sfa;
+               struct attribute *a = btrfs_supported_feature_attrs[i];
+               int bit;
+               sfa = attr_to_btrfs_feature_attr(a);
+               bit = ilog2(sfa->feature_bit);
+               fa = &btrfs_feature_attrs[sfa->feature_set][bit];
+
+               fa->kobj_attr.attr.name = sfa->kobj_attr.attr.name;
+       }
+
+       for (set = 0; set < FEAT_MAX; set++) {
+               for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
+                       char *name = btrfs_unknown_feature_names[set][i];
+                       fa = &btrfs_feature_attrs[set][i];
+
+                       if (fa->kobj_attr.attr.name)
+                               continue;
+
+                       snprintf(name, 13, "%s:%u",
+                                btrfs_feature_set_names[set], i);
+
+                       fa->kobj_attr.attr.name = name;
+                       fa->kobj_attr.attr.mode = S_IRUGO;
+                       fa->feature_set = set;
+                       fa->feature_bit = 1ULL << i;
+               }
+       }
+}
+
+static int add_device_membership(struct btrfs_fs_info *fs_info)
+{
+       int error = 0;
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_device *dev;
+
+       fs_info->device_dir_kobj = kobject_create_and_add("devices",
+                                               &fs_info->super_kobj);
+       if (!fs_info->device_dir_kobj)
+               return -ENOMEM;
+
+       list_for_each_entry(dev, &fs_devices->devices, dev_list) {
+               struct hd_struct *disk = dev->bdev->bd_part;
+               struct kobject *disk_kobj = &part_to_dev(disk)->kobj;
+
+               error = sysfs_create_link(fs_info->device_dir_kobj,
+                                         disk_kobj, disk_kobj->name);
+               if (error)
+                       break;
+       }
+
+       return error;
+}
 
 /* /sys/fs/btrfs/ entry */
 static struct kset *btrfs_kset;
 
+int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
+{
+       int error;
+
+       init_completion(&fs_info->kobj_unregister);
+       fs_info->super_kobj.kset = btrfs_kset;
+       error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL,
+                                    "%pU", fs_info->fsid);
+       if (error)
+               return error;
+
+       error = sysfs_create_group(&fs_info->super_kobj,
+                                  &btrfs_feature_attr_group);
+       if (error) {
+               __btrfs_sysfs_remove_one(fs_info);
+               return error;
+       }
+
+       error = addrm_unknown_feature_attrs(fs_info, true);
+       if (error)
+               goto failure;
+
+       error = add_device_membership(fs_info);
+       if (error)
+               goto failure;
+
+       fs_info->space_info_kobj = kobject_create_and_add("allocation",
+                                                 &fs_info->super_kobj);
+       if (!fs_info->space_info_kobj) {
+               error = -ENOMEM;
+               goto failure;
+       }
+
+       error = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs);
+       if (error)
+               goto failure;
+
+       return 0;
+failure:
+       btrfs_sysfs_remove_one(fs_info);
+       return error;
+}
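+
+/*
+ * Layout created above for each mounted filesystem; the per-space_info
+ * directories under allocation/ are filled in from the allocation code
+ * elsewhere in this series:
+ *
+ *   /sys/fs/btrfs/<fsid>/label
+ *   /sys/fs/btrfs/<fsid>/features/<per-fs feature flags>
+ *   /sys/fs/btrfs/<fsid>/devices/<symlinks to member block devices>
+ *   /sys/fs/btrfs/<fsid>/allocation/global_rsv_{size,reserved}
+ */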
+
 int btrfs_init_sysfs(void)
 {
+       int ret;
        btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
        if (!btrfs_kset)
                return -ENOMEM;
+
+       init_feature_attrs();
+
+       ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
+       if (ret) {
+               kset_unregister(btrfs_kset);
+               return ret;
+       }
+
        return 0;
 }
 
 void btrfs_exit_sysfs(void)
 {
+       sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
        kset_unregister(btrfs_kset);
 }
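
Feature bits surface as files under /sys/fs/btrfs/<fsid>/features/ once a
filesystem is mounted, writable where btrfs_feature_visible() grants S_IWUSR
and backed by btrfs_feature_attr_store() above. A minimal userspace sketch;
the fsid path and feature name below are illustrative, not taken from the
patch:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *attr = "/sys/fs/btrfs/"
                "01234567-89ab-cdef-0123-456789abcdef/features/no_holes";
        int fd = open(attr, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* "1" sets the bit, "0" clears it; transitions not covered by the
         * *_SAFE_SET/*_SAFE_CLEAR masks fail with EPERM */
        if (write(fd, "1", 1) != 1)
                perror("write");
        close(fd);
        return 0;
}
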
 
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
new file mode 100644 (file)
index 0000000..f3cea37
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _BTRFS_SYSFS_H_
+#define _BTRFS_SYSFS_H_
+
+enum btrfs_feature_set {
+       FEAT_COMPAT,
+       FEAT_COMPAT_RO,
+       FEAT_INCOMPAT,
+       FEAT_MAX
+};
+
+#define __INIT_KOBJ_ATTR(_name, _mode, _show, _store)                  \
+{                                                                      \
+       .attr   = { .name = __stringify(_name), .mode = _mode },        \
+       .show   = _show,                                                \
+       .store  = _store,                                               \
+}
+
+#define BTRFS_ATTR_RW(_name, _mode, _show, _store)                     \
+static struct kobj_attribute btrfs_attr_##_name =                      \
+                       __INIT_KOBJ_ATTR(_name, _mode, _show, _store)
+#define BTRFS_ATTR(_name, _mode, _show)                                        \
+       BTRFS_ATTR_RW(_name, _mode, _show, NULL)
+#define BTRFS_ATTR_PTR(_name)    (&btrfs_attr_##_name.attr)
+
+#define BTRFS_RAID_ATTR(_name, _show)                                  \
+static struct kobj_attribute btrfs_raid_attr_##_name =                 \
+                       __INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
+#define BTRFS_RAID_ATTR_PTR(_name)    (&btrfs_raid_attr_##_name.attr)
+
+
+struct btrfs_feature_attr {
+       struct kobj_attribute kobj_attr;
+       enum btrfs_feature_set feature_set;
+       u64 feature_bit;
+};
+
+#define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit)         \
+static struct btrfs_feature_attr btrfs_attr_##_name = {                             \
+       .kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO,                        \
+                                     btrfs_feature_attr_show,               \
+                                     btrfs_feature_attr_store),             \
+       .feature_set    = _feature_set,                                      \
+       .feature_bit    = _prefix ##_## _feature_bit,                        \
+}
+#define BTRFS_FEAT_ATTR_PTR(_name)    (&btrfs_attr_##_name.kobj_attr.attr)
+
+#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
+#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
+       BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
+#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
+       BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
+
+/* convert from attribute */
+#define to_btrfs_feature_attr(a) \
+                       container_of(a, struct btrfs_feature_attr, kobj_attr)
+#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr)
+#define attr_to_btrfs_feature_attr(a) \
+                       to_btrfs_feature_attr(attr_to_btrfs_attr(a))
+char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
+extern const char * const btrfs_feature_set_names[3];
+extern struct kobj_type space_info_ktype;
+extern struct kobj_type btrfs_raid_ktype;
+#endif /* _BTRFS_SYSFS_H_ */
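
For reference, here is roughly what one of the declarations above expands to;
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES) becomes (a hand-expanded sketch,
with __stringify() resolved):

static struct btrfs_feature_attr btrfs_attr_no_holes = {
        .kobj_attr = {
                .attr   = { .name = "no_holes", .mode = S_IRUGO },
                .show   = btrfs_feature_attr_show,
                .store  = btrfs_feature_attr_store,
        },
        .feature_set    = FEAT_INCOMPAT,
        .feature_bit    = BTRFS_FEATURE_INCOMPAT_NO_HOLES,
};
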
index b353bc806ca066be17600283ccc5d1a181af3426..312560a9123dedc69542b33c4931b71901125d12 100644 (file)
@@ -21,7 +21,7 @@
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 
-#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
+#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
 
 int btrfs_test_free_space_cache(void);
 int btrfs_test_extent_buffer_operations(void);
index c6a872a8a46862948e93c343cdd0c7479caf3883..34cd83184c4ad2ff7ce85bb13fea48dfb61198b4 100644 (file)
@@ -62,7 +62,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
-               WARN_ON(transaction->delayed_refs.root.rb_node);
+               WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
                while (!list_empty(&transaction->pending_chunks)) {
                        struct extent_map *em;
 
@@ -183,8 +183,8 @@ loop:
        atomic_set(&cur_trans->use_count, 2);
        cur_trans->start_time = get_seconds();
 
-       cur_trans->delayed_refs.root = RB_ROOT;
-       cur_trans->delayed_refs.num_entries = 0;
+       cur_trans->delayed_refs.href_root = RB_ROOT;
+       atomic_set(&cur_trans->delayed_refs.num_entries, 0);
        cur_trans->delayed_refs.num_heads_ready = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
@@ -196,17 +196,14 @@ loop:
         */
        smp_mb();
        if (!list_empty(&fs_info->tree_mod_seq_list))
-               WARN(1, KERN_ERR "btrfs: tree_mod_seq_list not empty when "
+               WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
                        "creating a fresh transaction\n");
        if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
-               WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
+               WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
                        "creating a fresh transaction\n");
        atomic64_set(&fs_info->tree_mod_seq, 0);
 
        spin_lock_init(&cur_trans->delayed_refs.lock);
-       atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
-       atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
-       init_waitqueue_head(&cur_trans->delayed_refs.wait);
 
        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        INIT_LIST_HEAD(&cur_trans->ordered_operations);
@@ -472,6 +469,7 @@ again:
        h->type = type;
        h->allocating_chunk = false;
        h->reloc_reserved = false;
+       h->sync = false;
        INIT_LIST_HEAD(&h->qgroup_ref_list);
        INIT_LIST_HEAD(&h->new_bgs);
 
@@ -647,7 +645,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
 {
        if (root->fs_info->global_block_rsv.space_info->full &&
-           btrfs_should_throttle_delayed_refs(trans, root))
+           btrfs_check_space_for_delayed_refs(trans, root))
                return 1;
 
        return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
@@ -711,8 +709,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                btrfs_create_pending_block_groups(trans, root);
 
        trans->delayed_ref_updates = 0;
-       if (btrfs_should_throttle_delayed_refs(trans, root)) {
-               cur = max_t(unsigned long, cur, 1);
+       if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
+               cur = max_t(unsigned long, cur, 32);
                trans->delayed_ref_updates = 0;
                btrfs_run_delayed_refs(trans, root, cur);
        }
@@ -788,12 +786,6 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
        return __btrfs_end_transaction(trans, root, 1);
 }
 
-int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root)
-{
-       return __btrfs_end_transaction(trans, root, 1);
-}
-
 /*
  * when btree blocks are allocated, they have some corresponding bits set for
  * them in one of two extent_io trees.  This is used to make sure all of
@@ -1105,7 +1097,7 @@ int btrfs_defrag_root(struct btrfs_root *root)
                        break;
 
                if (btrfs_defrag_cancelled(root->fs_info)) {
-                       printk(KERN_DEBUG "btrfs: defrag_root cancelled\n");
+                       pr_debug("BTRFS: defrag_root cancelled\n");
                        ret = -EAGAIN;
                        break;
                }
@@ -1746,6 +1738,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                goto cleanup_transaction;
 
        btrfs_wait_delalloc_flush(root->fs_info);
+
+       btrfs_scrub_pause(root);
        /*
         * Ok now we need to make sure to block out any other joins while we
         * commit the transaction.  We could have started a join before setting
@@ -1810,7 +1804,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        WARN_ON(cur_trans != trans->transaction);
 
-       btrfs_scrub_pause(root);
        /* btrfs_commit_tree_roots is responsible for getting the
         * various roots consistent with each other.  Every pointer
         * in the tree of tree roots has to point to the most up to date
@@ -1833,6 +1826,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                goto cleanup_transaction;
        }
 
+       /*
+        * Since the transaction is done, set the inode map cache flag
+        * before any other transaction comes in.
+        */
+       if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
+               btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
+       else
+               btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
+
        /* commit_fs_roots gets rid of all the tree log roots, it is now
         * safe to free the root of tree log roots
         */
@@ -1975,10 +1977,23 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
        }
        root = list_first_entry(&fs_info->dead_roots,
                        struct btrfs_root, root_list);
+       /*
+        * Make sure the root is not involved in send; if the
+        * first root is busy, return directly rather than
+        * continuing.
+        */
+       spin_lock(&root->root_item_lock);
+       if (root->send_in_progress) {
+               spin_unlock(&fs_info->trans_lock);
+               spin_unlock(&root->root_item_lock);
+               return 0;
+       }
+       spin_unlock(&root->root_item_lock);
+
        list_del_init(&root->root_list);
        spin_unlock(&fs_info->trans_lock);
 
-       pr_debug("btrfs: cleaner removing %llu\n", root->objectid);
+       pr_debug("BTRFS: cleaner removing %llu\n", root->objectid);
 
        btrfs_kill_all_delayed_nodes(root);
 
index 7657d115067d3f8ae1d5cc39a53410d8a1011673..6ac037e9f9f0557524b7ed29b6b72a768f7a23cf 100644 (file)
@@ -93,6 +93,7 @@ struct btrfs_trans_handle {
        short adding_csums;
        bool allocating_chunk;
        bool reloc_reserved;
+       bool sync;
        unsigned int type;
        /*
         * this root is only needed to validate that the root passed to
@@ -154,8 +155,6 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
                                   int wait_for_unblock);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root);
-int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root);
 void btrfs_throttle(struct btrfs_root *root);
index 9f7fc51ca334864b72336e127d786047dfb1f5de..39d83da03e0398db90428e52b98524982da469b5 100644 (file)
@@ -570,7 +570,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
                        nbytes = 0;
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
-               size = btrfs_file_extent_inline_len(eb, item);
+               size = btrfs_file_extent_inline_len(eb, slot, item);
                nbytes = btrfs_file_extent_ram_bytes(eb, item);
                extent_end = ALIGN(start + size, root->sectorsize);
        } else {
@@ -1238,7 +1238,8 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root, u64 offset)
 {
        int ret;
-       ret = btrfs_find_orphan_item(root, offset);
+       ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
+                       offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
        if (ret > 0)
                ret = btrfs_insert_orphan_item(trans, root, offset);
        return ret;
@@ -3194,7 +3195,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
 static noinline int copy_items(struct btrfs_trans_handle *trans,
                               struct inode *inode,
                               struct btrfs_path *dst_path,
-                              struct extent_buffer *src,
+                              struct btrfs_path *src_path, u64 *last_extent,
                               int start_slot, int nr, int inode_only)
 {
        unsigned long src_offset;
@@ -3202,6 +3203,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
        struct btrfs_file_extent_item *extent;
        struct btrfs_inode_item *inode_item;
+       struct extent_buffer *src = src_path->nodes[0];
+       struct btrfs_key first_key, last_key, key;
        int ret;
        struct btrfs_key *ins_keys;
        u32 *ins_sizes;
@@ -3209,6 +3212,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        int i;
        struct list_head ordered_sums;
        int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+       bool has_extents = false;
+       bool need_find_last_extent = (*last_extent == 0);
+       bool done = false;
 
        INIT_LIST_HEAD(&ordered_sums);
 
@@ -3217,6 +3223,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        if (!ins_data)
                return -ENOMEM;
 
+       first_key.objectid = (u64)-1;
+
        ins_sizes = (u32 *)ins_data;
        ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
 
@@ -3237,6 +3245,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 
                src_offset = btrfs_item_ptr_offset(src, start_slot + i);
 
+               if (i == nr - 1)
+                       last_key = ins_keys[i];
+
                if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
                        inode_item = btrfs_item_ptr(dst_path->nodes[0],
                                                    dst_path->slots[0],
@@ -3248,6 +3259,21 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                           src_offset, ins_sizes[i]);
                }
 
+               /*
+                * need_find_last_extent is cleared here for the case where we
+                * were processing other items and then walked into the first
+                * extent in the inode.  If we don't hit an extent then nothing
+                * changes, we'll do the last search the next time around.
+                */
+               if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
+                       has_extents = true;
+                       if (need_find_last_extent &&
+                           first_key.objectid == (u64)-1)
+                               first_key = ins_keys[i];
+               } else {
+                       need_find_last_extent = false;
+               }
+
                /* take a reference on file data extents so that truncates
                 * or deletes of this inode don't have to relog the inode
                 * again
@@ -3312,6 +3338,128 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                list_del(&sums->list);
                kfree(sums);
        }
+
+       if (!has_extents)
+               return ret;
+
+       /*
+        * Because we use btrfs_search_forward we could skip leaves that were
+        * not modified and then assume *last_extent is valid when it really
+        * isn't.  So back up to the previous leaf and read the end of the last
+        * extent before we go and fill in holes.
+        */
+       if (need_find_last_extent) {
+               u64 len;
+
+               ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
+               if (ret < 0)
+                       return ret;
+               if (ret)
+                       goto fill_holes;
+               if (src_path->slots[0])
+                       src_path->slots[0]--;
+               src = src_path->nodes[0];
+               btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
+               if (key.objectid != btrfs_ino(inode) ||
+                   key.type != BTRFS_EXTENT_DATA_KEY)
+                       goto fill_holes;
+               extent = btrfs_item_ptr(src, src_path->slots[0],
+                                       struct btrfs_file_extent_item);
+               if (btrfs_file_extent_type(src, extent) ==
+                   BTRFS_FILE_EXTENT_INLINE) {
+                       len = btrfs_file_extent_inline_len(src,
+                                                          src_path->slots[0],
+                                                          extent);
+                       *last_extent = ALIGN(key.offset + len,
+                                            log->sectorsize);
+               } else {
+                       len = btrfs_file_extent_num_bytes(src, extent);
+                       *last_extent = key.offset + len;
+               }
+       }
+fill_holes:
+       /*
+        * So we did prev_leaf, now we need to move to the next leaf, but a few
+        * things could have happened:
+        *
+        * 1) A merge could have happened, so we could currently be on a leaf
+        * that holds what we were copying in the first place.
+        * 2) A split could have happened, and now not all of the items we want
+        * are on the same leaf.
+        *
+        * So we need to adjust how we search for holes, we need to drop the
+        * path and re-search for the first extent key we found, and then walk
+        * forward until we hit the last one we copied.
+        */
+       if (need_find_last_extent) {
+               /* btrfs_prev_leaf could return 1 without releasing the path */
+               btrfs_release_path(src_path);
+               ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
+                                       src_path, 0, 0);
+               if (ret < 0)
+                       return ret;
+               ASSERT(ret == 0);
+               src = src_path->nodes[0];
+               i = src_path->slots[0];
+       } else {
+               i = start_slot;
+       }
+
+       /*
+        * Ok so here we need to go through and fill in any holes we may have
+        * to make sure that holes are punched for those areas in case they had
+        * extents previously.
+        */
+       while (!done) {
+               u64 offset, len;
+               u64 extent_end;
+
+               if (i >= btrfs_header_nritems(src_path->nodes[0])) {
+                       ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
+                       if (ret < 0)
+                               return ret;
+                       ASSERT(ret == 0);
+                       src = src_path->nodes[0];
+                       i = 0;
+               }
+
+               btrfs_item_key_to_cpu(src, &key, i);
+               if (!btrfs_comp_cpu_keys(&key, &last_key))
+                       done = true;
+               if (key.objectid != btrfs_ino(inode) ||
+                   key.type != BTRFS_EXTENT_DATA_KEY) {
+                       i++;
+                       continue;
+               }
+               extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
+               if (btrfs_file_extent_type(src, extent) ==
+                   BTRFS_FILE_EXTENT_INLINE) {
+                       len = btrfs_file_extent_inline_len(src, i, extent);
+                       extent_end = ALIGN(key.offset + len, log->sectorsize);
+               } else {
+                       len = btrfs_file_extent_num_bytes(src, extent);
+                       extent_end = key.offset + len;
+               }
+               i++;
+
+               if (*last_extent == key.offset) {
+                       *last_extent = extent_end;
+                       continue;
+               }
+               offset = *last_extent;
+               len = key.offset - *last_extent;
+               ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
+                                              offset, 0, 0, len, 0, len, 0,
+                                              0, 0);
+               if (ret)
+                       break;
+               *last_extent = offset + len;
+       }
+       /*
+        * Need to let the callers know we dropped the path so they should
+        * re-search.
+        */
+       if (!ret && need_find_last_extent)
+               ret = 1;
        return ret;
 }
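
The hole-filling walk above reduces to one cursor: *last_extent tracks the end of the last file extent item seen, and whenever the next item's key.offset lies past that cursor, the gap in between must be logged as a hole. A minimal userspace sketch of just that cursor logic, using made-up extents in place of btrfs tree items (nothing below is kernel API):

	#include <stdio.h>

	struct extent { unsigned long long offset, len; };

	int main(void)
	{
		/* sorted file extents with a gap between 8192 and 16384 */
		struct extent items[] = { {0, 4096}, {4096, 4096}, {16384, 4096} };
		unsigned long long last_extent = 0;	/* cursor, as in copy_items() */
		unsigned int i;

		for (i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
			if (items[i].offset > last_extent)	/* gap => log a hole */
				printf("hole at %llu, len %llu\n", last_extent,
				       items[i].offset - last_extent);
			last_extent = items[i].offset + items[i].len;
		}
		return 0;
	}
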
 
@@ -3349,21 +3497,27 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
        int ret;
        int index = log->log_transid % 2;
        bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-
-       ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
-                                  em->start + em->len, NULL, 0);
-       if (ret)
-               return ret;
+       int extent_inserted = 0;
 
        INIT_LIST_HEAD(&ordered_sums);
        btrfs_init_map_token(&token);
-       key.objectid = btrfs_ino(inode);
-       key.type = BTRFS_EXTENT_DATA_KEY;
-       key.offset = em->start;
 
-       ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
+       ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+                                  em->start + em->len, NULL, 0, 1,
+                                  sizeof(*fi), &extent_inserted);
        if (ret)
                return ret;
+
+       if (!extent_inserted) {
+               key.objectid = btrfs_ino(inode);
+               key.type = BTRFS_EXTENT_DATA_KEY;
+               key.offset = em->start;
+
+               ret = btrfs_insert_empty_item(trans, log, path, &key,
+                                             sizeof(*fi));
+               if (ret)
+                       return ret;
+       }
        leaf = path->nodes[0];
        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);
@@ -3485,7 +3639,11 @@ again:
                 * start over after this.
                 */
 
-               wait_event(ordered->wait, ordered->csum_bytes_left == 0);
+               if (ordered->csum_bytes_left) {
+                       btrfs_start_ordered_extent(inode, ordered, 0);
+                       wait_event(ordered->wait,
+                                  ordered->csum_bytes_left == 0);
+               }
 
                list_for_each_entry(sum, &ordered->list, list) {
                        ret = btrfs_csum_file_blocks(trans, log, sum);
@@ -3630,6 +3788,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct btrfs_key max_key;
        struct btrfs_root *log = root->log_root;
        struct extent_buffer *src = NULL;
+       u64 last_extent = 0;
        int err = 0;
        int ret;
        int nritems;
@@ -3745,11 +3904,15 @@ again:
                        goto next_slot;
                }
 
-               ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
-                                ins_nr, inode_only);
-               if (ret) {
+               ret = copy_items(trans, inode, dst_path, path, &last_extent,
+                                ins_start_slot, ins_nr, inode_only);
+               if (ret < 0) {
                        err = ret;
                        goto out_unlock;
+               } else if (ret) {
+                       ins_nr = 0;
+                       btrfs_release_path(path);
+                       continue;
                }
                ins_nr = 1;
                ins_start_slot = path->slots[0];
@@ -3763,13 +3926,14 @@ next_slot:
                        goto again;
                }
                if (ins_nr) {
-                       ret = copy_items(trans, inode, dst_path, src,
-                                        ins_start_slot,
+                       ret = copy_items(trans, inode, dst_path, path,
+                                        &last_extent, ins_start_slot,
                                         ins_nr, inode_only);
-                       if (ret) {
+                       if (ret < 0) {
                                err = ret;
                                goto out_unlock;
                        }
+                       ret = 0;
                        ins_nr = 0;
                }
                btrfs_release_path(path);
@@ -3784,12 +3948,13 @@ next_slot:
                }
        }
        if (ins_nr) {
-               ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
-                                ins_nr, inode_only);
-               if (ret) {
+               ret = copy_items(trans, inode, dst_path, path, &last_extent,
+                                ins_start_slot, ins_nr, inode_only);
+               if (ret < 0) {
                        err = ret;
                        goto out_unlock;
                }
+               ret = 0;
                ins_nr = 0;
        }
 
index b0a523b2c60ee8e73cd5165382892918ddad269e..840a38b2778a6e97ee013f0d2333e179c3c475e3 100644 (file)
@@ -5,8 +5,8 @@
  */
 
 #include <linux/slab.h>
-#include <linux/export.h>
 #include "ulist.h"
+#include "ctree.h"
 
 /*
  * ulist is a generic data structure to hold a collection of unique u64
  * enumerating it.
  * It is possible to store an auxiliary value along with the key.
  *
- * The implementation is preliminary and can probably be sped up
- * significantly. A first step would be to store the values in an rbtree
- * as soon as ULIST_SIZE is exceeded.
- *
  * A sample usage for ulists is the enumeration of directed graphs without
  * visiting a node twice. The pseudo-code could look like this:
  *
  */
 void ulist_init(struct ulist *ulist)
 {
-       ulist->nnodes = 0;
-       ulist->nodes = ulist->int_nodes;
-       ulist->nodes_alloced = ULIST_SIZE;
+       INIT_LIST_HEAD(&ulist->nodes);
        ulist->root = RB_ROOT;
+       ulist->nnodes = 0;
 }
-EXPORT_SYMBOL(ulist_init);
 
 /**
  * ulist_fini - free up additionally allocated memory for the ulist
@@ -64,18 +58,17 @@ EXPORT_SYMBOL(ulist_init);
  * This is useful in cases where the base 'struct ulist' has been statically
  * allocated.
  */
-void ulist_fini(struct ulist *ulist)
+static void ulist_fini(struct ulist *ulist)
 {
-       /*
-        * The first ULIST_SIZE elements are stored inline in struct ulist.
-        * Only if more elements are alocated they need to be freed.
-        */
-       if (ulist->nodes_alloced > ULIST_SIZE)
-               kfree(ulist->nodes);
-       ulist->nodes_alloced = 0;       /* in case ulist_fini is called twice */
+       struct ulist_node *node;
+       struct ulist_node *next;
+
+       list_for_each_entry_safe(node, next, &ulist->nodes, list) {
+               kfree(node);
+       }
        ulist->root = RB_ROOT;
+       INIT_LIST_HEAD(&ulist->nodes);
 }
-EXPORT_SYMBOL(ulist_fini);
 
 /**
  * ulist_reinit - prepare a ulist for reuse
@@ -89,7 +82,6 @@ void ulist_reinit(struct ulist *ulist)
        ulist_fini(ulist);
        ulist_init(ulist);
 }
-EXPORT_SYMBOL(ulist_reinit);
 
 /**
  * ulist_alloc - dynamically allocate a ulist
@@ -108,7 +100,6 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
 
        return ulist;
 }
-EXPORT_SYMBOL(ulist_alloc);
 
 /**
  * ulist_free - free dynamically allocated ulist
@@ -123,7 +114,6 @@ void ulist_free(struct ulist *ulist)
        ulist_fini(ulist);
        kfree(ulist);
 }
-EXPORT_SYMBOL(ulist_free);
 
 static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
 {
@@ -192,63 +182,32 @@ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
 int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
                    u64 *old_aux, gfp_t gfp_mask)
 {
-       int ret = 0;
-       struct ulist_node *node = NULL;
+       int ret;
+       struct ulist_node *node;
+
        node = ulist_rbtree_search(ulist, val);
        if (node) {
                if (old_aux)
                        *old_aux = node->aux;
                return 0;
        }
+       node = kmalloc(sizeof(*node), gfp_mask);
+       if (!node)
+               return -ENOMEM;
 
-       if (ulist->nnodes >= ulist->nodes_alloced) {
-               u64 new_alloced = ulist->nodes_alloced + 128;
-               struct ulist_node *new_nodes;
-               void *old = NULL;
-               int i;
-
-               for (i = 0; i < ulist->nnodes; i++)
-                       rb_erase(&ulist->nodes[i].rb_node, &ulist->root);
-
-               /*
-                * if nodes_alloced == ULIST_SIZE no memory has been allocated
-                * yet, so pass NULL to krealloc
-                */
-               if (ulist->nodes_alloced > ULIST_SIZE)
-                       old = ulist->nodes;
+       node->val = val;
+       node->aux = aux;
+#ifdef CONFIG_BTRFS_DEBUG
+       node->seqnum = ulist->nnodes;
+#endif
 
-               new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced,
-                                    gfp_mask);
-               if (!new_nodes)
-                       return -ENOMEM;
-
-               if (!old)
-                       memcpy(new_nodes, ulist->int_nodes,
-                              sizeof(ulist->int_nodes));
-
-               ulist->nodes = new_nodes;
-               ulist->nodes_alloced = new_alloced;
-
-               /*
-                * krealloc actually uses memcpy, which does not copy rb_node
-                * pointers, so we have to do it ourselves.  Otherwise we may
-                * be bitten by crashes.
-                */
-               for (i = 0; i < ulist->nnodes; i++) {
-                       ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]);
-                       if (ret < 0)
-                               return ret;
-               }
-       }
-       ulist->nodes[ulist->nnodes].val = val;
-       ulist->nodes[ulist->nnodes].aux = aux;
-       ret = ulist_rbtree_insert(ulist, &ulist->nodes[ulist->nnodes]);
-       BUG_ON(ret);
-       ++ulist->nnodes;
+       ret = ulist_rbtree_insert(ulist, node);
+       ASSERT(!ret);
+       list_add_tail(&node->list, &ulist->nodes);
+       ulist->nnodes++;
 
        return 1;
 }
-EXPORT_SYMBOL(ulist_add);
 
 /**
  * ulist_next - iterate ulist
@@ -268,11 +227,25 @@ EXPORT_SYMBOL(ulist_add);
  */
 struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
 {
-       if (ulist->nnodes == 0)
+       struct ulist_node *node;
+
+       if (list_empty(&ulist->nodes))
                return NULL;
-       if (uiter->i < 0 || uiter->i >= ulist->nnodes)
+       if (uiter->cur_list && uiter->cur_list->next == &ulist->nodes)
                return NULL;
-
-       return &ulist->nodes[uiter->i++];
+       if (uiter->cur_list) {
+               uiter->cur_list = uiter->cur_list->next;
+       } else {
+               uiter->cur_list = ulist->nodes.next;
+#ifdef CONFIG_BTRFS_DEBUG
+               uiter->i = 0;
+#endif
+       }
+       node = list_entry(uiter->cur_list, struct ulist_node, list);
+#ifdef CONFIG_BTRFS_DEBUG
+       ASSERT(node->seqnum == uiter->i);
+       ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
+       uiter->i++;
+#endif
+       return node;
 }
-EXPORT_SYMBOL(ulist_next);
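
With the rework, the rbtree keeps ulist_add() deduplication at O(log n) while the list preserves insertion order for ulist_next(); and since nodes are now individually allocated they never move, so the rb_node pointers stay valid without the re-insertion dance the removed krealloc path needed. A sketch of a typical caller, assuming kernel context (process() is a placeholder, not a real helper):

	struct ulist *roots;
	struct ulist_iterator uiter;
	struct ulist_node *node;

	roots = ulist_alloc(GFP_NOFS);
	if (!roots)
		return -ENOMEM;

	ulist_add(roots, root_id, 0, GFP_NOFS);		/* rbtree rejects duplicates */

	ULIST_ITER_INIT(&uiter);			/* sets cur_list to NULL */
	while ((node = ulist_next(roots, &uiter)))	/* walks the list in order */
		process(node->val, node->aux);

	ulist_free(roots);
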
index fb36731074b5fa6ed5e27be3cca08caf329631ee..7f78cbf5cf413636e7b6072efca05110dce8b211 100644 (file)
  * enumerating it.
  * It is possible to store an auxiliary value along with the key.
  *
- * The implementation is preliminary and can probably be sped up
- * significantly. A first step would be to store the values in an rbtree
- * as soon as ULIST_SIZE is exceeded.
  */
-
-/*
- * number of elements statically allocated inside struct ulist
- */
-#define ULIST_SIZE 16
-
 struct ulist_iterator {
+#ifdef CONFIG_BTRFS_DEBUG
        int i;
+#endif
+       struct list_head *cur_list;  /* hint to start search */
 };
 
 /*
@@ -37,6 +31,12 @@ struct ulist_iterator {
 struct ulist_node {
        u64 val;                /* value to store */
        u64 aux;                /* auxiliary value saved along with the val */
+
+#ifdef CONFIG_BTRFS_DEBUG
+       int seqnum;             /* order in which this node was added */
+#endif
+
+       struct list_head list;  /* used to link node */
        struct rb_node rb_node; /* used to speed up search */
 };
 
@@ -46,28 +46,11 @@ struct ulist {
         */
        unsigned long nnodes;
 
-       /*
-        * number of nodes we already have room for
-        */
-       unsigned long nodes_alloced;
-
-       /*
-        * pointer to the array storing the elements. The first ULIST_SIZE
-        * elements are stored inline. In this case the it points to int_nodes.
-        * After exceeding ULIST_SIZE, dynamic memory is allocated.
-        */
-       struct ulist_node *nodes;
-
+       struct list_head nodes;
        struct rb_root root;
-
-       /*
-        * inline storage space for the first ULIST_SIZE entries
-        */
-       struct ulist_node int_nodes[ULIST_SIZE];
 };
 
 void ulist_init(struct ulist *ulist);
-void ulist_fini(struct ulist *ulist);
 void ulist_reinit(struct ulist *ulist);
 struct ulist *ulist_alloc(gfp_t gfp_mask);
 void ulist_free(struct ulist *ulist);
@@ -77,6 +60,6 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
 struct ulist_node *ulist_next(struct ulist *ulist,
                              struct ulist_iterator *uiter);
 
-#define ULIST_ITER_INIT(uiter) ((uiter)->i = 0)
+#define ULIST_ITER_INIT(uiter) ((uiter)->cur_list = NULL)
 
 #endif
index fbda90004fe9ef08ab0b93b628485c63ee33d1e0..f6a4c03ee7d8fc1cf3aedc3f2b8edf0007992ff7 100644 (file)
@@ -69,7 +69,7 @@ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
        ret = -ENOENT;
 
        if (!IS_ALIGNED(item_size, sizeof(u64))) {
-               pr_warn("btrfs: uuid item with illegal size %lu!\n",
+               btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
                        (unsigned long)item_size);
                goto out;
        }
@@ -137,7 +137,8 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
                offset = btrfs_item_ptr_offset(eb, slot);
                offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
        } else if (ret < 0) {
-               pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n",
+               btrfs_warn(uuid_root->fs_info, "insert uuid item failed %d "
+                       "(0x%016llx, 0x%016llx) type %u!",
                        ret, (unsigned long long)key.objectid,
                        (unsigned long long)key.offset, type);
                goto out;
@@ -183,7 +184,7 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
 
        ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
        if (ret < 0) {
-               pr_warn("btrfs: error %d while searching for uuid item!\n",
+               btrfs_warn(uuid_root->fs_info, "error %d while searching for uuid item!",
                        ret);
                goto out;
        }
@@ -197,7 +198,7 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
        offset = btrfs_item_ptr_offset(eb, slot);
        item_size = btrfs_item_size_nr(eb, slot);
        if (!IS_ALIGNED(item_size, sizeof(u64))) {
-               pr_warn("btrfs: uuid item with illegal size %lu!\n",
+               btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
                        (unsigned long)item_size);
                ret = -ENOENT;
                goto out;
@@ -299,7 +300,7 @@ again_search_slot:
                offset = btrfs_item_ptr_offset(leaf, slot);
                item_size = btrfs_item_size_nr(leaf, slot);
                if (!IS_ALIGNED(item_size, sizeof(u64))) {
-                       pr_warn("btrfs: uuid item with illegal size %lu!\n",
+                       btrfs_warn(fs_info, "uuid item with illegal size %lu!",
                                (unsigned long)item_size);
                        goto skip;
                }
@@ -349,6 +350,6 @@ skip:
 out:
        btrfs_free_path(path);
        if (ret)
-               pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret);
+               btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
        return 0;
 }
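
The message conversions here and in the volumes.c hunks below follow one rule: btrfs_warn()/btrfs_err()/btrfs_info() take an fs_info so the message is tied to a specific filesystem, and the helpers supply the "BTRFS" prefix and the trailing newline themselves, so neither belongs in the format string. Roughly:

	/* old */
	pr_warn("btrfs: uuid item with illegal size %lu!\n", item_size);

	/* new: no "btrfs:" prefix, no "\n" */
	btrfs_warn(fs_info, "uuid item with illegal size %lu!", item_size);
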
index 54d2685a3071f512bc8d8a5c67a60a03f179b29b..bab0b84d8f806adf711b797c0909cb9dd664cc95 100644 (file)
@@ -125,7 +125,7 @@ static void btrfs_kobject_uevent(struct block_device *bdev,
 
        ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
        if (ret)
-               pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n",
+               pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
                        action,
                        kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
                        &disk_to_dev(bdev->bd_disk)->kobj);
@@ -200,7 +200,7 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
 
        if (IS_ERR(*bdev)) {
                ret = PTR_ERR(*bdev);
-               printk(KERN_INFO "btrfs: open %s failed\n", device_path);
+               printk(KERN_INFO "BTRFS: open %s failed\n", device_path);
                goto error;
        }
 
@@ -912,9 +912,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        if (disk_super->label[0]) {
                if (disk_super->label[BTRFS_LABEL_SIZE - 1])
                        disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
-               printk(KERN_INFO "btrfs: device label %s ", disk_super->label);
+               printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
        } else {
-               printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid);
+               printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
        }
 
        printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
@@ -1813,7 +1813,7 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
                }
 
                if (!*device) {
-                       pr_err("btrfs: no missing device found\n");
+                       btrfs_err(root->fs_info, "no missing device found");
                        return -ENOENT;
                }
 
@@ -3052,7 +3052,7 @@ loop:
 error:
        btrfs_free_path(path);
        if (enospc_errors) {
-               printk(KERN_INFO "btrfs: %d enospc errors during balance\n",
+               btrfs_info(fs_info, "%d enospc errors during balance",
                       enospc_errors);
                if (!ret)
                        ret = -ENOSPC;
@@ -3138,8 +3138,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
                    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
                    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
-                       printk(KERN_ERR "btrfs: with mixed groups data and "
-                              "metadata balance options must be the same\n");
+                       btrfs_err(fs_info, "with mixed groups data and "
+                                  "metadata balance options must be the same");
                        ret = -EINVAL;
                        goto out;
                }
@@ -3165,8 +3165,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->data.target, 1) ||
             (bctl->data.target & ~allowed))) {
-               printk(KERN_ERR "btrfs: unable to start balance with target "
-                      "data profile %llu\n",
+               btrfs_err(fs_info, "unable to start balance with target "
+                          "data profile %llu",
                       bctl->data.target);
                ret = -EINVAL;
                goto out;
@@ -3174,8 +3174,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->meta.target, 1) ||
             (bctl->meta.target & ~allowed))) {
-               printk(KERN_ERR "btrfs: unable to start balance with target "
-                      "metadata profile %llu\n",
+               btrfs_err(fs_info,
+                          "unable to start balance with target metadata profile %llu",
                       bctl->meta.target);
                ret = -EINVAL;
                goto out;
@@ -3183,8 +3183,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (!alloc_profile_is_valid(bctl->sys.target, 1) ||
             (bctl->sys.target & ~allowed))) {
-               printk(KERN_ERR "btrfs: unable to start balance with target "
-                      "system profile %llu\n",
+               btrfs_err(fs_info,
+                          "unable to start balance with target system profile %llu",
                       bctl->sys.target);
                ret = -EINVAL;
                goto out;
@@ -3193,7 +3193,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        /* allow dup'ed data chunks only in mixed mode */
        if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
            (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
-               printk(KERN_ERR "btrfs: dup for data is not allowed\n");
+               btrfs_err(fs_info, "dup for data is not allowed");
                ret = -EINVAL;
                goto out;
        }
@@ -3213,11 +3213,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                     (fs_info->avail_metadata_alloc_bits & allowed) &&
                     !(bctl->meta.target & allowed))) {
                        if (bctl->flags & BTRFS_BALANCE_FORCE) {
-                               printk(KERN_INFO "btrfs: force reducing metadata "
-                                      "integrity\n");
+                               btrfs_info(fs_info, "force reducing metadata integrity");
                        } else {
-                               printk(KERN_ERR "btrfs: balance will reduce metadata "
-                                      "integrity, use force if you want this\n");
+                               btrfs_err(fs_info, "balance will reduce metadata "
+                                          "integrity, use force if you want this");
                                ret = -EINVAL;
                                goto out;
                        }
@@ -3303,7 +3302,7 @@ static int balance_kthread(void *data)
        mutex_lock(&fs_info->balance_mutex);
 
        if (fs_info->balance_ctl) {
-               printk(KERN_INFO "btrfs: continuing balance\n");
+               btrfs_info(fs_info, "continuing balance");
                ret = btrfs_balance(fs_info->balance_ctl, NULL);
        }
 
@@ -3325,7 +3324,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
        spin_unlock(&fs_info->balance_lock);
 
        if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
-               printk(KERN_INFO "btrfs: force skipping balance\n");
+               btrfs_info(fs_info, "force skipping balance");
                return 0;
        }
 
@@ -3543,7 +3542,7 @@ update_tree:
                                                  BTRFS_UUID_KEY_SUBVOL,
                                                  key.objectid);
                        if (ret < 0) {
-                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                               btrfs_warn(fs_info, "uuid_tree_add failed %d",
                                        ret);
                                break;
                        }
@@ -3555,7 +3554,7 @@ update_tree:
                                                 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
                                                  key.objectid);
                        if (ret < 0) {
-                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                               btrfs_warn(fs_info, "uuid_tree_add failed %d",
                                        ret);
                                break;
                        }
@@ -3590,7 +3589,7 @@ out:
        if (trans && !IS_ERR(trans))
                btrfs_end_transaction(trans, fs_info->uuid_root);
        if (ret)
-               pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
+               btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
        else
                fs_info->update_uuid_tree_gen = 1;
        up(&fs_info->uuid_tree_rescan_sem);
@@ -3654,7 +3653,7 @@ static int btrfs_uuid_rescan_kthread(void *data)
         */
        ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
        if (ret < 0) {
-               pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
+               btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
                up(&fs_info->uuid_tree_rescan_sem);
                return ret;
        }
@@ -3695,7 +3694,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
        task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
        if (IS_ERR(task)) {
                /* fs_info->update_uuid_tree_gen remains 0 in all error case */
-               pr_warn("btrfs: failed to start uuid_scan task\n");
+               btrfs_warn(fs_info, "failed to start uuid_scan task");
                up(&fs_info->uuid_tree_rescan_sem);
                return PTR_ERR(task);
        }
@@ -3711,7 +3710,7 @@ int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
        task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
        if (IS_ERR(task)) {
                /* fs_info->update_uuid_tree_gen remains 0 in all error case */
-               pr_warn("btrfs: failed to start uuid_rescan task\n");
+               btrfs_warn(fs_info, "failed to start uuid_rescan task");
                up(&fs_info->uuid_tree_rescan_sem);
                return PTR_ERR(task);
        }
@@ -4033,7 +4032,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                max_stripe_size = 32 * 1024 * 1024;
                max_chunk_size = 2 * max_stripe_size;
        } else {
-               printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
+               btrfs_err(info, "invalid chunk type 0x%llx requested",
                       type);
                BUG_ON(1);
        }
@@ -4065,7 +4064,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
                if (!device->writeable) {
                        WARN(1, KERN_ERR
-                              "btrfs: read-only device in alloc_list\n");
+                              "BTRFS: read-only device in alloc_list\n");
                        continue;
                }
 
@@ -5193,13 +5192,13 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        read_unlock(&em_tree->lock);
 
        if (!em) {
-               printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
+               printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n",
                       chunk_start);
                return -EIO;
        }
 
        if (em->start != chunk_start) {
-               printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
+               printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n",
                       em->start, chunk_start);
                free_extent_map(em);
                return -EIO;
@@ -6130,7 +6129,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
        ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
        if (ret < 0) {
-               printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
+               printk_in_rcu(KERN_WARNING "BTRFS: "
+                       "error %d while searching for dev_stats item for device %s!\n",
                              ret, rcu_str_deref(device->name));
                goto out;
        }
@@ -6140,7 +6140,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                /* need to delete old one and insert a new one */
                ret = btrfs_del_item(trans, dev_root, path);
                if (ret != 0) {
-                       printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
+                       printk_in_rcu(KERN_WARNING "BTRFS: "
+                               "delete too small dev_stats item for device %s failed %d!\n",
                                      rcu_str_deref(device->name), ret);
                        goto out;
                }
@@ -6153,7 +6154,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                ret = btrfs_insert_empty_item(trans, dev_root, path,
                                              &key, sizeof(*ptr));
                if (ret < 0) {
-                       printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
+                       printk_in_rcu(KERN_WARNING "BTRFS: "
+                                         "insert dev_stats item for device %s failed %d!\n",
                                      rcu_str_deref(device->name), ret);
                        goto out;
                }
@@ -6206,16 +6208,14 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 {
        if (!dev->dev_stats_valid)
                return;
-       printk_ratelimited_in_rcu(KERN_ERR
-                          "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+       printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
+                          "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
                           rcu_str_deref(dev->name),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
-                          btrfs_dev_stat_read(dev,
-                                              BTRFS_DEV_STAT_CORRUPTION_ERRS),
-                          btrfs_dev_stat_read(dev,
-                                              BTRFS_DEV_STAT_GENERATION_ERRS));
+                          btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
+                          btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
 }
 
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
@@ -6228,7 +6228,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
        if (i == BTRFS_DEV_STAT_VALUES_MAX)
                return; /* all values == 0, suppress message */
 
-       printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+       printk_in_rcu(KERN_INFO "BTRFS: "
+                  "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
               rcu_str_deref(dev->name),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
@@ -6249,12 +6250,10 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
        mutex_unlock(&fs_devices->device_list_mutex);
 
        if (!dev) {
-               printk(KERN_WARNING
-                      "btrfs: get dev_stats failed, device not found\n");
+               btrfs_warn(root->fs_info, "get dev_stats failed, device not found");
                return -ENODEV;
        } else if (!dev->dev_stats_valid) {
-               printk(KERN_WARNING
-                      "btrfs: get dev_stats failed, not yet valid\n");
+               btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid");
                return -ENODEV;
        } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
                for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
index 3d1c301c9260299fbecc13eaeade5b6521c3ef26..ad8328d797ea9910c21f0ecd5db6051d347a4571 100644 (file)
@@ -28,6 +28,7 @@
 #include "transaction.h"
 #include "xattr.h"
 #include "disk-io.h"
+#include "props.h"
 
 
 ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -332,7 +333,8 @@ static bool btrfs_is_valid_xattr(const char *name)
                        XATTR_SECURITY_PREFIX_LEN) ||
               !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-              !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+              !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
+               !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
 }
 
 ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
@@ -374,6 +376,10 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        if (!btrfs_is_valid_xattr(name))
                return -EOPNOTSUPP;
 
+       if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+               return btrfs_set_prop(dentry->d_inode, name,
+                                     value, size, flags);
+
        if (size == 0)
                value = "";  /* empty EA, do not remove */
 
@@ -403,6 +409,10 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
        if (!btrfs_is_valid_xattr(name))
                return -EOPNOTSUPP;
 
+       if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+               return btrfs_set_prop(dentry->d_inode, name,
+                                     NULL, 0, XATTR_REPLACE);
+
        return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
                                XATTR_REPLACE);
 }
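
From userspace, properties therefore ride on the ordinary xattr syscalls: any name under the "btrfs." namespace is routed to btrfs_set_prop() instead of being stored as a plain xattr, and removing the xattr clears the property (the XATTR_REPLACE path above). A hedged sketch, assuming the "btrfs.compression" property from the props work is available:

	#include <stdio.h>
	#include <string.h>
	#include <sys/xattr.h>

	int main(int argc, char **argv)
	{
		const char *path = argc > 1 ? argv[1] : ".";

		/* set per-inode compression via the btrfs. xattr namespace */
		if (setxattr(path, "btrfs.compression", "lzo", strlen("lzo"), 0))
			perror("setxattr");

		/* clear the property again */
		if (removexattr(path, "btrfs.compression"))
			perror("removexattr");
		return 0;
	}
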
index 9acb846c3e7f775e78684d2117c861c70b9890bc..8e57191950cb60072944e58fbb9ca0f07f9d4315 100644 (file)
@@ -97,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws,
        *total_in = 0;
 
        if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
-               printk(KERN_WARNING "btrfs: deflateInit failed\n");
+               printk(KERN_WARNING "BTRFS: deflateInit failed\n");
                ret = -1;
                goto out;
        }
@@ -125,7 +125,7 @@ static int zlib_compress_pages(struct list_head *ws,
        while (workspace->def_strm.total_in < len) {
                ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
                if (ret != Z_OK) {
-                       printk(KERN_DEBUG "btrfs: deflate in loop returned %d\n",
+                       printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
                               ret);
                        zlib_deflateEnd(&workspace->def_strm);
                        ret = -1;
@@ -252,7 +252,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
        }
 
        if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
-               printk(KERN_WARNING "btrfs: inflateInit failed\n");
+               printk(KERN_WARNING "BTRFS: inflateInit failed\n");
                return -1;
        }
        while (workspace->inf_strm.total_in < srclen) {
@@ -336,7 +336,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
        }
 
        if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
-               printk(KERN_WARNING "btrfs: inflateInit failed\n");
+               printk(KERN_WARNING "BTRFS: inflateInit failed\n");
                return -1;
        }
 
index 0616ffe45702f0c28fd20390ec9b8574c543032b..03f3b05e8ec17dda4d7b8dcefcc1e723bc6e606c 100644 (file)
@@ -74,6 +74,17 @@ do {                                                         \
        __init_rwsem((sem), #sem, &__key);                      \
 } while (0)
 
+/*
+ * This is the same regardless of which rwsem implementation is being used.
+ * It is just a heuristic meant to be called by somebody already holding the
+ * rwsem to see if somebody of an incompatible type is waiting for access to
+ * the lock.
+ */
+static inline int rwsem_is_contended(struct rw_semaphore *sem)
+{
+       return !list_empty(&sem->wait_list);
+}
+
 /*
  * lock for reading
  */
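
The intended use of rwsem_is_contended() is cooperative: a long-running reader polls the wait list and briefly drops the lock when somebody is queued behind it. A sketch under that assumption (more_work() and do_one_chunk() are placeholders, not kernel API):

	down_read(sem);
	while (more_work()) {
		do_one_chunk();
		if (rwsem_is_contended(sem)) {
			up_read(sem);		/* let the queued waiter in */
			cond_resched();
			down_read(sem);		/* then take the lock back */
		}
	}
	up_read(sem);
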
index 4832d75dcbaedb888a2751303fe7d5a4e62b8010..3176cdc32937f4b62cbcd11e878310f1b086f15e 100644 (file)
@@ -208,17 +208,18 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
                  __entry->refs, __entry->compress_type)
 );
 
-#define show_ordered_flags(flags)                                      \
-       __print_symbolic(flags,                                         \
-               { BTRFS_ORDERED_IO_DONE,        "IO_DONE"       },      \
-               { BTRFS_ORDERED_COMPLETE,       "COMPLETE"      },      \
-               { BTRFS_ORDERED_NOCOW,          "NOCOW"         },      \
-               { BTRFS_ORDERED_COMPRESSED,     "COMPRESSED"    },      \
-               { BTRFS_ORDERED_PREALLOC,       "PREALLOC"      },      \
-               { BTRFS_ORDERED_DIRECT,         "DIRECT"        },      \
-               { BTRFS_ORDERED_IOERR,          "IOERR"         },      \
-               { BTRFS_ORDERED_UPDATED_ISIZE,  "UPDATED_ISIZE" },      \
-               { BTRFS_ORDERED_LOGGED_CSUM,    "LOGGED_CSUM"   })
+#define show_ordered_flags(flags)                                         \
+       __print_flags(flags, "|",                                          \
+               { (1 << BTRFS_ORDERED_IO_DONE),         "IO_DONE"       }, \
+               { (1 << BTRFS_ORDERED_COMPLETE),        "COMPLETE"      }, \
+               { (1 << BTRFS_ORDERED_NOCOW),           "NOCOW"         }, \
+               { (1 << BTRFS_ORDERED_COMPRESSED),      "COMPRESSED"    }, \
+               { (1 << BTRFS_ORDERED_PREALLOC),        "PREALLOC"      }, \
+               { (1 << BTRFS_ORDERED_DIRECT),          "DIRECT"        }, \
+               { (1 << BTRFS_ORDERED_IOERR),           "IOERR"         }, \
+               { (1 << BTRFS_ORDERED_UPDATED_ISIZE),   "UPDATED_ISIZE" }, \
+               { (1 << BTRFS_ORDERED_LOGGED_CSUM),     "LOGGED_CSUM"   }, \
+               { (1 << BTRFS_ORDERED_TRUNCATED),       "TRUNCATED"     })
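
The switch matters because the ordered-extent flags word is a bitmask built from (1 << BTRFS_ORDERED_*) bits: __print_symbolic() matches one exact value, so a mask with several bits set could not be decoded, while __print_flags() decodes bit by bit and joins the names. For example, a mask such as:

	(1 << BTRFS_ORDERED_IO_DONE) | (1 << BTRFS_ORDERED_COMPLETE)

should now render in the trace output as "IO_DONE|COMPLETE".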
 
 
 DECLARE_EVENT_CLASS(btrfs__ordered_extent,
index 45e618921c612385c0c1b97f78fe0e19bac86056..1b8a0f4c95900b14e7d6f71fa54523409d3b2be6 100644 (file)
@@ -184,6 +184,12 @@ struct btrfs_ioctl_fs_info_args {
        __u64 reserved[124];                    /* pad to 1k */
 };
 
+struct btrfs_ioctl_feature_flags {
+       __u64 compat_flags;
+       __u64 compat_ro_flags;
+       __u64 incompat_flags;
+};
+
 /* balance control ioctl modes */
 #define BTRFS_BALANCE_CTL_PAUSE                1
 #define BTRFS_BALANCE_CTL_CANCEL       2
@@ -552,6 +558,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
                                    struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_GLOBAL_RSV _IOR(BTRFS_IOCTL_MAGIC, 20, __u64)
 #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
@@ -606,5 +613,11 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
                                    struct btrfs_ioctl_dev_replace_args)
 #define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
                                         struct btrfs_ioctl_same_args)
+#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+                                  struct btrfs_ioctl_feature_flags)
+#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \
+                                  struct btrfs_ioctl_feature_flags[2])
+#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+                                  struct btrfs_ioctl_feature_flags[3])
 
 #endif /* _UAPI_LINUX_BTRFS_H */
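
From userspace the three feature ioctls share command number 57 but differ in transfer direction and argument size (one struct, an array of two for SET, an array of three for GET_SUPPORTED), which is how the kernel tells them apart. A minimal reader for the current feature flags (assumes a kernel with this series; error handling trimmed):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	int main(int argc, char **argv)
	{
		struct btrfs_ioctl_feature_flags flags;
		int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);

		if (fd < 0 || ioctl(fd, BTRFS_IOC_GET_FEATURES, &flags) < 0) {
			perror("BTRFS_IOC_GET_FEATURES");
			return 1;
		}
		printf("compat 0x%llx compat_ro 0x%llx incompat 0x%llx\n",
		       (unsigned long long)flags.compat_flags,
		       (unsigned long long)flags.compat_ro_flags,
		       (unsigned long long)flags.incompat_flags);
		close(fd);
		return 0;
	}
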
index e4629b93bdd671821f1fd90b98d94a60669fa761..40bbc04b6f8162f6dbd51160a1018598c0c79c26 100644 (file)
@@ -20,6 +20,9 @@
 #define XATTR_MAC_OSX_PREFIX "osx."
 #define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1)
 
+#define XATTR_BTRFS_PREFIX "btrfs."
+#define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1)
+
 #define XATTR_SECURITY_PREFIX  "security."
 #define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
 
index b0b26665c61161d4f33d1dbdaf49fe74275f400e..cb14aeac4ccaeedfe537d423c41c3d0b021e1c9f 100644 (file)
@@ -779,6 +779,7 @@ const struct sysfs_ops kobj_sysfs_ops = {
        .show   = kobj_attr_show,
        .store  = kobj_attr_store,
 };
+EXPORT_SYMBOL_GPL(kobj_sysfs_ops);
 
 /**
  * kset_register - initialize and add a kset.