]> Pileus Git - ~andy/linux/commitdiff
Merge branch 'for-chris' of git://git.jan-o-sch.net/btrfs-unstable into integration
authorChris Mason <chris.mason@oracle.com>
Mon, 16 Jan 2012 20:26:31 +0000 (15:26 -0500)
committerChris Mason <chris.mason@oracle.com>
Mon, 16 Jan 2012 20:26:31 +0000 (15:26 -0500)
21 files changed:
fs/btrfs/Makefile
fs/btrfs/backref.c
fs/btrfs/backref.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/locking.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/ulist.c [new file with mode: 0644]
fs/btrfs/ulist.h [new file with mode: 0644]

index c0ddfd29c5e5a348464d5c3d15a77a7708fd8d79..70798407b9a2ebd48498319496efd1e5ba33789b 100644 (file)
@@ -8,6 +8,6 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
           extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
           export.o tree-log.o free-space-cache.o zlib.o lzo.o \
           compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
-          reada.o backref.o
+          reada.o backref.o ulist.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
index 22c64fff1bd524b213ce8b13a233f861680d8424..b9a843226de859c34c8e17f9172aeab0ff0f3be1 100644 (file)
 #include "ctree.h"
 #include "disk-io.h"
 #include "backref.h"
+#include "ulist.h"
+#include "transaction.h"
+#include "delayed-ref.h"
 
-struct __data_ref {
+/*
+ * this structure records all encountered refs on the way up to the root
+ */
+struct __prelim_ref {
        struct list_head list;
-       u64 inum;
-       u64 root;
-       u64 extent_data_item_offset;
+       u64 root_id;
+       struct btrfs_key key;
+       int level;
+       int count;
+       u64 parent;
+       u64 wanted_disk_byte;
 };
 
-struct __shared_ref {
-       struct list_head list;
+static int __add_prelim_ref(struct list_head *head, u64 root_id,
+                           struct btrfs_key *key, int level, u64 parent,
+                           u64 wanted_disk_byte, int count)
+{
+       struct __prelim_ref *ref;
+
+       /* in case we're adding delayed refs, we're holding the refs spinlock */
+       ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
+       if (!ref)
+               return -ENOMEM;
+
+       ref->root_id = root_id;
+       if (key)
+               ref->key = *key;
+       else
+               memset(&ref->key, 0, sizeof(ref->key));
+
+       ref->level = level;
+       ref->count = count;
+       ref->parent = parent;
+       ref->wanted_disk_byte = wanted_disk_byte;
+       list_add_tail(&ref->list, head);
+
+       return 0;
+}
+
+static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+                               struct ulist *parents,
+                               struct extent_buffer *eb, int level,
+                               u64 wanted_objectid, u64 wanted_disk_byte)
+{
+       int ret;
+       int slot;
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_key key;
        u64 disk_byte;
-};
+
+add_parent:
+       ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
+       if (ret < 0)
+               return ret;
+
+       if (level != 0)
+               return 0;
+
+       /*
+        * if the current leaf is full with EXTENT_DATA items, we must
+        * check the next one if that holds a reference as well.
+        * ref->count cannot be used to skip this check.
+        * repeat this until we don't find any additional EXTENT_DATA items.
+        */
+       while (1) {
+               ret = btrfs_next_leaf(root, path);
+               if (ret < 0)
+                       return ret;
+               if (ret)
+                       return 0;
+
+               eb = path->nodes[0];
+               for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
+                       btrfs_item_key_to_cpu(eb, &key, slot);
+                       if (key.objectid != wanted_objectid ||
+                           key.type != BTRFS_EXTENT_DATA_KEY)
+                               return 0;
+                       fi = btrfs_item_ptr(eb, slot,
+                                               struct btrfs_file_extent_item);
+                       disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+                       if (disk_byte == wanted_disk_byte)
+                               goto add_parent;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * resolve an indirect backref in the form (root_id, key, level)
+ * to a logical address
+ */
+static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+                                       struct __prelim_ref *ref,
+                                       struct ulist *parents)
+{
+       struct btrfs_path *path;
+       struct btrfs_root *root;
+       struct btrfs_key root_key;
+       struct btrfs_key key = {0};
+       struct extent_buffer *eb;
+       int ret = 0;
+       int root_level;
+       int level = ref->level;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       root_key.objectid = ref->root_id;
+       root_key.type = BTRFS_ROOT_ITEM_KEY;
+       root_key.offset = (u64)-1;
+       root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+       if (IS_ERR(root)) {
+               ret = PTR_ERR(root);
+               goto out;
+       }
+
+       rcu_read_lock();
+       root_level = btrfs_header_level(root->node);
+       rcu_read_unlock();
+
+       if (root_level + 1 == level)
+               goto out;
+
+       path->lowest_level = level;
+       ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0);
+       pr_debug("search slot in root %llu (level %d, ref count %d) returned "
+                "%d for key (%llu %u %llu)\n",
+                (unsigned long long)ref->root_id, level, ref->count, ret,
+                (unsigned long long)ref->key.objectid, ref->key.type,
+                (unsigned long long)ref->key.offset);
+       if (ret < 0)
+               goto out;
+
+       eb = path->nodes[level];
+       if (!eb) {
+               WARN_ON(1);
+               ret = 1;
+               goto out;
+       }
+
+       if (level == 0) {
+               if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret)
+                               goto out;
+                       eb = path->nodes[0];
+               }
+
+               btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+       }
+
+       /* the last two parameters will only be used for level == 0 */
+       ret = add_all_parents(root, path, parents, eb, level, key.objectid,
+                               ref->wanted_disk_byte);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * resolve all indirect backrefs from the list
+ */
+static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+                                  struct list_head *head)
+{
+       int err;
+       int ret = 0;
+       struct __prelim_ref *ref;
+       struct __prelim_ref *ref_safe;
+       struct __prelim_ref *new_ref;
+       struct ulist *parents;
+       struct ulist_node *node;
+
+       parents = ulist_alloc(GFP_NOFS);
+       if (!parents)
+               return -ENOMEM;
+
+       /*
+        * _safe allows us to insert directly after the current item without
+        * iterating over the newly inserted items.
+        * we're also allowed to re-assign ref during iteration.
+        */
+       list_for_each_entry_safe(ref, ref_safe, head, list) {
+               if (ref->parent)        /* already direct */
+                       continue;
+               if (ref->count == 0)
+                       continue;
+               err = __resolve_indirect_ref(fs_info, ref, parents);
+               if (err) {
+                       if (ret == 0)
+                               ret = err;
+                       continue;
+               }
+
+               /* we put the first parent into the ref at hand */
+               node = ulist_next(parents, NULL);
+               ref->parent = node ? node->val : 0;
+
+               /* additional parents require new refs being added here */
+               while ((node = ulist_next(parents, node))) {
+                       new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+                       if (!new_ref) {
+                               ret = -ENOMEM;
+                               break;
+                       }
+                       memcpy(new_ref, ref, sizeof(*ref));
+                       new_ref->parent = node->val;
+                       list_add(&new_ref->list, &ref->list);
+               }
+               ulist_reinit(parents);
+       }
+
+       ulist_free(parents);
+       return ret;
+}
+
+/*
+ * merge two lists of backrefs and adjust counts accordingly
+ *
+ * mode = 1: merge identical keys, if key is set
+ * mode = 2: merge identical parents
+ */
+static int __merge_refs(struct list_head *head, int mode)
+{
+       struct list_head *pos1;
+
+       list_for_each(pos1, head) {
+               struct list_head *n2;
+               struct list_head *pos2;
+               struct __prelim_ref *ref1;
+
+               ref1 = list_entry(pos1, struct __prelim_ref, list);
+
+               if (mode == 1 && ref1->key.type == 0)
+                       continue;
+               for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
+                    pos2 = n2, n2 = pos2->next) {
+                       struct __prelim_ref *ref2;
+
+                       ref2 = list_entry(pos2, struct __prelim_ref, list);
+
+                       if (mode == 1) {
+                               if (memcmp(&ref1->key, &ref2->key,
+                                          sizeof(ref1->key)) ||
+                                   ref1->level != ref2->level ||
+                                   ref1->root_id != ref2->root_id)
+                                       continue;
+                               ref1->count += ref2->count;
+                       } else {
+                               if (ref1->parent != ref2->parent)
+                                       continue;
+                               ref1->count += ref2->count;
+                       }
+                       list_del(&ref2->list);
+                       kfree(ref2);
+               }
+
+       }
+       return 0;
+}
+
+/*
+ * add all currently queued delayed refs from this head whose seq nr is
+ * smaller or equal that seq to the list
+ */
+static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
+                             struct btrfs_key *info_key,
+                             struct list_head *prefs)
+{
+       struct btrfs_delayed_extent_op *extent_op = head->extent_op;
+       struct rb_node *n = &head->node.rb_node;
+       int sgn;
+       int ret;
+
+       if (extent_op && extent_op->update_key)
+               btrfs_disk_key_to_cpu(info_key, &extent_op->key);
+
+       while ((n = rb_prev(n))) {
+               struct btrfs_delayed_ref_node *node;
+               node = rb_entry(n, struct btrfs_delayed_ref_node,
+                               rb_node);
+               if (node->bytenr != head->node.bytenr)
+                       break;
+               WARN_ON(node->is_head);
+
+               if (node->seq > seq)
+                       continue;
+
+               switch (node->action) {
+               case BTRFS_ADD_DELAYED_EXTENT:
+               case BTRFS_UPDATE_DELAYED_HEAD:
+                       WARN_ON(1);
+                       continue;
+               case BTRFS_ADD_DELAYED_REF:
+                       sgn = 1;
+                       break;
+               case BTRFS_DROP_DELAYED_REF:
+                       sgn = -1;
+                       break;
+               default:
+                       BUG_ON(1);
+               }
+               switch (node->type) {
+               case BTRFS_TREE_BLOCK_REF_KEY: {
+                       struct btrfs_delayed_tree_ref *ref;
+
+                       ref = btrfs_delayed_node_to_tree_ref(node);
+                       ret = __add_prelim_ref(prefs, ref->root, info_key,
+                                              ref->level + 1, 0, node->bytenr,
+                                              node->ref_mod * sgn);
+                       break;
+               }
+               case BTRFS_SHARED_BLOCK_REF_KEY: {
+                       struct btrfs_delayed_tree_ref *ref;
+
+                       ref = btrfs_delayed_node_to_tree_ref(node);
+                       ret = __add_prelim_ref(prefs, ref->root, info_key,
+                                              ref->level + 1, ref->parent,
+                                              node->bytenr,
+                                              node->ref_mod * sgn);
+                       break;
+               }
+               case BTRFS_EXTENT_DATA_REF_KEY: {
+                       struct btrfs_delayed_data_ref *ref;
+                       struct btrfs_key key;
+
+                       ref = btrfs_delayed_node_to_data_ref(node);
+
+                       key.objectid = ref->objectid;
+                       key.type = BTRFS_EXTENT_DATA_KEY;
+                       key.offset = ref->offset;
+                       ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
+                                              node->bytenr,
+                                              node->ref_mod * sgn);
+                       break;
+               }
+               case BTRFS_SHARED_DATA_REF_KEY: {
+                       struct btrfs_delayed_data_ref *ref;
+                       struct btrfs_key key;
+
+                       ref = btrfs_delayed_node_to_data_ref(node);
+
+                       key.objectid = ref->objectid;
+                       key.type = BTRFS_EXTENT_DATA_KEY;
+                       key.offset = ref->offset;
+                       ret = __add_prelim_ref(prefs, ref->root, &key, 0,
+                                              ref->parent, node->bytenr,
+                                              node->ref_mod * sgn);
+                       break;
+               }
+               default:
+                       WARN_ON(1);
+               }
+               BUG_ON(ret);
+       }
+
+       return 0;
+}
+
+/*
+ * add all inline backrefs for bytenr to the list
+ */
+static int __add_inline_refs(struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path, u64 bytenr,
+                            struct btrfs_key *info_key, int *info_level,
+                            struct list_head *prefs)
+{
+       int ret;
+       int slot;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+       unsigned long ptr;
+       unsigned long end;
+       struct btrfs_extent_item *ei;
+       u64 flags;
+       u64 item_size;
+
+       /*
+        * enumerate all inline refs
+        */
+       leaf = path->nodes[0];
+       slot = path->slots[0] - 1;
+
+       item_size = btrfs_item_size_nr(leaf, slot);
+       BUG_ON(item_size < sizeof(*ei));
+
+       ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+       flags = btrfs_extent_flags(leaf, ei);
+
+       ptr = (unsigned long)(ei + 1);
+       end = (unsigned long)ei + item_size;
+
+       if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+               struct btrfs_tree_block_info *info;
+               struct btrfs_disk_key disk_key;
+
+               info = (struct btrfs_tree_block_info *)ptr;
+               *info_level = btrfs_tree_block_level(leaf, info);
+               btrfs_tree_block_key(leaf, info, &disk_key);
+               btrfs_disk_key_to_cpu(info_key, &disk_key);
+               ptr += sizeof(struct btrfs_tree_block_info);
+               BUG_ON(ptr > end);
+       } else {
+               BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+       }
+
+       while (ptr < end) {
+               struct btrfs_extent_inline_ref *iref;
+               u64 offset;
+               int type;
+
+               iref = (struct btrfs_extent_inline_ref *)ptr;
+               type = btrfs_extent_inline_ref_type(leaf, iref);
+               offset = btrfs_extent_inline_ref_offset(leaf, iref);
+
+               switch (type) {
+               case BTRFS_SHARED_BLOCK_REF_KEY:
+                       ret = __add_prelim_ref(prefs, 0, info_key,
+                                               *info_level + 1, offset,
+                                               bytenr, 1);
+                       break;
+               case BTRFS_SHARED_DATA_REF_KEY: {
+                       struct btrfs_shared_data_ref *sdref;
+                       int count;
+
+                       sdref = (struct btrfs_shared_data_ref *)(iref + 1);
+                       count = btrfs_shared_data_ref_count(leaf, sdref);
+                       ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
+                                              bytenr, count);
+                       break;
+               }
+               case BTRFS_TREE_BLOCK_REF_KEY:
+                       ret = __add_prelim_ref(prefs, offset, info_key,
+                                              *info_level + 1, 0, bytenr, 1);
+                       break;
+               case BTRFS_EXTENT_DATA_REF_KEY: {
+                       struct btrfs_extent_data_ref *dref;
+                       int count;
+                       u64 root;
+
+                       dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+                       count = btrfs_extent_data_ref_count(leaf, dref);
+                       key.objectid = btrfs_extent_data_ref_objectid(leaf,
+                                                                     dref);
+                       key.type = BTRFS_EXTENT_DATA_KEY;
+                       key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+                       root = btrfs_extent_data_ref_root(leaf, dref);
+                       ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr,
+                                               count);
+                       break;
+               }
+               default:
+                       WARN_ON(1);
+               }
+               BUG_ON(ret);
+               ptr += btrfs_extent_inline_ref_size(type);
+       }
+
+       return 0;
+}
+
+/*
+ * add all non-inline backrefs for bytenr to the list
+ */
+static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
+                           struct btrfs_path *path, u64 bytenr,
+                           struct btrfs_key *info_key, int info_level,
+                           struct list_head *prefs)
+{
+       struct btrfs_root *extent_root = fs_info->extent_root;
+       int ret;
+       int slot;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+
+       while (1) {
+               ret = btrfs_next_item(extent_root, path);
+               if (ret < 0)
+                       break;
+               if (ret) {
+                       ret = 0;
+                       break;
+               }
+
+               slot = path->slots[0];
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+
+               if (key.objectid != bytenr)
+                       break;
+               if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
+                       continue;
+               if (key.type > BTRFS_SHARED_DATA_REF_KEY)
+                       break;
+
+               switch (key.type) {
+               case BTRFS_SHARED_BLOCK_REF_KEY:
+                       ret = __add_prelim_ref(prefs, 0, info_key,
+                                               info_level + 1, key.offset,
+                                               bytenr, 1);
+                       break;
+               case BTRFS_SHARED_DATA_REF_KEY: {
+                       struct btrfs_shared_data_ref *sdref;
+                       int count;
+
+                       sdref = btrfs_item_ptr(leaf, slot,
+                                             struct btrfs_shared_data_ref);
+                       count = btrfs_shared_data_ref_count(leaf, sdref);
+                       ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
+                                               bytenr, count);
+                       break;
+               }
+               case BTRFS_TREE_BLOCK_REF_KEY:
+                       ret = __add_prelim_ref(prefs, key.offset, info_key,
+                                               info_level + 1, 0, bytenr, 1);
+                       break;
+               case BTRFS_EXTENT_DATA_REF_KEY: {
+                       struct btrfs_extent_data_ref *dref;
+                       int count;
+                       u64 root;
+
+                       dref = btrfs_item_ptr(leaf, slot,
+                                             struct btrfs_extent_data_ref);
+                       count = btrfs_extent_data_ref_count(leaf, dref);
+                       key.objectid = btrfs_extent_data_ref_objectid(leaf,
+                                                                     dref);
+                       key.type = BTRFS_EXTENT_DATA_KEY;
+                       key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+                       root = btrfs_extent_data_ref_root(leaf, dref);
+                       ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+                                               bytenr, count);
+                       break;
+               }
+               default:
+                       WARN_ON(1);
+               }
+               BUG_ON(ret);
+       }
+
+       return ret;
+}
+
+/*
+ * this adds all existing backrefs (inline backrefs, backrefs and delayed
+ * refs) for the given bytenr to the refs list, merges duplicates and resolves
+ * indirect refs to their parent bytenr.
+ * When roots are found, they're added to the roots list
+ *
+ * FIXME some caching might speed things up
+ */
+static int find_parent_nodes(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info, u64 bytenr,
+                            u64 seq, struct ulist *refs, struct ulist *roots)
+{
+       struct btrfs_key key;
+       struct btrfs_path *path;
+       struct btrfs_key info_key = { 0 };
+       struct btrfs_delayed_ref_root *delayed_refs = NULL;
+       struct btrfs_delayed_ref_head *head = NULL;
+       int info_level = 0;
+       int ret;
+       struct list_head prefs_delayed;
+       struct list_head prefs;
+       struct __prelim_ref *ref;
+
+       INIT_LIST_HEAD(&prefs);
+       INIT_LIST_HEAD(&prefs_delayed);
+
+       key.objectid = bytenr;
+       key.type = BTRFS_EXTENT_ITEM_KEY;
+       key.offset = (u64)-1;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       /*
+        * grab both a lock on the path and a lock on the delayed ref head.
+        * We need both to get a consistent picture of how the refs look
+        * at a specified point in time
+        */
+again:
+       ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       BUG_ON(ret == 0);
+
+       /*
+        * look if there are updates for this ref queued and lock the head
+        */
+       delayed_refs = &trans->transaction->delayed_refs;
+       spin_lock(&delayed_refs->lock);
+       head = btrfs_find_delayed_ref_head(trans, bytenr);
+       if (head) {
+               if (!mutex_trylock(&head->mutex)) {
+                       atomic_inc(&head->node.refs);
+                       spin_unlock(&delayed_refs->lock);
+
+                       btrfs_release_path(path);
+
+                       /*
+                        * Mutex was contended, block until it's
+                        * released and try again
+                        */
+                       mutex_lock(&head->mutex);
+                       mutex_unlock(&head->mutex);
+                       btrfs_put_delayed_ref(&head->node);
+                       goto again;
+               }
+               ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
+               if (ret)
+                       goto out;
+       }
+       spin_unlock(&delayed_refs->lock);
+
+       if (path->slots[0]) {
+               struct extent_buffer *leaf;
+               int slot;
+
+               leaf = path->nodes[0];
+               slot = path->slots[0] - 1;
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid == bytenr &&
+                   key.type == BTRFS_EXTENT_ITEM_KEY) {
+                       ret = __add_inline_refs(fs_info, path, bytenr,
+                                               &info_key, &info_level, &prefs);
+                       if (ret)
+                               goto out;
+                       ret = __add_keyed_refs(fs_info, path, bytenr, &info_key,
+                                              info_level, &prefs);
+                       if (ret)
+                               goto out;
+               }
+       }
+       btrfs_release_path(path);
+
+       /*
+        * when adding the delayed refs above, the info_key might not have
+        * been known yet. Go over the list and replace the missing keys
+        */
+       list_for_each_entry(ref, &prefs_delayed, list) {
+               if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0)
+                       memcpy(&ref->key, &info_key, sizeof(ref->key));
+       }
+       list_splice_init(&prefs_delayed, &prefs);
+
+       ret = __merge_refs(&prefs, 1);
+       if (ret)
+               goto out;
+
+       ret = __resolve_indirect_refs(fs_info, &prefs);
+       if (ret)
+               goto out;
+
+       ret = __merge_refs(&prefs, 2);
+       if (ret)
+               goto out;
+
+       while (!list_empty(&prefs)) {
+               ref = list_first_entry(&prefs, struct __prelim_ref, list);
+               list_del(&ref->list);
+               if (ref->count < 0)
+                       WARN_ON(1);
+               if (ref->count && ref->root_id && ref->parent == 0) {
+                       /* no parent == root of tree */
+                       ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
+                       BUG_ON(ret < 0);
+               }
+               if (ref->count && ref->parent) {
+                       ret = ulist_add(refs, ref->parent, 0, GFP_NOFS);
+                       BUG_ON(ret < 0);
+               }
+               kfree(ref);
+       }
+
+out:
+       if (head)
+               mutex_unlock(&head->mutex);
+       btrfs_free_path(path);
+       while (!list_empty(&prefs)) {
+               ref = list_first_entry(&prefs, struct __prelim_ref, list);
+               list_del(&ref->list);
+               kfree(ref);
+       }
+       while (!list_empty(&prefs_delayed)) {
+               ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
+                                      list);
+               list_del(&ref->list);
+               kfree(ref);
+       }
+
+       return ret;
+}
+
+/*
+ * Finds all leafs with a reference to the specified combination of bytenr and
+ * offset. key_list_head will point to a list of corresponding keys (caller must
+ * free each list element). The leafs will be stored in the leafs ulist, which
+ * must be freed with ulist_free.
+ *
+ * returns 0 on success, <0 on error
+ */
+static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *fs_info, u64 bytenr,
+                               u64 num_bytes, u64 seq, struct ulist **leafs)
+{
+       struct ulist *tmp;
+       int ret;
+
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+       *leafs = ulist_alloc(GFP_NOFS);
+       if (!*leafs) {
+               ulist_free(tmp);
+               return -ENOMEM;
+       }
+
+       ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp);
+       ulist_free(tmp);
+
+       if (ret < 0 && ret != -ENOENT) {
+               ulist_free(*leafs);
+               return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * walk all backrefs for a given extent to find all roots that reference this
+ * extent. Walking a backref means finding all extents that reference this
+ * extent and in turn walk the backrefs of those, too. Naturally this is a
+ * recursive process, but here it is implemented in an iterative fashion: We
+ * find all referencing extents for the extent in question and put them on a
+ * list. In turn, we find all referencing extents for those, further appending
+ * to the list. The way we iterate the list allows adding more elements after
+ * the current while iterating. The process stops when we reach the end of the
+ * list. Found roots are added to the roots list.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *fs_info, u64 bytenr,
+                               u64 num_bytes, u64 seq, struct ulist **roots)
+{
+       struct ulist *tmp;
+       struct ulist_node *node = NULL;
+       int ret;
+
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+       *roots = ulist_alloc(GFP_NOFS);
+       if (!*roots) {
+               ulist_free(tmp);
+               return -ENOMEM;
+       }
+
+       while (1) {
+               ret = find_parent_nodes(trans, fs_info, bytenr, seq,
+                                       tmp, *roots);
+               if (ret < 0 && ret != -ENOENT) {
+                       ulist_free(tmp);
+                       ulist_free(*roots);
+                       return ret;
+               }
+               node = ulist_next(tmp, node);
+               if (!node)
+                       break;
+               bytenr = node->val;
+       }
+
+       ulist_free(tmp);
+       return 0;
+}
+
 
 static int __inode_info(u64 inum, u64 ioff, u8 key_type,
                        struct btrfs_root *fs_root, struct btrfs_path *path,
@@ -181,8 +952,11 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
        btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
        if (found_key->type != BTRFS_EXTENT_ITEM_KEY ||
            found_key->objectid > logical ||
-           found_key->objectid + found_key->offset <= logical)
+           found_key->objectid + found_key->offset <= logical) {
+               pr_debug("logical %llu is not within any extent\n",
+                        (unsigned long long)logical);
                return -ENOENT;
+       }
 
        eb = path->nodes[0];
        item_size = btrfs_item_size_nr(eb, path->slots[0]);
@@ -191,6 +965,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
        ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
        flags = btrfs_extent_flags(eb, ei);
 
+       pr_debug("logical %llu is at position %llu within the extent (%llu "
+                "EXTENT_ITEM %llu) flags %#llx size %u\n",
+                (unsigned long long)logical,
+                (unsigned long long)(logical - found_key->objectid),
+                (unsigned long long)found_key->objectid,
+                (unsigned long long)found_key->offset,
+                (unsigned long long)flags, item_size);
        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                return BTRFS_EXTENT_FLAG_TREE_BLOCK;
        if (flags & BTRFS_EXTENT_FLAG_DATA)
@@ -287,128 +1068,11 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
        return 0;
 }
 
-static int __data_list_add(struct list_head *head, u64 inum,
-                               u64 extent_data_item_offset, u64 root)
-{
-       struct __data_ref *ref;
-
-       ref = kmalloc(sizeof(*ref), GFP_NOFS);
-       if (!ref)
-               return -ENOMEM;
-
-       ref->inum = inum;
-       ref->extent_data_item_offset = extent_data_item_offset;
-       ref->root = root;
-       list_add_tail(&ref->list, head);
-
-       return 0;
-}
-
-static int __data_list_add_eb(struct list_head *head, struct extent_buffer *eb,
-                               struct btrfs_extent_data_ref *dref)
-{
-       return __data_list_add(head, btrfs_extent_data_ref_objectid(eb, dref),
-                               btrfs_extent_data_ref_offset(eb, dref),
-                               btrfs_extent_data_ref_root(eb, dref));
-}
-
-static int __shared_list_add(struct list_head *head, u64 disk_byte)
-{
-       struct __shared_ref *ref;
-
-       ref = kmalloc(sizeof(*ref), GFP_NOFS);
-       if (!ref)
-               return -ENOMEM;
-
-       ref->disk_byte = disk_byte;
-       list_add_tail(&ref->list, head);
-
-       return 0;
-}
-
-static int __iter_shared_inline_ref_inodes(struct btrfs_fs_info *fs_info,
-                                          u64 logical, u64 inum,
-                                          u64 extent_data_item_offset,
-                                          u64 extent_offset,
-                                          struct btrfs_path *path,
-                                          struct list_head *data_refs,
-                                          iterate_extent_inodes_t *iterate,
-                                          void *ctx)
-{
-       u64 ref_root;
-       u32 item_size;
-       struct btrfs_key key;
-       struct extent_buffer *eb;
-       struct btrfs_extent_item *ei;
-       struct btrfs_extent_inline_ref *eiref;
-       struct __data_ref *ref;
-       int ret;
-       int type;
-       int last;
-       unsigned long ptr = 0;
-
-       WARN_ON(!list_empty(data_refs));
-       ret = extent_from_logical(fs_info, logical, path, &key);
-       if (ret & BTRFS_EXTENT_FLAG_DATA)
-               ret = -EIO;
-       if (ret < 0)
-               goto out;
-
-       eb = path->nodes[0];
-       ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
-       item_size = btrfs_item_size_nr(eb, path->slots[0]);
-
-       ret = 0;
-       ref_root = 0;
-       /*
-        * as done in iterate_extent_inodes, we first build a list of refs to
-        * iterate, then free the path and then iterate them to avoid deadlocks.
-        */
-       do {
-               last = __get_extent_inline_ref(&ptr, eb, ei, item_size,
-                                               &eiref, &type);
-               if (last < 0) {
-                       ret = last;
-                       goto out;
-               }
-               if (type == BTRFS_TREE_BLOCK_REF_KEY ||
-                   type == BTRFS_SHARED_BLOCK_REF_KEY) {
-                       ref_root = btrfs_extent_inline_ref_offset(eb, eiref);
-                       ret = __data_list_add(data_refs, inum,
-                                               extent_data_item_offset,
-                                               ref_root);
-               }
-       } while (!ret && !last);
-
-       btrfs_release_path(path);
-
-       if (ref_root == 0) {
-               printk(KERN_ERR "btrfs: failed to find tree block ref "
-                       "for shared data backref %llu\n", logical);
-               WARN_ON(1);
-               ret = -EIO;
-       }
-
-out:
-       while (!list_empty(data_refs)) {
-               ref = list_first_entry(data_refs, struct __data_ref, list);
-               list_del(&ref->list);
-               if (!ret)
-                       ret = iterate(ref->inum, extent_offset +
-                                       ref->extent_data_item_offset,
-                                       ref->root, ctx);
-               kfree(ref);
-       }
-
-       return ret;
-}
-
-static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info,
-                                   u64 logical, u64 orig_extent_item_objectid,
-                                   u64 extent_offset, struct btrfs_path *path,
-                                   struct list_head *data_refs,
-                                   iterate_extent_inodes_t *iterate,
-                                   void *ctx)
+static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
+                               struct btrfs_path *path, u64 logical,
+                               u64 orig_extent_item_objectid,
+                               u64 extent_item_pos, u64 root,
+                               iterate_extent_inodes_t *iterate, void *ctx)
 {
        u64 disk_byte;
        struct btrfs_key key;
@@ -416,8 +1080,10 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info,
        struct extent_buffer *eb;
        int slot;
        int nritems;
-       int ret;
-       int found = 0;
+       int ret = 0;
+       int extent_type;
+       u64 data_offset;
+       u64 data_len;
 
        eb = read_tree_block(fs_info->tree_root, logical,
                                fs_info->tree_root->leafsize, 0);
@@ -435,149 +1101,99 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info,
                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;
                fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
-               if (!fi) {
-                       free_extent_buffer(eb);
-                       return -EIO;
-               }
+               extent_type = btrfs_file_extent_type(eb, fi);
+               if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+                       continue;
+               /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
                disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
-               if (disk_byte != orig_extent_item_objectid) {
-                       if (found)
-                               break;
-                       else
-                               continue;
-               }
-               ++found;
-               ret = __iter_shared_inline_ref_inodes(fs_info, logical,
-                                                       key.objectid,
-                                                       key.offset,
-                                                       extent_offset, path,
-                                                       data_refs,
-                                                       iterate, ctx);
-               if (ret)
-                       break;
-       }
+               if (disk_byte != orig_extent_item_objectid)
+                       continue;
 
-       if (!found) {
-               printk(KERN_ERR "btrfs: failed to follow shared data backref "
-                       "to parent %llu\n", logical);
-               WARN_ON(1);
-               ret = -EIO;
+               data_offset = btrfs_file_extent_offset(eb, fi);
+               data_len = btrfs_file_extent_num_bytes(eb, fi);
+
+               if (extent_item_pos < data_offset ||
+                   extent_item_pos >= data_offset + data_len)
+                       continue;
+
+               pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), "
+                               "root %llu\n", orig_extent_item_objectid,
+                               key.objectid, key.offset, root);
+               ret = iterate(key.objectid,
+                               key.offset + (extent_item_pos - data_offset),
+                               root, ctx);
+               if (ret) {
+                       pr_debug("stopping iteration because ret=%d\n", ret);
+                       break;
+               }
        }
 
        free_extent_buffer(eb);
+
        return ret;
 }
 
 /*
  * calls iterate() for every inode that references the extent identified by
- * the given parameters. will use the path given as a parameter and return it
- * released.
+ * the given parameters.
  * when the iterator function returns a non-zero value, iteration stops.
+ * path is guaranteed to be in released state when iterate() is called.
  */
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
                                struct btrfs_path *path,
-                               u64 extent_item_objectid,
-                               u64 extent_offset,
+                               u64 extent_item_objectid, u64 extent_item_pos,
                                iterate_extent_inodes_t *iterate, void *ctx)
 {
-       unsigned long ptr = 0;
-       int last;
        int ret;
-       int type;
-       u64 logical;
-       u32 item_size;
-       struct btrfs_extent_inline_ref *eiref;
-       struct btrfs_extent_data_ref *dref;
-       struct extent_buffer *eb;
-       struct btrfs_extent_item *ei;
-       struct btrfs_key key;
        struct list_head data_refs = LIST_HEAD_INIT(data_refs);
        struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
-       struct __data_ref *ref_d;
-       struct __shared_ref *ref_s;
-
-       eb = path->nodes[0];
-       ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
-       item_size = btrfs_item_size_nr(eb, path->slots[0]);
-
-       /* first we iterate the inline refs, ... */
-       do {
-               last = __get_extent_inline_ref(&ptr, eb, ei, item_size,
-                                               &eiref, &type);
-               if (last == -ENOENT) {
-                       ret = 0;
-                       break;
-               }
-               if (last < 0) {
-                       ret = last;
-                       break;
-               }
+       struct btrfs_trans_handle *trans;
+       struct ulist *refs;
+       struct ulist *roots;
+       struct ulist_node *ref_node = NULL;
+       struct ulist_node *root_node = NULL;
+       struct seq_list seq_elem;
+       struct btrfs_delayed_ref_root *delayed_refs;
+
+       trans = btrfs_join_transaction(fs_info->extent_root);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       pr_debug("resolving all inodes for extent %llu\n",
+                       extent_item_objectid);
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       spin_lock(&delayed_refs->lock);
+       btrfs_get_delayed_seq(delayed_refs, &seq_elem);
+       spin_unlock(&delayed_refs->lock);
+
+       ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
+                                  extent_item_pos, seq_elem.seq,
+                                  &refs);
 
-               if (type == BTRFS_EXTENT_DATA_REF_KEY) {
-                       dref = (struct btrfs_extent_data_ref *)(&eiref->offset);
-                       ret = __data_list_add_eb(&data_refs, eb, dref);
-               } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
-                       logical = btrfs_extent_inline_ref_offset(eb, eiref);
-                       ret = __shared_list_add(&shared_refs, logical);
-               }
-       } while (!ret && !last);
+       if (ret)
+               goto out;
 
-       /* ... then we proceed to in-tree references and ... */
-       while (!ret) {
-               ++path->slots[0];
-               if (path->slots[0] > btrfs_header_nritems(eb)) {
-                       ret = btrfs_next_leaf(fs_info->extent_root, path);
-                       if (ret) {
-                               if (ret == 1)
-                                       ret = 0; /* we're done */
-                               break;
-                       }
-                       eb = path->nodes[0];
-               }
-               btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
-               if (key.objectid != extent_item_objectid)
+       while (!ret && (ref_node = ulist_next(refs, ref_node))) {
+               ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1,
+                                               seq_elem.seq, &roots);
+               if (ret)
                        break;
-               if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
-                       dref = btrfs_item_ptr(eb, path->slots[0],
-                                               struct btrfs_extent_data_ref);
-                       ret = __data_list_add_eb(&data_refs, eb, dref);
-               } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
-                       ret = __shared_list_add(&shared_refs, key.offset);
+               while (!ret && (root_node = ulist_next(roots, root_node))) {
+                       pr_debug("root %llu references leaf %llu\n",
+                                       root_node->val, ref_node->val);
+                       ret = iterate_leaf_refs(fs_info, path, ref_node->val,
+                                               extent_item_objectid,
+                                               extent_item_pos, root_node->val,
+                                               iterate, ctx);
                }
        }
 
-       btrfs_release_path(path);
-
-       /*
-        * ... only at the very end we can process the refs we found. this is
-        * because the iterator function we call is allowed to make tree lookups
-        * and we have to avoid deadlocks. additionally, we need more tree
-        * lookups ourselves for shared data refs.
-        */
-       while (!list_empty(&data_refs)) {
-               ref_d = list_first_entry(&data_refs, struct __data_ref, list);
-               list_del(&ref_d->list);
-               if (!ret)
-                       ret = iterate(ref_d->inum, extent_offset +
-                                       ref_d->extent_data_item_offset,
-                                       ref_d->root, ctx);
-               kfree(ref_d);
-       }
-
-       while (!list_empty(&shared_refs)) {
-               ref_s = list_first_entry(&shared_refs, struct __shared_ref,
-                                       list);
-               list_del(&ref_s->list);
-               if (!ret)
-                       ret = __iter_shared_inline_ref(fs_info,
-                                                       ref_s->disk_byte,
-                                                       extent_item_objectid,
-                                                       extent_offset, path,
-                                                       &data_refs,
-                                                       iterate, ctx);
-               kfree(ref_s);
-       }
-
+       ulist_free(refs);
+       ulist_free(roots);
+out:
+       btrfs_put_delayed_seq(delayed_refs, &seq_elem);
+       btrfs_end_transaction(trans, fs_info->extent_root);
        return ret;
 }
 
@@ -586,19 +1202,20 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
                                iterate_extent_inodes_t *iterate, void *ctx)
 {
        int ret;
-       u64 offset;
+       u64 extent_item_pos;
        struct btrfs_key found_key;
 
        ret = extent_from_logical(fs_info, logical, path,
                                        &found_key);
+       btrfs_release_path(path);
        if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                ret = -EINVAL;
        if (ret < 0)
                return ret;
 
-       offset = logical - found_key.objectid;
+       extent_item_pos = logical - found_key.objectid;
        ret = iterate_extent_inodes(fs_info, path, found_key.objectid,
-                                       offset, iterate, ctx);
+                                       extent_item_pos, iterate, ctx);
 
        return ret;
 }
@@ -643,6 +1260,10 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
                for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
                        name_len = btrfs_inode_ref_name_len(eb, iref);
                        /* path must be released before calling iterate()! */
+                       pr_debug("following ref at offset %u for inode %llu in "
+                                "tree %llu\n", cur,
+                                (unsigned long long)found_key.objectid,
+                                (unsigned long long)fs_root->objectid);
                        ret = iterate(parent, iref, eb, ctx);
                        if (ret) {
                                free_extent_buffer(eb);
@@ -683,10 +1304,14 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
                return PTR_ERR(fspath);
 
        if (fspath > fspath_min) {
+               pr_debug("path resolved: %s\n", fspath);
                ipath->fspath->val[i] = (u64)(unsigned long)fspath;
                ++ipath->fspath->elem_cnt;
                ipath->fspath->bytes_left = fspath - fspath_min;
        } else {
+               pr_debug("missed path, not enough space. missing bytes: %lu, "
+                        "constructed so far: %s\n",
+                        (unsigned long)(fspath_min - fspath), fspath_min);
                ++ipath->fspath->elem_missed;
                ipath->fspath->bytes_missing += fspath_min - fspath;
                ipath->fspath->bytes_left = 0;
index 92618837cb8f94a3a799ea3d08093d9b33aab336..d00dfa9ca9342c96f5057af09fb06418cd943cb6 100644 (file)
@@ -20,6 +20,7 @@
 #define __BTRFS_BACKREF__
 
 #include "ioctl.h"
+#include "ulist.h"
 
 struct inode_fs_paths {
        struct btrfs_path               *btrfs_path;
@@ -54,6 +55,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 
 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *fs_info, u64 bytenr,
+                               u64 num_bytes, u64 seq, struct ulist **roots);
+
 struct btrfs_data_container *init_data_container(u32 total_bytes);
 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
                                        struct btrfs_path *path);
index dede441bdeee2678225187bece170710abe1a9b4..0639a555e16ed1975702ed5509dc9bc1c4dbf490 100644 (file)
@@ -240,7 +240,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 
        cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
                                     new_root_objectid, &disk_key, level,
-                                    buf->start, 0);
+                                    buf->start, 0, 1);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
@@ -261,9 +261,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 
        WARN_ON(btrfs_header_generation(buf) > trans->transid);
        if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
-               ret = btrfs_inc_ref(trans, root, cow, 1);
+               ret = btrfs_inc_ref(trans, root, cow, 1, 1);
        else
-               ret = btrfs_inc_ref(trans, root, cow, 0);
+               ret = btrfs_inc_ref(trans, root, cow, 0, 1);
 
        if (ret)
                return ret;
@@ -350,14 +350,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                if ((owner == root->root_key.objectid ||
                     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
                    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
-                       ret = btrfs_inc_ref(trans, root, buf, 1);
+                       ret = btrfs_inc_ref(trans, root, buf, 1, 1);
                        BUG_ON(ret);
 
                        if (root->root_key.objectid ==
                            BTRFS_TREE_RELOC_OBJECTID) {
-                               ret = btrfs_dec_ref(trans, root, buf, 0);
+                               ret = btrfs_dec_ref(trans, root, buf, 0, 1);
                                BUG_ON(ret);
-                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                               ret = btrfs_inc_ref(trans, root, cow, 1, 1);
                                BUG_ON(ret);
                        }
                        new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -365,9 +365,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 
                        if (root->root_key.objectid ==
                            BTRFS_TREE_RELOC_OBJECTID)
-                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                               ret = btrfs_inc_ref(trans, root, cow, 1, 1);
                        else
-                               ret = btrfs_inc_ref(trans, root, cow, 0);
+                               ret = btrfs_inc_ref(trans, root, cow, 0, 1);
                        BUG_ON(ret);
                }
                if (new_flags != 0) {
@@ -381,11 +381,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
                        if (root->root_key.objectid ==
                            BTRFS_TREE_RELOC_OBJECTID)
-                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                               ret = btrfs_inc_ref(trans, root, cow, 1, 1);
                        else
-                               ret = btrfs_inc_ref(trans, root, cow, 0);
+                               ret = btrfs_inc_ref(trans, root, cow, 0, 1);
                        BUG_ON(ret);
-                       ret = btrfs_dec_ref(trans, root, buf, 1);
+                       ret = btrfs_dec_ref(trans, root, buf, 1, 1);
                        BUG_ON(ret);
                }
                clean_tree_block(trans, root, buf);
@@ -446,7 +446,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 
        cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
                                     root->root_key.objectid, &disk_key,
-                                    level, search_start, empty_size);
+                                    level, search_start, empty_size, 1);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
@@ -484,7 +484,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                rcu_assign_pointer(root->node, cow);
 
                btrfs_free_tree_block(trans, root, buf, parent_start,
-                                     last_ref);
+                                     last_ref, 1);
                free_extent_buffer(buf);
                add_root_to_dirty_list(root);
        } else {
@@ -500,7 +500,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
                btrfs_free_tree_block(trans, root, buf, parent_start,
-                                     last_ref);
+                                     last_ref, 1);
        }
        if (unlock_orig)
                btrfs_tree_unlock(buf);
@@ -957,7 +957,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                free_extent_buffer(mid);
 
                root_sub_used(root, mid->len);
-               btrfs_free_tree_block(trans, root, mid, 0, 1);
+               btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
                /* once for the root ptr */
                free_extent_buffer(mid);
                return 0;
@@ -1015,7 +1015,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        if (wret)
                                ret = wret;
                        root_sub_used(root, right->len);
-                       btrfs_free_tree_block(trans, root, right, 0, 1);
+                       btrfs_free_tree_block(trans, root, right, 0, 1, 0);
                        free_extent_buffer(right);
                        right = NULL;
                } else {
@@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (wret)
                        ret = wret;
                root_sub_used(root, mid->len);
-               btrfs_free_tree_block(trans, root, mid, 0, 1);
+               btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
                free_extent_buffer(mid);
                mid = NULL;
        } else {
@@ -2089,7 +2089,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 
        c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
                                   root->root_key.objectid, &lower_key,
-                                  level, root->node->start, 0);
+                                  level, root->node->start, 0, 0);
        if (IS_ERR(c))
                return PTR_ERR(c);
 
@@ -2216,7 +2216,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 
        split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
                                        root->root_key.objectid,
-                                       &disk_key, level, c->start, 0);
+                                       &disk_key, level, c->start, 0, 0);
        if (IS_ERR(split))
                return PTR_ERR(split);
 
@@ -2970,7 +2970,7 @@ again:
 
        right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
                                        root->root_key.objectid,
-                                       &disk_key, 0, l->start, 0);
+                                       &disk_key, 0, l->start, 0, 0);
        if (IS_ERR(right))
                return PTR_ERR(right);
 
@@ -3781,7 +3781,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
 
        root_sub_used(root, leaf->len);
 
-       btrfs_free_tree_block(trans, root, leaf, 0, 1);
+       btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
        return 0;
 }
 /*
index dfc136cc07d7e9c4193b80188b5f452b1183fd9d..b6d1020c4571870660998d61ea4cf34cfbaaca35 100644 (file)
@@ -2439,11 +2439,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root, u32 blocksize,
                                        u64 parent, u64 root_objectid,
                                        struct btrfs_disk_key *key, int level,
-                                       u64 hint, u64 empty_size);
+                                       u64 hint, u64 empty_size, int for_cow);
 void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct extent_buffer *buf,
-                          u64 parent, int last_ref);
+                          u64 parent, int last_ref, int for_cow);
 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
                                            struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize,
@@ -2463,17 +2463,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
                                  u64 search_end, struct btrfs_key *ins,
                                  u64 data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref);
+                 struct extent_buffer *buf, int full_backref, int for_cow);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref);
+                 struct extent_buffer *buf, int full_backref, int for_cow);
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 flags,
                                int is_data);
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
-                     u64 bytenr, u64 num_bytes, u64 parent,
-                     u64 root_objectid, u64 owner, u64 offset);
+                     u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+                     u64 owner, u64 offset, int for_cow);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@@ -2485,7 +2485,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
-                        u64 root_objectid, u64 owner, u64 offset);
+                        u64 root_objectid, u64 owner, u64 offset, int for_cow);
 
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root);
@@ -2644,10 +2644,18 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+{
+       ++p->slots[0];
+       if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
+               return btrfs_next_leaf(root, p);
+       return 0;
+}
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
 void btrfs_drop_snapshot(struct btrfs_root *root,
-                        struct btrfs_block_rsv *block_rsv, int update_ref);
+                        struct btrfs_block_rsv *block_rsv, int update_ref,
+                        int for_reloc);
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct extent_buffer *node,
index 125cf76fcd086803d35bd257bdb54168486e0e0b..66e4f29505a33dbecd45b5d6a80e878c87818bc0 100644 (file)
@@ -101,6 +101,11 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
                return -1;
        if (ref1->type > ref2->type)
                return 1;
+       /* merging of sequenced refs is not allowed */
+       if (ref1->seq < ref2->seq)
+               return -1;
+       if (ref1->seq > ref2->seq)
+               return 1;
        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
                return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -150,16 +155,22 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
 
 /*
  * find an head entry based on bytenr. This returns the delayed ref
- * head if it was able to find one, or NULL if nothing was in that spot
+ * head if it was able to find one, or NULL if nothing was in that spot.
+ * If return_bigger is given, the next bigger entry is returned if no exact
+ * match is found.
  */
 static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
                                  u64 bytenr,
-                                 struct btrfs_delayed_ref_node **last)
+                                 struct btrfs_delayed_ref_node **last,
+                                 int return_bigger)
 {
-       struct rb_node *n = root->rb_node;
+       struct rb_node *n;
        struct btrfs_delayed_ref_node *entry;
-       int cmp;
+       int cmp = 0;
 
+again:
+       n = root->rb_node;
+       entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
                WARN_ON(!entry->in_tree);
@@ -182,6 +193,19 @@ static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
                else
                        return entry;
        }
+       if (entry && return_bigger) {
+               if (cmp > 0) {
+                       n = rb_next(&entry->rb_node);
+                       if (!n)
+                               n = rb_first(root);
+                       entry = rb_entry(n, struct btrfs_delayed_ref_node,
+                                        rb_node);
+                       bytenr = entry->bytenr;
+                       return_bigger = 0;
+                       goto again;
+               }
+               return entry;
+       }
        return NULL;
 }
 
@@ -209,6 +233,24 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+                           u64 seq)
+{
+       struct seq_list *elem;
+
+       assert_spin_locked(&delayed_refs->lock);
+       if (list_empty(&delayed_refs->seq_head))
+               return 0;
+
+       elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
+       if (seq >= elem->seq) {
+               pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
+                        seq, elem->seq, delayed_refs);
+               return 1;
+       }
+       return 0;
+}
+
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
                           struct list_head *cluster, u64 start)
 {
@@ -223,20 +265,8 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
                node = rb_first(&delayed_refs->root);
        } else {
                ref = NULL;
-               find_ref_head(&delayed_refs->root, start, &ref);
+               find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
                if (ref) {
-                       struct btrfs_delayed_ref_node *tmp;
-
-                       node = rb_prev(&ref->rb_node);
-                       while (node) {
-                               tmp = rb_entry(node,
-                                              struct btrfs_delayed_ref_node,
-                                              rb_node);
-                               if (tmp->bytenr < start)
-                                       break;
-                               ref = tmp;
-                               node = rb_prev(&ref->rb_node);
-                       }
                        node = &ref->rb_node;
                } else
                        node = rb_first(&delayed_refs->root);
@@ -390,7 +420,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
  * this does all the dirty work in terms of maintaining the correct
  * overall modification count.
  */
-static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
+static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
+                                       struct btrfs_trans_handle *trans,
                                        struct btrfs_delayed_ref_node *ref,
                                        u64 bytenr, u64 num_bytes,
                                        int action, int is_data)
@@ -437,6 +468,7 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
        ref->action  = 0;
        ref->is_head = 1;
        ref->in_tree = 1;
+       ref->seq = 0;
 
        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
@@ -468,14 +500,17 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
 /*
  * helper to insert a delayed tree ref into the rbtree.
  */
-static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+                                        struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
-                                        u64 ref_root, int level, int action)
+                                        u64 ref_root, int level, int action,
+                                        int for_cow)
 {
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       u64 seq = 0;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -491,14 +526,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
        ref->is_head = 0;
        ref->in_tree = 1;
 
+       if (need_ref_seq(for_cow, ref_root))
+               seq = inc_delayed_seq(delayed_refs);
+       ref->seq = seq;
+
        full_ref = btrfs_delayed_node_to_tree_ref(ref);
-       if (parent) {
-               full_ref->parent = parent;
+       full_ref->parent = parent;
+       full_ref->root = ref_root;
+       if (parent)
                ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
-       } else {
-               full_ref->root = ref_root;
+       else
                ref->type = BTRFS_TREE_BLOCK_REF_KEY;
-       }
        full_ref->level = level;
 
        trace_btrfs_delayed_tree_ref(ref, full_ref, action);
@@ -522,15 +560,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 /*
  * helper to insert a delayed data ref into the rbtree.
  */
-static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
+static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+                                        struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
                                         u64 ref_root, u64 owner, u64 offset,
-                                        int action)
+                                        int action, int for_cow)
 {
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       u64 seq = 0;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -546,14 +586,18 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
        ref->is_head = 0;
        ref->in_tree = 1;
 
+       if (need_ref_seq(for_cow, ref_root))
+               seq = inc_delayed_seq(delayed_refs);
+       ref->seq = seq;
+
        full_ref = btrfs_delayed_node_to_data_ref(ref);
-       if (parent) {
-               full_ref->parent = parent;
+       full_ref->parent = parent;
+       full_ref->root = ref_root;
+       if (parent)
                ref->type = BTRFS_SHARED_DATA_REF_KEY;
-       } else {
-               full_ref->root = ref_root;
+       else
                ref->type = BTRFS_EXTENT_DATA_REF_KEY;
-       }
+
        full_ref->objectid = owner;
        full_ref->offset = offset;
 
@@ -580,10 +624,12 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
  * to make sure the delayed ref is eventually processed before this
  * transaction commits.
  */
-int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+                              struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 ref_root,  int level, int action,
-                              struct btrfs_delayed_extent_op *extent_op)
+                              struct btrfs_delayed_extent_op *extent_op,
+                              int for_cow)
 {
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
@@ -610,13 +656,17 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
-                                  action, 0);
+       ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+                                  num_bytes, action, 0);
        BUG_ON(ret);
 
-       ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
-                                  parent, ref_root, level, action);
+       ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
+                                  num_bytes, parent, ref_root, level, action,
+                                  for_cow);
        BUG_ON(ret);
+       if (!need_ref_seq(for_cow, ref_root) &&
+           waitqueue_active(&delayed_refs->seq_wait))
+               wake_up(&delayed_refs->seq_wait);
        spin_unlock(&delayed_refs->lock);
        return 0;
 }
@@ -624,11 +674,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 /*
  * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
  */
-int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+                              struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, int action,
-                              struct btrfs_delayed_extent_op *extent_op)
+                              struct btrfs_delayed_extent_op *extent_op,
+                              int for_cow)
 {
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
@@ -655,18 +707,23 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
-                                  action, 1);
+       ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
+                                  num_bytes, action, 1);
        BUG_ON(ret);
 
-       ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
-                                  parent, ref_root, owner, offset, action);
+       ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
+                                  num_bytes, parent, ref_root, owner, offset,
+                                  action, for_cow);
        BUG_ON(ret);
+       if (!need_ref_seq(for_cow, ref_root) &&
+           waitqueue_active(&delayed_refs->seq_wait))
+               wake_up(&delayed_refs->seq_wait);
        spin_unlock(&delayed_refs->lock);
        return 0;
 }
 
-int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
+                               struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
 {
@@ -683,11 +740,13 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
 
-       ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
+       ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
                                   extent_op->is_data);
        BUG_ON(ret);
 
+       if (waitqueue_active(&delayed_refs->seq_wait))
+               wake_up(&delayed_refs->seq_wait);
        spin_unlock(&delayed_refs->lock);
        return 0;
 }
@@ -704,7 +763,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
        struct btrfs_delayed_ref_root *delayed_refs;
 
        delayed_refs = &trans->transaction->delayed_refs;
-       ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
+       ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
        if (ref)
                return btrfs_delayed_node_to_head(ref);
        return NULL;
index e287e3b0eab0d970d37f0f4c70fd688b22f276ac..d8f244d9492511e3b108b26bcf4da1bc9fbf6826 100644 (file)
@@ -33,6 +33,9 @@ struct btrfs_delayed_ref_node {
        /* the size of the extent */
        u64 num_bytes;
 
+       /* seq number to keep track of insertion order */
+       u64 seq;
+
        /* ref count on this data structure */
        atomic_t refs;
 
@@ -98,19 +101,15 @@ struct btrfs_delayed_ref_head {
 
 struct btrfs_delayed_tree_ref {
        struct btrfs_delayed_ref_node node;
-       union {
-               u64 root;
-               u64 parent;
-       };
+       u64 root;
+       u64 parent;
        int level;
 };
 
 struct btrfs_delayed_data_ref {
        struct btrfs_delayed_ref_node node;
-       union {
-               u64 root;
-               u64 parent;
-       };
+       u64 root;
+       u64 parent;
        u64 objectid;
        u64 offset;
 };
@@ -140,6 +139,26 @@ struct btrfs_delayed_ref_root {
        int flushing;
 
        u64 run_delayed_start;
+
+       /*
+        * seq number of delayed refs. We need to know if a backref was being
+        * added before the currently processed ref or afterwards.
+        */
+       u64 seq;
+
+       /*
+        * seq_list holds a list of all seq numbers that are currently being
+        * added to the list. While walking backrefs (btrfs_find_all_roots,
+        * qgroups), which might take some time, no newer ref must be processed,
+        * as it might influence the outcome of the walk.
+        */
+       struct list_head seq_head;
+
+       /*
+        * when the only refs we have in the list must not be processed, we want
+        * to wait for more refs to show up or for the end of backref walking.
+        */
+       wait_queue_head_t seq_wait;
 };
 
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -151,16 +170,21 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
        }
 }
 
-int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+                              struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 ref_root, int level, int action,
-                              struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+                              struct btrfs_delayed_extent_op *extent_op,
+                              int for_cow);
+int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+                              struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, int action,
-                              struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+                              struct btrfs_delayed_extent_op *extent_op,
+                              int for_cow);
+int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
+                               struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op);
 
@@ -170,6 +194,60 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_head *head);
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
                           struct list_head *cluster, u64 search_start);
+
+struct seq_list {
+       struct list_head list;
+       u64 seq;
+};
+
+static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
+{
+       assert_spin_locked(&delayed_refs->lock);
+       ++delayed_refs->seq;
+       return delayed_refs->seq;
+}
+
+static inline void
+btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+                     struct seq_list *elem)
+{
+       assert_spin_locked(&delayed_refs->lock);
+       elem->seq = delayed_refs->seq;
+       list_add_tail(&elem->list, &delayed_refs->seq_head);
+}
+
+static inline void
+btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+                     struct seq_list *elem)
+{
+       spin_lock(&delayed_refs->lock);
+       list_del(&elem->list);
+       wake_up(&delayed_refs->seq_wait);
+       spin_unlock(&delayed_refs->lock);
+}
+
+int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+                           u64 seq);
+
+/*
+ * delayed refs with a ref_seq > 0 must be held back during backref walking.
+ * this only applies to items in one of the fs-trees. for_cow items never need
+ * to be held back, so they won't get a ref_seq number.
+ */
+static inline int need_ref_seq(int for_cow, u64 rootid)
+{
+       if (for_cow)
+               return 0;
+
+       if (rootid == BTRFS_FS_TREE_OBJECTID)
+               return 1;
+
+       if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+               return 1;
+
+       return 0;
+}
+
 /*
  * a node might live in a head or a regular ref, this lets you
  * test for the proper type to use.
index e5167219c2662ec7ab362cdb183b65676820148b..9be97716c5e068153b83a5e2d493c0d010c16710 100644 (file)
@@ -1243,7 +1243,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
        root->ref_cows = 0;
 
        leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
-                                     BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
+                                     BTRFS_TREE_LOG_OBJECTID, NULL,
+                                     0, 0, 0, 0);
        if (IS_ERR(leaf)) {
                kfree(root);
                return ERR_CAST(leaf);
index 1c1cf216be8059e37a3c63b767dd40d036ce1f94..a44072a692ab684c59e229f266af5d9ae26953b4 100644 (file)
@@ -1871,20 +1871,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
-                        u64 root_objectid, u64 owner, u64 offset)
+                        u64 root_objectid, u64 owner, u64 offset, int for_cow)
 {
        int ret;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+
        BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
               root_objectid == BTRFS_TREE_LOG_OBJECTID);
 
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+               ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+                                       num_bytes,
                                        parent, root_objectid, (int)owner,
-                                       BTRFS_ADD_DELAYED_REF, NULL);
+                                       BTRFS_ADD_DELAYED_REF, NULL, for_cow);
        } else {
-               ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+               ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+                                       num_bytes,
                                        parent, root_objectid, owner, offset,
-                                       BTRFS_ADD_DELAYED_REF, NULL);
+                                       BTRFS_ADD_DELAYED_REF, NULL, for_cow);
        }
        return ret;
 }
@@ -2231,6 +2235,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                        }
                }
 
+               /*
+                * locked_ref is the head node, so we have to go one
+                * node back for any delayed ref updates
+                */
+               ref = select_delayed_ref(locked_ref);
+
+               if (ref && ref->seq &&
+                   btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
+                       /*
+                        * there are still refs with lower seq numbers in the
+                        * process of being added. Don't run this ref yet.
+                        */
+                       list_del_init(&locked_ref->cluster);
+                       mutex_unlock(&locked_ref->mutex);
+                       locked_ref = NULL;
+                       delayed_refs->num_heads_ready++;
+                       spin_unlock(&delayed_refs->lock);
+                       cond_resched();
+                       spin_lock(&delayed_refs->lock);
+                       continue;
+               }
+
                /*
                 * record the must insert reserved flag before we
                 * drop the spin lock.
@@ -2241,11 +2267,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                extent_op = locked_ref->extent_op;
                locked_ref->extent_op = NULL;
 
-               /*
-                * locked_ref is the head node, so we have to go one
-                * node back for any delayed ref updates
-                */
-               ref = select_delayed_ref(locked_ref);
                if (!ref) {
                        /* All delayed refs have been processed, Go ahead
                         * and send the head node to run_one_delayed_ref,
@@ -2276,7 +2297,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                ref->in_tree = 0;
                rb_erase(&ref->rb_node, &delayed_refs->root);
                delayed_refs->num_entries--;
-
+               /*
+                * we modified num_entries, but as we're currently running
+                * delayed refs, skip
+                *     wake_up(&delayed_refs->seq_wait);
+                * here.
+                */
                spin_unlock(&delayed_refs->lock);
 
                ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2297,6 +2323,23 @@ next:
        return count;
 }
 
+
+static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
+                       unsigned long num_refs)
+{
+       struct list_head *first_seq = delayed_refs->seq_head.next;
+
+       spin_unlock(&delayed_refs->lock);
+       pr_debug("waiting for more refs (num %ld, first %p)\n",
+                num_refs, first_seq);
+       wait_event(delayed_refs->seq_wait,
+                  num_refs != delayed_refs->num_entries ||
+                  delayed_refs->seq_head.next != first_seq);
+       pr_debug("done waiting for more refs (num %ld, first %p)\n",
+                delayed_refs->num_entries, delayed_refs->seq_head.next);
+       spin_lock(&delayed_refs->lock);
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -2312,8 +2355,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_node *ref;
        struct list_head cluster;
        int ret;
+       u64 delayed_start;
        int run_all = count == (unsigned long)-1;
        int run_most = 0;
+       unsigned long num_refs = 0;
+       int consider_waiting;
 
        if (root == root->fs_info->extent_root)
                root = root->fs_info->tree_root;
@@ -2325,6 +2371,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        delayed_refs = &trans->transaction->delayed_refs;
        INIT_LIST_HEAD(&cluster);
 again:
+       consider_waiting = 0;
        spin_lock(&delayed_refs->lock);
        if (count == 0) {
                count = delayed_refs->num_entries * 2;
@@ -2341,11 +2388,35 @@ again:
                 * of refs to process starting at the first one we are able to
                 * lock
                 */
+               delayed_start = delayed_refs->run_delayed_start;
                ret = btrfs_find_ref_cluster(trans, &cluster,
                                             delayed_refs->run_delayed_start);
                if (ret)
                        break;
 
+               if (delayed_start >= delayed_refs->run_delayed_start) {
+                       if (consider_waiting == 0) {
+                               /*
+                                * btrfs_find_ref_cluster looped. let's do one
+                                * more cycle. if we don't run any delayed ref
+                                * during that cycle (because we can't because
+                                * all of them are blocked) and if the number of
+                                * refs doesn't change, we avoid busy waiting.
+                                */
+                               consider_waiting = 1;
+                               num_refs = delayed_refs->num_entries;
+                       } else {
+                               wait_for_more_refs(delayed_refs, num_refs);
+                               /*
+                                * after waiting, things have changed. we
+                                * dropped the lock and someone else might have
+                                * run some refs, built new clusters and so on.
+                                * therefore, we restart staleness detection.
+                                */
+                               consider_waiting = 0;
+                       }
+               }
+
                ret = run_clustered_refs(trans, root, &cluster);
                BUG_ON(ret < 0);
 
@@ -2353,6 +2424,11 @@ again:
 
                if (count == 0)
                        break;
+
+               if (ret || delayed_refs->run_delayed_start == 0) {
+                       /* refs were run, let's reset staleness detection */
+                       consider_waiting = 0;
+               }
        }
 
        if (run_all) {
@@ -2410,7 +2486,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
        extent_op->update_key = 0;
        extent_op->is_data = is_data ? 1 : 0;
 
-       ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
+       ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
+                                         num_bytes, extent_op);
        if (ret)
                kfree(extent_op);
        return ret;
@@ -2595,7 +2672,7 @@ out:
 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct extent_buffer *buf,
-                          int full_backref, int inc)
+                          int full_backref, int inc, int for_cow)
 {
        u64 bytenr;
        u64 num_bytes;
@@ -2608,7 +2685,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
        int level;
        int ret = 0;
        int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
-                           u64, u64, u64, u64, u64, u64);
+                           u64, u64, u64, u64, u64, u64, int);
 
        ref_root = btrfs_header_owner(buf);
        nritems = btrfs_header_nritems(buf);
@@ -2645,14 +2722,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                        key.offset -= btrfs_file_extent_offset(buf, fi);
                        ret = process_func(trans, root, bytenr, num_bytes,
                                           parent, ref_root, key.objectid,
-                                          key.offset);
+                                          key.offset, for_cow);
                        if (ret)
                                goto fail;
                } else {
                        bytenr = btrfs_node_blockptr(buf, i);
                        num_bytes = btrfs_level_size(root, level - 1);
                        ret = process_func(trans, root, bytenr, num_bytes,
-                                          parent, ref_root, level - 1, 0);
+                                          parent, ref_root, level - 1, 0,
+                                          for_cow);
                        if (ret)
                                goto fail;
                }
@@ -2664,15 +2742,15 @@ fail:
 }
 
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref)
+                 struct extent_buffer *buf, int full_backref, int for_cow)
 {
-       return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
+       return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
 }
 
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref)
+                 struct extent_buffer *buf, int full_backref, int for_cow)
 {
-       return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
+       return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -4954,6 +5032,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
        rb_erase(&head->node.rb_node, &delayed_refs->root);
 
        delayed_refs->num_entries--;
+       if (waitqueue_active(&delayed_refs->seq_wait))
+               wake_up(&delayed_refs->seq_wait);
 
        /*
         * we don't take a ref on the node because we're removing it from the
@@ -4981,16 +5061,17 @@ out:
 void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct extent_buffer *buf,
-                          u64 parent, int last_ref)
+                          u64 parent, int last_ref, int for_cow)
 {
        struct btrfs_block_group_cache *cache = NULL;
        int ret;
 
        if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
-                                               parent, root->root_key.objectid,
-                                               btrfs_header_level(buf),
-                                               BTRFS_DROP_DELAYED_REF, NULL);
+               ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+                                       buf->start, buf->len,
+                                       parent, root->root_key.objectid,
+                                       btrfs_header_level(buf),
+                                       BTRFS_DROP_DELAYED_REF, NULL, for_cow);
                BUG_ON(ret);
        }
 
@@ -5025,12 +5106,12 @@ out:
        btrfs_put_block_group(cache);
 }
 
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root,
-                     u64 bytenr, u64 num_bytes, u64 parent,
-                     u64 root_objectid, u64 owner, u64 offset)
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+                     u64 owner, u64 offset, int for_cow)
 {
        int ret;
+       struct btrfs_fs_info *fs_info = root->fs_info;
 
        /*
         * tree log blocks never actually go into the extent allocation
@@ -5042,14 +5123,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                btrfs_pin_extent(root, bytenr, num_bytes, 1);
                ret = 0;
        } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+               ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+                                       num_bytes,
                                        parent, root_objectid, (int)owner,
-                                       BTRFS_DROP_DELAYED_REF, NULL);
+                                       BTRFS_DROP_DELAYED_REF, NULL, for_cow);
                BUG_ON(ret);
        } else {
-               ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
-                                       parent, root_objectid, owner,
-                                       offset, BTRFS_DROP_DELAYED_REF, NULL);
+               ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+                                               num_bytes,
+                                               parent, root_objectid, owner,
+                                               offset, BTRFS_DROP_DELAYED_REF,
+                                               NULL, for_cow);
                BUG_ON(ret);
        }
        return ret;
@@ -5877,9 +5961,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 
        BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
 
-       ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
-                                        0, root_objectid, owner, offset,
-                                        BTRFS_ADD_DELAYED_EXTENT, NULL);
+       ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
+                                        ins->offset, 0,
+                                        root_objectid, owner, offset,
+                                        BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
        return ret;
 }
 
@@ -6049,7 +6134,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root, u32 blocksize,
                                        u64 parent, u64 root_objectid,
                                        struct btrfs_disk_key *key, int level,
-                                       u64 hint, u64 empty_size)
+                                       u64 hint, u64 empty_size, int for_cow)
 {
        struct btrfs_key ins;
        struct btrfs_block_rsv *block_rsv;
@@ -6093,10 +6178,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                extent_op->update_flags = 1;
                extent_op->is_data = 0;
 
-               ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
+               ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+                                       ins.objectid,
                                        ins.offset, parent, root_objectid,
                                        level, BTRFS_ADD_DELAYED_EXTENT,
-                                       extent_op);
+                                       extent_op, for_cow);
                BUG_ON(ret);
        }
        return buf;
@@ -6113,6 +6199,7 @@ struct walk_control {
        int keep_locks;
        int reada_slot;
        int reada_count;
+       int for_reloc;
 };
 
 #define DROP_REFERENCE 1
@@ -6251,9 +6338,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
        /* wc->stage == UPDATE_BACKREF */
        if (!(wc->flags[level] & flag)) {
                BUG_ON(!path->locks[level]);
-               ret = btrfs_inc_ref(trans, root, eb, 1);
+               ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
                BUG_ON(ret);
-               ret = btrfs_dec_ref(trans, root, eb, 0);
+               ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
                BUG_ON(ret);
                ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
                                                  eb->len, flag, 0);
@@ -6397,7 +6484,7 @@ skip:
                }
 
                ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-                                       root->root_key.objectid, level - 1, 0);
+                               root->root_key.objectid, level - 1, 0, 0);
                BUG_ON(ret);
        }
        btrfs_tree_unlock(next);
@@ -6471,9 +6558,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
        if (wc->refs[level] == 1) {
                if (level == 0) {
                        if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
-                               ret = btrfs_dec_ref(trans, root, eb, 1);
+                               ret = btrfs_dec_ref(trans, root, eb, 1,
+                                                   wc->for_reloc);
                        else
-                               ret = btrfs_dec_ref(trans, root, eb, 0);
+                               ret = btrfs_dec_ref(trans, root, eb, 0,
+                                                   wc->for_reloc);
                        BUG_ON(ret);
                }
                /* make block locked assertion in clean_tree_block happy */
@@ -6500,7 +6589,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                               btrfs_header_owner(path->nodes[level + 1]));
        }
 
-       btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
+       btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
 out:
        wc->refs[level] = 0;
        wc->flags[level] = 0;
@@ -6584,7 +6673,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
  * blocks are properly updated.
  */
 void btrfs_drop_snapshot(struct btrfs_root *root,
-                        struct btrfs_block_rsv *block_rsv, int update_ref)
+                        struct btrfs_block_rsv *block_rsv, int update_ref,
+                        int for_reloc)
 {
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans;
@@ -6672,6 +6762,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
        wc->stage = DROP_REFERENCE;
        wc->update_ref = update_ref;
        wc->keep_locks = 0;
+       wc->for_reloc = for_reloc;
        wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {
@@ -6756,6 +6847,7 @@ out:
  * drop subtree rooted at tree block 'node'.
  *
  * NOTE: this function will unlock and release tree block 'node'
+ * only used by relocation code
  */
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
@@ -6800,6 +6892,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        wc->stage = DROP_REFERENCE;
        wc->update_ref = 0;
        wc->keep_locks = 1;
+       wc->for_reloc = 1;
        wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {
index 49f3c9dc09f4c81902299fd81c62da1ed8423250..3622cc22ff919d4477f6b3f930ac2b86443f6ba7 100644 (file)
@@ -3579,6 +3579,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
        atomic_set(&eb->blocking_writers, 0);
        atomic_set(&eb->spinning_readers, 0);
        atomic_set(&eb->spinning_writers, 0);
+       eb->lock_nested = 0;
        init_waitqueue_head(&eb->write_lock_wq);
        init_waitqueue_head(&eb->read_lock_wq);
 
index 7604c30013227fd823b1523503f8faaeccf283c4..bc6a042cb6fc496e6910d21fb3cdaec000f43b8e 100644 (file)
@@ -129,6 +129,7 @@ struct extent_buffer {
        struct list_head leak_list;
        struct rcu_head rcu_head;
        atomic_t refs;
+       pid_t lock_owner;
 
        /* count of read lock holders on the extent buffer */
        atomic_t write_locks;
@@ -137,6 +138,7 @@ struct extent_buffer {
        atomic_t blocking_readers;
        atomic_t spinning_readers;
        atomic_t spinning_writers;
+       int lock_nested;
 
        /* protects write locks */
        rwlock_t lock;
index 97fbe939c050dc7d523baeaed960741c053cbf0d..fc97b00bd8714135ba43140ee590ede878e09753 100644 (file)
@@ -678,7 +678,7 @@ next_slot:
                                                disk_bytenr, num_bytes, 0,
                                                root->root_key.objectid,
                                                new_key.objectid,
-                                               start - extent_offset);
+                                               start - extent_offset, 0);
                                BUG_ON(ret);
                                *hint_byte = disk_bytenr;
                        }
@@ -753,7 +753,7 @@ next_slot:
                                                disk_bytenr, num_bytes, 0,
                                                root->root_key.objectid,
                                                key.objectid, key.offset -
-                                               extent_offset);
+                                               extent_offset, 0);
                                BUG_ON(ret);
                                inode_sub_bytes(inode,
                                                extent_end - key.offset);
@@ -962,7 +962,7 @@ again:
 
                ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
                                           root->root_key.objectid,
-                                          ino, orig_offset);
+                                          ino, orig_offset, 0);
                BUG_ON(ret);
 
                if (split == start) {
@@ -989,7 +989,7 @@ again:
                del_nr++;
                ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
                                        0, root->root_key.objectid,
-                                       ino, orig_offset);
+                                       ino, orig_offset, 0);
                BUG_ON(ret);
        }
        other_start = 0;
@@ -1006,7 +1006,7 @@ again:
                del_nr++;
                ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
                                        0, root->root_key.objectid,
-                                       ino, orig_offset);
+                                       ino, orig_offset, 0);
                BUG_ON(ret);
        }
        if (del_nr == 0) {
index fd1a06df5bc637c5dad0b0b7e5ce1ce88d5c1616..acc4ff39ca4e5047a2b827bc77e7aa0096400f76 100644 (file)
@@ -3179,7 +3179,7 @@ delete:
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes, 0,
                                                btrfs_header_owner(leaf),
-                                               ino, extent_offset);
+                                               ino, extent_offset, 0);
                        BUG_ON(ret);
                }
 
@@ -5121,7 +5121,7 @@ again:
                        }
                        flush_dcache_page(page);
                } else if (create && PageUptodate(page)) {
-                       WARN_ON(1);
+                       BUG();
                        if (!trans) {
                                kunmap(page);
                                free_extent_map(em);
index ef909b5d3d2ee427bdda2cfb54dcdf6b0dede8a1..7fdf22c2dc0da099ab4c5c020d0b74bcdb4f9713 100644 (file)
@@ -368,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root,
                return PTR_ERR(trans);
 
        leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
-                                     0, objectid, NULL, 0, 0, 0);
+                                     0, objectid, NULL, 0, 0, 0, 0);
        if (IS_ERR(leaf)) {
                ret = PTR_ERR(leaf);
                goto fail;
@@ -2468,7 +2468,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                                                        disko, diskl, 0,
                                                        root->root_key.objectid,
                                                        btrfs_ino(inode),
-                                                       new_key.offset - datao);
+                                                       new_key.offset - datao,
+                                                       0);
                                        BUG_ON(ret);
                                }
                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
@@ -3018,7 +3019,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
 {
        int ret = 0;
        int size;
-       u64 extent_offset;
+       u64 extent_item_pos;
        struct btrfs_ioctl_logical_ino_args *loi;
        struct btrfs_data_container *inodes = NULL;
        struct btrfs_path *path = NULL;
@@ -3049,15 +3050,17 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
        }
 
        ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
+       btrfs_release_path(path);
 
        if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                ret = -ENOENT;
        if (ret < 0)
                goto out;
 
-       extent_offset = loi->logical - key.objectid;
+       extent_item_pos = loi->logical - key.objectid;
        ret = iterate_extent_inodes(root->fs_info, path, key.objectid,
-                                       extent_offset, build_ino_list, inodes);
+                                       extent_item_pos, build_ino_list,
+                                       inodes);
 
        if (ret < 0)
                goto out;
index d77b67c4b275731417c11e04ad38b3dc9c4d456b..5e178d8f7167f496e928613b6c1f0000c2ea242e 100644 (file)
@@ -33,6 +33,14 @@ void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
  */
 void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
+       if (eb->lock_nested) {
+               read_lock(&eb->lock);
+               if (eb->lock_nested && current->pid == eb->lock_owner) {
+                       read_unlock(&eb->lock);
+                       return;
+               }
+               read_unlock(&eb->lock);
+       }
        if (rw == BTRFS_WRITE_LOCK) {
                if (atomic_read(&eb->blocking_writers) == 0) {
                        WARN_ON(atomic_read(&eb->spinning_writers) != 1);
@@ -57,6 +65,14 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
  */
 void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
+       if (eb->lock_nested) {
+               read_lock(&eb->lock);
+               if (&eb->lock_nested && current->pid == eb->lock_owner) {
+                       read_unlock(&eb->lock);
+                       return;
+               }
+               read_unlock(&eb->lock);
+       }
        if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
                BUG_ON(atomic_read(&eb->blocking_writers) != 1);
                write_lock(&eb->lock);
@@ -81,12 +97,25 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 void btrfs_tree_read_lock(struct extent_buffer *eb)
 {
 again:
+       read_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers) &&
+           current->pid == eb->lock_owner) {
+               /*
+                * This extent is already write-locked by our thread. We allow
+                * an additional read lock to be added because it's for the same
+                * thread. btrfs_find_all_roots() depends on this as it may be
+                * called on a partly (write-)locked tree.
+                */
+               BUG_ON(eb->lock_nested);
+               eb->lock_nested = 1;
+               read_unlock(&eb->lock);
+               return;
+       }
+       read_unlock(&eb->lock);
        wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
        read_lock(&eb->lock);
        if (atomic_read(&eb->blocking_writers)) {
                read_unlock(&eb->lock);
-               wait_event(eb->write_lock_wq,
-                          atomic_read(&eb->blocking_writers) == 0);
                goto again;
        }
        atomic_inc(&eb->read_locks);
@@ -129,6 +158,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
        }
        atomic_inc(&eb->write_locks);
        atomic_inc(&eb->spinning_writers);
+       eb->lock_owner = current->pid;
        return 1;
 }
 
@@ -137,6 +167,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
  */
 void btrfs_tree_read_unlock(struct extent_buffer *eb)
 {
+       if (eb->lock_nested) {
+               read_lock(&eb->lock);
+               if (eb->lock_nested && current->pid == eb->lock_owner) {
+                       eb->lock_nested = 0;
+                       read_unlock(&eb->lock);
+                       return;
+               }
+               read_unlock(&eb->lock);
+       }
        btrfs_assert_tree_read_locked(eb);
        WARN_ON(atomic_read(&eb->spinning_readers) == 0);
        atomic_dec(&eb->spinning_readers);
@@ -149,6 +188,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
  */
 void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
 {
+       if (eb->lock_nested) {
+               read_lock(&eb->lock);
+               if (eb->lock_nested && current->pid == eb->lock_owner) {
+                       eb->lock_nested = 0;
+                       read_unlock(&eb->lock);
+                       return;
+               }
+               read_unlock(&eb->lock);
+       }
        btrfs_assert_tree_read_locked(eb);
        WARN_ON(atomic_read(&eb->blocking_readers) == 0);
        if (atomic_dec_and_test(&eb->blocking_readers))
@@ -181,6 +229,7 @@ again:
        WARN_ON(atomic_read(&eb->spinning_writers));
        atomic_inc(&eb->spinning_writers);
        atomic_inc(&eb->write_locks);
+       eb->lock_owner = current->pid;
        return 0;
 }
 
index cfb55434a46981fa64416e68fa3fd29cf58238f5..efe9f792544de8b9d7c25eed3275322084140059 100644 (file)
@@ -1604,12 +1604,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
                ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
                                           num_bytes, parent,
                                           btrfs_header_owner(leaf),
-                                          key.objectid, key.offset);
+                                          key.objectid, key.offset, 1);
                BUG_ON(ret);
 
                ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
                                        parent, btrfs_header_owner(leaf),
-                                       key.objectid, key.offset);
+                                       key.objectid, key.offset, 1);
                BUG_ON(ret);
        }
        if (dirty)
@@ -1778,21 +1778,23 @@ again:
 
                ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
                                        path->nodes[level]->start,
-                                       src->root_key.objectid, level - 1, 0);
+                                       src->root_key.objectid, level - 1, 0,
+                                       1);
                BUG_ON(ret);
                ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
                                        0, dest->root_key.objectid, level - 1,
-                                       0);
+                                       0, 1);
                BUG_ON(ret);
 
                ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
                                        path->nodes[level]->start,
-                                       src->root_key.objectid, level - 1, 0);
+                                       src->root_key.objectid, level - 1, 0,
+                                       1);
                BUG_ON(ret);
 
                ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
                                        0, dest->root_key.objectid, level - 1,
-                                       0);
+                                       0, 1);
                BUG_ON(ret);
 
                btrfs_unlock_up_safe(path, 0);
@@ -2244,7 +2246,7 @@ again:
                } else {
                        list_del_init(&reloc_root->root_list);
                }
-               btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
+               btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
        }
 
        if (found) {
@@ -2558,7 +2560,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                                                node->eb->start, blocksize,
                                                upper->eb->start,
                                                btrfs_header_owner(upper->eb),
-                                               node->level, 0);
+                                               node->level, 0, 1);
                        BUG_ON(ret);
 
                        ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
index ddf2c90d3fc0c475cbfabf6397c84f734abcc5e8..6a6a51a809ba1c56b83471ac38a825e9af9e3689 100644 (file)
@@ -309,7 +309,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
        u8 ref_level;
        unsigned long ptr = 0;
        const int bufsize = 4096;
-       u64 extent_offset;
+       u64 extent_item_pos;
 
        path = btrfs_alloc_path();
 
@@ -329,12 +329,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
        if (ret < 0)
                goto out;
 
-       extent_offset = swarn.logical - found_key.objectid;
+       extent_item_pos = swarn.logical - found_key.objectid;
        swarn.extent_item_size = found_key.offset;
 
        eb = path->nodes[0];
        ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
        item_size = btrfs_item_size_nr(eb, path->slots[0]);
+       btrfs_release_path(path);
 
        if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                do {
@@ -351,7 +352,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
        } else {
                swarn.path = path;
                iterate_extent_inodes(fs_info, path, found_key.objectid,
-                                       extent_offset,
+                                       extent_item_pos,
                                        scrub_print_warning_inode, &swarn);
        }
 
index 360c2dfd1ee64bb8af741023ad7bbc45badf8fde..d5f987b49d70aa8583c598792a926613c59f912a 100644 (file)
@@ -36,6 +36,8 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
+               WARN_ON(transaction->delayed_refs.root.rb_node);
+               WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
                memset(transaction, 0, sizeof(*transaction));
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
@@ -108,8 +110,11 @@ loop:
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;
+       cur_trans->delayed_refs.seq = 1;
+       init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
        spin_lock_init(&cur_trans->commit_lock);
        spin_lock_init(&cur_trans->delayed_refs.lock);
+       INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
 
        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
@@ -1386,9 +1391,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 
                if (btrfs_header_backref_rev(root->node) <
                    BTRFS_MIXED_BACKREF_REV)
-                       btrfs_drop_snapshot(root, NULL, 0);
+                       btrfs_drop_snapshot(root, NULL, 0, 0);
                else
-                       btrfs_drop_snapshot(root, NULL, 1);
+                       btrfs_drop_snapshot(root, NULL, 1, 0);
        }
        return 0;
 }
index 3568374d419da8ee50eb17f4af5319964750614d..cb877e0886a71b80e0f44f60e0d884f57203a597 100644 (file)
@@ -589,7 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                                ret = btrfs_inc_extent_ref(trans, root,
                                                ins.objectid, ins.offset,
                                                0, root->root_key.objectid,
-                                               key->objectid, offset);
+                                               key->objectid, offset, 0);
                                BUG_ON(ret);
                        } else {
                                /*
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
new file mode 100644 (file)
index 0000000..12f5147
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2011 STRATO AG
+ * written by Arne Jansen <sensille@gmx.net>
+ * Distributed under the GNU GPL license version 2.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include "ulist.h"
+
+/*
+ * ulist is a generic data structure to hold a collection of unique u64
+ * values. The only operations it supports is adding to the list and
+ * enumerating it.
+ * It is possible to store an auxiliary value along with the key.
+ *
+ * The implementation is preliminary and can probably be sped up
+ * significantly. A first step would be to store the values in an rbtree
+ * as soon as ULIST_SIZE is exceeded.
+ *
+ * A sample usage for ulists is the enumeration of directed graphs without
+ * visiting a node twice. The pseudo-code could look like this:
+ *
+ * ulist = ulist_alloc();
+ * ulist_add(ulist, root);
+ * elem = NULL;
+ *
+ * while ((elem = ulist_next(ulist, elem)) {
+ *     for (all child nodes n in elem)
+ *             ulist_add(ulist, n);
+ *     do something useful with the node;
+ * }
+ * ulist_free(ulist);
+ *
+ * This assumes the graph nodes are adressable by u64. This stems from the
+ * usage for tree enumeration in btrfs, where the logical addresses are
+ * 64 bit.
+ *
+ * It is also useful for tree enumeration which could be done elegantly
+ * recursively, but is not possible due to kernel stack limitations. The
+ * loop would be similar to the above.
+ */
+
+/**
+ * ulist_init - freshly initialize a ulist
+ * @ulist:     the ulist to initialize
+ *
+ * Note: don't use this function to init an already used ulist, use
+ * ulist_reinit instead.
+ */
+void ulist_init(struct ulist *ulist)
+{
+       ulist->nnodes = 0;
+       ulist->nodes = ulist->int_nodes;
+       ulist->nodes_alloced = ULIST_SIZE;
+}
+EXPORT_SYMBOL(ulist_init);
+
+/**
+ * ulist_fini - free up additionally allocated memory for the ulist
+ * @ulist:     the ulist from which to free the additional memory
+ *
+ * This is useful in cases where the base 'struct ulist' has been statically
+ * allocated.
+ */
+void ulist_fini(struct ulist *ulist)
+{
+       /*
+        * The first ULIST_SIZE elements are stored inline in struct ulist.
+        * Only if more elements are alocated they need to be freed.
+        */
+       if (ulist->nodes_alloced > ULIST_SIZE)
+               kfree(ulist->nodes);
+       ulist->nodes_alloced = 0;       /* in case ulist_fini is called twice */
+}
+EXPORT_SYMBOL(ulist_fini);
+
+/**
+ * ulist_reinit - prepare a ulist for reuse
+ * @ulist:     ulist to be reused
+ *
+ * Free up all additional memory allocated for the list elements and reinit
+ * the ulist.
+ */
+void ulist_reinit(struct ulist *ulist)
+{
+       ulist_fini(ulist);
+       ulist_init(ulist);
+}
+EXPORT_SYMBOL(ulist_reinit);
+
+/**
+ * ulist_alloc - dynamically allocate a ulist
+ * @gfp_mask:  allocation flags to for base allocation
+ *
+ * The allocated ulist will be returned in an initialized state.
+ */
+struct ulist *ulist_alloc(unsigned long gfp_mask)
+{
+       struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
+
+       if (!ulist)
+               return NULL;
+
+       ulist_init(ulist);
+
+       return ulist;
+}
+EXPORT_SYMBOL(ulist_alloc);
+
+/**
+ * ulist_free - free dynamically allocated ulist
+ * @ulist:     ulist to free
+ *
+ * It is not necessary to call ulist_fini before.
+ */
+void ulist_free(struct ulist *ulist)
+{
+       if (!ulist)
+               return;
+       ulist_fini(ulist);
+       kfree(ulist);
+}
+EXPORT_SYMBOL(ulist_free);
+
+/**
+ * ulist_add - add an element to the ulist
+ * @ulist:     ulist to add the element to
+ * @val:       value to add to ulist
+ * @aux:       auxiliary value to store along with val
+ * @gfp_mask:  flags to use for allocation
+ *
+ * Note: locking must be provided by the caller. In case of rwlocks write
+ *       locking is needed
+ *
+ * Add an element to a ulist. The @val will only be added if it doesn't
+ * already exist. If it is added, the auxiliary value @aux is stored along with
+ * it. In case @val already exists in the ulist, @aux is ignored, even if
+ * it differs from the already stored value.
+ *
+ * ulist_add returns 0 if @val already exists in ulist and 1 if @val has been
+ * inserted.
+ * In case of allocation failure -ENOMEM is returned and the ulist stays
+ * unaltered.
+ */
+int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
+             unsigned long gfp_mask)
+{
+       int i;
+
+       for (i = 0; i < ulist->nnodes; ++i) {
+               if (ulist->nodes[i].val == val)
+                       return 0;
+       }
+
+       if (ulist->nnodes >= ulist->nodes_alloced) {
+               u64 new_alloced = ulist->nodes_alloced + 128;
+               struct ulist_node *new_nodes;
+               void *old = NULL;
+
+               /*
+                * if nodes_alloced == ULIST_SIZE no memory has been allocated
+                * yet, so pass NULL to krealloc
+                */
+               if (ulist->nodes_alloced > ULIST_SIZE)
+                       old = ulist->nodes;
+
+               new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced,
+                                    gfp_mask);
+               if (!new_nodes)
+                       return -ENOMEM;
+
+               if (!old)
+                       memcpy(new_nodes, ulist->int_nodes,
+                              sizeof(ulist->int_nodes));
+
+               ulist->nodes = new_nodes;
+               ulist->nodes_alloced = new_alloced;
+       }
+       ulist->nodes[ulist->nnodes].val = val;
+       ulist->nodes[ulist->nnodes].aux = aux;
+       ++ulist->nnodes;
+
+       return 1;
+}
+EXPORT_SYMBOL(ulist_add);
+
+/**
+ * ulist_next - iterate ulist
+ * @ulist:     ulist to iterate
+ * @prev:      previously returned element or %NULL to start iteration
+ *
+ * Note: locking must be provided by the caller. In case of rwlocks only read
+ *       locking is needed
+ *
+ * This function is used to iterate an ulist. The iteration is started with
+ * @prev = %NULL. It returns the next element from the ulist or %NULL when the
+ * end is reached. No guarantee is made with respect to the order in which
+ * the elements are returned. They might neither be returned in order of
+ * addition nor in ascending order.
+ * It is allowed to call ulist_add during an enumeration. Newly added items
+ * are guaranteed to show up in the running enumeration.
+ */
+struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev)
+{
+       int next;
+
+       if (ulist->nnodes == 0)
+               return NULL;
+
+       if (!prev)
+               return &ulist->nodes[0];
+
+       next = (prev - ulist->nodes) + 1;
+       if (next < 0 || next >= ulist->nnodes)
+               return NULL;
+
+       return &ulist->nodes[next];
+}
+EXPORT_SYMBOL(ulist_next);
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
new file mode 100644 (file)
index 0000000..2e25dec
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 STRATO AG
+ * written by Arne Jansen <sensille@gmx.net>
+ * Distributed under the GNU GPL license version 2.
+ *
+ */
+
+#ifndef __ULIST__
+#define __ULIST__
+
+/*
+ * ulist is a generic data structure to hold a collection of unique u64
+ * values. The only operations it supports is adding to the list and
+ * enumerating it.
+ * It is possible to store an auxiliary value along with the key.
+ *
+ * The implementation is preliminary and can probably be sped up
+ * significantly. A first step would be to store the values in an rbtree
+ * as soon as ULIST_SIZE is exceeded.
+ */
+
+/*
+ * number of elements statically allocated inside struct ulist
+ */
+#define ULIST_SIZE 16
+
+/*
+ * element of the list
+ */
+struct ulist_node {
+       u64 val;                /* value to store */
+       unsigned long aux;      /* auxiliary value saved along with the val */
+};
+
+struct ulist {
+       /*
+        * number of elements stored in list
+        */
+       unsigned long nnodes;
+
+       /*
+        * number of nodes we already have room for
+        */
+       unsigned long nodes_alloced;
+
+       /*
+        * pointer to the array storing the elements. The first ULIST_SIZE
+        * elements are stored inline. In this case the it points to int_nodes.
+        * After exceeding ULIST_SIZE, dynamic memory is allocated.
+        */
+       struct ulist_node *nodes;
+
+       /*
+        * inline storage space for the first ULIST_SIZE entries
+        */
+       struct ulist_node int_nodes[ULIST_SIZE];
+};
+
+void ulist_init(struct ulist *ulist);
+void ulist_fini(struct ulist *ulist);
+void ulist_reinit(struct ulist *ulist);
+struct ulist *ulist_alloc(unsigned long gfp_mask);
+void ulist_free(struct ulist *ulist);
+int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
+             unsigned long gfp_mask);
+struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev);
+
+#endif