Pileus Git - ~andy/linux/blobdiff - fs/btrfs/extent-tree.c
Merge branch 'for-chris' of git://git.jan-o-sch.net/btrfs-unstable into integration
[~andy/linux] / fs / btrfs / extent-tree.c
index 1c1cf216be8059e37a3c63b767dd40d036ce1f94..a44072a692ab684c59e229f266af5d9ae26953b4 100644 (file)
@@ -1871,20 +1871,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
-                        u64 root_objectid, u64 owner, u64 offset)
+                        u64 root_objectid, u64 owner, u64 offset, int for_cow)
 {
        int ret;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       /* tree refs owned by the tree-log root must never be queued here */
        BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
               root_objectid == BTRFS_TREE_LOG_OBJECTID);
 
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+               ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+                                       num_bytes,
                                        parent, root_objectid, (int)owner,
-                                       BTRFS_ADD_DELAYED_REF, NULL);
+                                       BTRFS_ADD_DELAYED_REF, NULL, for_cow);
        } else {
-               ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+               ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+                                       num_bytes,
                                        parent, root_objectid, owner, offset,
-                                       BTRFS_ADD_DELAYED_REF, NULL);
+                                       BTRFS_ADD_DELAYED_REF, NULL, for_cow);
        }
        return ret;
 }
@@ -2231,6 +2235,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                        }
                }
 
+               /*
+                * locked_ref is the head node, so we have to go one
+                * node back for any delayed ref updates
+                */
+               ref = select_delayed_ref(locked_ref);
+
+               if (ref && ref->seq &&
+                   btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
+                       /*
+                        * there are still refs with lower seq numbers in the
+                        * process of being added. Don't run this ref yet.
+                        */
+                       list_del_init(&locked_ref->cluster);
+                       mutex_unlock(&locked_ref->mutex);
+                       locked_ref = NULL;
+                       delayed_refs->num_heads_ready++;
+                       spin_unlock(&delayed_refs->lock);
+                       cond_resched();
+                       spin_lock(&delayed_refs->lock);
+                       continue;
+               }
+
                /*
                 * record the must insert reserved flag before we
                 * drop the spin lock.
@@ -2241,11 +2267,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                extent_op = locked_ref->extent_op;
                locked_ref->extent_op = NULL;
 
-               /*
-                * locked_ref is the head node, so we have to go one
-                * node back for any delayed ref updates
-                */
-               ref = select_delayed_ref(locked_ref);
                if (!ref) {
                        /* All delayed refs have been processed, Go ahead
                         * and send the head node to run_one_delayed_ref,
@@ -2276,7 +2297,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                ref->in_tree = 0;
                rb_erase(&ref->rb_node, &delayed_refs->root);
                delayed_refs->num_entries--;
-
+               /*
+                * we modified num_entries, but as we're currently running
+                * delayed refs, skip
+                *     wake_up(&delayed_refs->seq_wait);
+                * here.
+                */
                spin_unlock(&delayed_refs->lock);
 
                ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2297,6 +2323,23 @@ next:
        return count;
 }
 
+/* lock held on entry/exit; sleeps unlocked until num_entries/seq_head change */
+static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
+                       unsigned long num_refs)
+{
+       struct list_head *first_seq = delayed_refs->seq_head.next;
+
+       spin_unlock(&delayed_refs->lock);
+       pr_debug("waiting for more refs (num %lu, first %p)\n",
+                num_refs, first_seq);
+       wait_event(delayed_refs->seq_wait,
+                  num_refs != delayed_refs->num_entries ||
+                  delayed_refs->seq_head.next != first_seq);
+       pr_debug("done waiting for more refs (num %lu, first %p)\n",
+                delayed_refs->num_entries, delayed_refs->seq_head.next);
+       spin_lock(&delayed_refs->lock);
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -2312,8 +2355,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_node *ref;
        struct list_head cluster;
        int ret;
+       u64 delayed_start;
        int run_all = count == (unsigned long)-1;
        int run_most = 0;
+       unsigned long num_refs = 0;
+       int consider_waiting;
 
        if (root == root->fs_info->extent_root)
                root = root->fs_info->tree_root;
@@ -2325,6 +2371,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        delayed_refs = &trans->transaction->delayed_refs;
        INIT_LIST_HEAD(&cluster);
 again:
+       consider_waiting = 0;
        spin_lock(&delayed_refs->lock);
        if (count == 0) {
                count = delayed_refs->num_entries * 2;
@@ -2341,11 +2388,35 @@ again:
                 * of refs to process starting at the first one we are able to
                 * lock
                 */
+               delayed_start = delayed_refs->run_delayed_start;
                ret = btrfs_find_ref_cluster(trans, &cluster,
                                             delayed_refs->run_delayed_start);
                if (ret)
                        break;
 
+               if (delayed_start >= delayed_refs->run_delayed_start) {
+                       if (consider_waiting == 0) {
+                               /*
+                                * btrfs_find_ref_cluster looped. let's do one
+                                * more cycle. if we don't run any delayed ref
+                                * during that cycle (because we can't because
+                                * all of them are blocked) and if the number of
+                                * refs doesn't change, we avoid busy waiting.
+                                */
+                               consider_waiting = 1;
+                               num_refs = delayed_refs->num_entries;
+                       } else {
+                               wait_for_more_refs(delayed_refs, num_refs);
+                               /*
+                                * after waiting, things have changed. we
+                                * dropped the lock and someone else might have
+                                * run some refs, built new clusters and so on.
+                                * therefore, we restart staleness detection.
+                                */
+                               consider_waiting = 0;
+                       }
+               }
+
                ret = run_clustered_refs(trans, root, &cluster);
                BUG_ON(ret < 0);
 
@@ -2353,6 +2424,11 @@ again:
 
                if (count == 0)
                        break;
+
+               if (ret || delayed_refs->run_delayed_start == 0) {
+                       /* refs were run, let's reset staleness detection */
+                       consider_waiting = 0;
+               }
        }
 
        if (run_all) {
@@ -2410,7 +2486,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
        extent_op->update_key = 0;
        extent_op->is_data = is_data ? 1 : 0;
 
-       ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
+       ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
+                                         num_bytes, extent_op);
        if (ret)
                kfree(extent_op);
        return ret;
@@ -2595,7 +2672,7 @@ out:
 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct extent_buffer *buf,
-                          int full_backref, int inc)
+                          int full_backref, int inc, int for_cow)
 {
        u64 bytenr;
        u64 num_bytes;
@@ -2608,7 +2685,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
        int level;
        int ret = 0;
        int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
-                           u64, u64, u64, u64, u64, u64);
+                           u64, u64, u64, u64, u64, u64, int);
 
        ref_root = btrfs_header_owner(buf);
        nritems = btrfs_header_nritems(buf);
@@ -2645,14 +2722,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                        key.offset -= btrfs_file_extent_offset(buf, fi);
                        ret = process_func(trans, root, bytenr, num_bytes,
                                           parent, ref_root, key.objectid,
-                                          key.offset);
+                                          key.offset, for_cow);
                        if (ret)
                                goto fail;
                } else {
                        bytenr = btrfs_node_blockptr(buf, i);
                        num_bytes = btrfs_level_size(root, level - 1);
                        ret = process_func(trans, root, bytenr, num_bytes,
-                                          parent, ref_root, level - 1, 0);
+                                          parent, ref_root, level - 1, 0,
+                                          for_cow);
                        if (ret)
                                goto fail;
                }
@@ -2664,15 +2742,15 @@ fail:
 }
 
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref)
+                 struct extent_buffer *buf, int full_backref, int for_cow)
 {
-       return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
+       return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); /* inc = 1 */
 }
 
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref)
+                 struct extent_buffer *buf, int full_backref, int for_cow)
 {
-       return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
+       return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); /* inc = 0 */
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -4954,6 +5032,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
        rb_erase(&head->node.rb_node, &delayed_refs->root);
 
        delayed_refs->num_entries--;
+       if (waitqueue_active(&delayed_refs->seq_wait))
+               wake_up(&delayed_refs->seq_wait);
 
        /*
         * we don't take a ref on the node because we're removing it from the
@@ -4981,16 +5061,17 @@ out:
 void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct extent_buffer *buf,
-                          u64 parent, int last_ref)
+                          u64 parent, int last_ref, int for_cow)
 {
        struct btrfs_block_group_cache *cache = NULL;
        int ret;
 
        if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
-                                               parent, root->root_key.objectid,
-                                               btrfs_header_level(buf),
-                                               BTRFS_DROP_DELAYED_REF, NULL);
+               ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+                                       buf->start, buf->len,
+                                       parent, root->root_key.objectid,
+                                       btrfs_header_level(buf),
+                                       BTRFS_DROP_DELAYED_REF, NULL, for_cow);
                BUG_ON(ret);
        }
 
@@ -5025,12 +5106,12 @@ out:
        btrfs_put_block_group(cache);
 }
 
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root,
-                     u64 bytenr, u64 num_bytes, u64 parent,
-                     u64 root_objectid, u64 owner, u64 offset)
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+                     u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+                     u64 owner, u64 offset, int for_cow)
 {
        int ret;
+       struct btrfs_fs_info *fs_info = root->fs_info;
 
        /*
         * tree log blocks never actually go into the extent allocation
@@ -5042,14 +5123,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                btrfs_pin_extent(root, bytenr, num_bytes, 1);
                ret = 0;
        } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
-               ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+               ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+                                       num_bytes,
                                        parent, root_objectid, (int)owner,
-                                       BTRFS_DROP_DELAYED_REF, NULL);
+                                       BTRFS_DROP_DELAYED_REF, NULL, for_cow);
                BUG_ON(ret);
        } else {
-               ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
-                                       parent, root_objectid, owner,
-                                       offset, BTRFS_DROP_DELAYED_REF, NULL);
+               ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+                                               num_bytes,
+                                               parent, root_objectid, owner,
+                                               offset, BTRFS_DROP_DELAYED_REF,
+                                               NULL, for_cow);
                BUG_ON(ret);
        }
        return ret;
@@ -5877,9 +5961,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 
        BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
 
-       ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
-                                        0, root_objectid, owner, offset,
-                                        BTRFS_ADD_DELAYED_EXTENT, NULL);
+       ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
+                                        ins->offset, 0,
+                                        root_objectid, owner, offset,
+                                        BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
        return ret;
 }
 
@@ -6049,7 +6134,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root, u32 blocksize,
                                        u64 parent, u64 root_objectid,
                                        struct btrfs_disk_key *key, int level,
-                                       u64 hint, u64 empty_size)
+                                       u64 hint, u64 empty_size, int for_cow)
 {
        struct btrfs_key ins;
        struct btrfs_block_rsv *block_rsv;
@@ -6093,10 +6178,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                extent_op->update_flags = 1;
                extent_op->is_data = 0;
 
-               ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
+               ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+                                       ins.objectid,
                                        ins.offset, parent, root_objectid,
                                        level, BTRFS_ADD_DELAYED_EXTENT,
-                                       extent_op);
+                                       extent_op, for_cow);
                BUG_ON(ret);
        }
        return buf;
@@ -6113,6 +6199,7 @@ struct walk_control {
        int keep_locks;
        int reada_slot;
        int reada_count;
+       int for_reloc;
 };
 
 #define DROP_REFERENCE 1
@@ -6251,9 +6338,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
        /* wc->stage == UPDATE_BACKREF */
        if (!(wc->flags[level] & flag)) {
                BUG_ON(!path->locks[level]);
-               ret = btrfs_inc_ref(trans, root, eb, 1);
+               ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
                BUG_ON(ret);
-               ret = btrfs_dec_ref(trans, root, eb, 0);
+               ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
                BUG_ON(ret);
                ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
                                                  eb->len, flag, 0);
@@ -6397,7 +6484,7 @@ skip:
                }
 
                ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-                                       root->root_key.objectid, level - 1, 0);
+                               root->root_key.objectid, level - 1, 0, 0);
                BUG_ON(ret);
        }
        btrfs_tree_unlock(next);
@@ -6471,9 +6558,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
        if (wc->refs[level] == 1) {
                if (level == 0) {
                        if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
-                               ret = btrfs_dec_ref(trans, root, eb, 1);
+                               ret = btrfs_dec_ref(trans, root, eb, 1,
+                                                   wc->for_reloc);
                        else
-                               ret = btrfs_dec_ref(trans, root, eb, 0);
+                               ret = btrfs_dec_ref(trans, root, eb, 0,
+                                                   wc->for_reloc);
                        BUG_ON(ret);
                }
                /* make block locked assertion in clean_tree_block happy */
@@ -6500,7 +6589,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                               btrfs_header_owner(path->nodes[level + 1]));
        }
 
-       btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
+       btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
 out:
        wc->refs[level] = 0;
        wc->flags[level] = 0;
@@ -6584,7 +6673,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
  * blocks are properly updated.
  */
 void btrfs_drop_snapshot(struct btrfs_root *root,
-                        struct btrfs_block_rsv *block_rsv, int update_ref)
+                        struct btrfs_block_rsv *block_rsv, int update_ref,
+                        int for_reloc)
 {
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans;
@@ -6672,6 +6762,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
        wc->stage = DROP_REFERENCE;
        wc->update_ref = update_ref;
        wc->keep_locks = 0;
+       wc->for_reloc = for_reloc;
        wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {
@@ -6756,6 +6847,7 @@ out:
  * drop subtree rooted at tree block 'node'.
  *
  * NOTE: this function will unlock and release tree block 'node'
+ * only used by relocation code
  */
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
@@ -6800,6 +6892,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        wc->stage = DROP_REFERENCE;
        wc->update_ref = 0;
        wc->keep_locks = 1;
+       wc->for_reloc = 1;
        wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {