]> Pileus Git - ~andy/linux/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 11 Oct 2009 18:23:13 +0000 (11:23 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 11 Oct 2009 18:23:13 +0000 (11:23 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix file clone ioctl for bookend extents
  Btrfs: fix uninit compiler warning in cow_file_range_nocow
  Btrfs: constify dentry_operations
  Btrfs: optimize back reference update during btrfs_drop_snapshot
  Btrfs: remove negative dentry when deleting subvolumne
  Btrfs: optimize fsync for the single writer case
  Btrfs: async delalloc flushing under space pressure
  Btrfs: release delalloc reservations on extent item insertion
  Btrfs: delay clearing EXTENT_DELALLOC for compressed extents
  Btrfs: cleanup extent_clear_unlock_delalloc flags
  Btrfs: fix possible softlockup in the allocator
  Btrfs: fix deadlock on async thread startup

14 files changed:
fs/btrfs/async-thread.c
fs/btrfs/async-thread.h
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/relocation.c
fs/btrfs/tree-log.c

index 282ca085c2fbff854bcf4a396d0773db129c5085..c0861e781cdbdfde4b7c42ec1b90032f2059e69a 100644 (file)
@@ -63,6 +63,51 @@ struct btrfs_worker_thread {
        int idle;
 };
 
+/*
+ * btrfs_start_workers uses kthread_run, which can block waiting for memory
+ * for a very long time.  It will actually throttle on page writeback,
+ * and so it may not make progress until after our btrfs worker threads
+ * process all of the pending work structs in their queue
+ *
+ * This means we can't use btrfs_start_workers from inside a btrfs worker
+ * thread that is used as part of cleaning dirty memory, which pretty much
+ * involves all of the worker threads.
+ *
+ * Instead we have a helper queue that never has more than one thread,
+ * where we schedule thread start operations.  This worker_start struct
+ * is used to contain the work and hold a pointer to the queue that needs
+ * another worker.
+ */
+struct worker_start {
+       struct btrfs_work work;         /* item queued on the helper queue */
+       struct btrfs_workers *queue;    /* pool that needs another thread */
+};
+
+static int __btrfs_start_workers(struct btrfs_workers *workers,
+                                int num_workers);
+
+/*
+ * Work callback run on the helper queue: start one thread for the pool
+ * recorded in the request, then free the request.
+ *
+ * check_pending_worker_creates() already added this thread to
+ * num_workers_starting before queueing the request, so call
+ * __btrfs_start_workers() directly.  Going through btrfs_start_workers()
+ * would count the same thread a second time, and only one of the two
+ * counts is dropped once the thread has been added to the pool.
+ */
+static void start_new_worker_func(struct btrfs_work *work)
+{
+       struct worker_start *start;
+
+       start = container_of(work, struct worker_start, work);
+       __btrfs_start_workers(start->queue, 1);
+       /* the request was allocated by start_new_worker(); we own it now */
+       kfree(start);
+}
+
+/*
+ * Ask the helper queue (queue->atomic_worker_start) to start one more
+ * thread for @queue.
+ *
+ * The caller has already counted the new thread in
+ * queue->num_workers_starting.  If the request cannot be allocated or
+ * queued, no thread will ever be started for that count, so it is dropped
+ * again here; otherwise the pool would look fuller than it really is.
+ *
+ * Returns 0 on success, -ENOMEM if the request could not be allocated, or
+ * the error returned by btrfs_queue_worker().
+ */
+static int start_new_worker(struct btrfs_workers *queue)
+{
+       struct worker_start *start;
+       unsigned long flags;
+       int ret;
+
+       start = kzalloc(sizeof(*start), GFP_NOFS);
+       if (!start) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       start->work.func = start_new_worker_func;
+       start->queue = queue;
+       ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
+       if (ret) {
+               kfree(start);
+               goto fail;
+       }
+       return 0;
+
+fail:
+       /* undo the caller's accounting: this thread is not going to start */
+       spin_lock_irqsave(&queue->lock, flags);
+       queue->num_workers_starting--;
+       WARN_ON(queue->num_workers_starting < 0);
+       spin_unlock_irqrestore(&queue->lock, flags);
+       return ret;
+}
+
 /*
  * helper function to move a thread onto the idle list after it
  * has finished some requests.
@@ -118,11 +163,13 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
                goto out;
 
        workers->atomic_start_pending = 0;
-       if (workers->num_workers >= workers->max_workers)
+       if (workers->num_workers + workers->num_workers_starting >=
+           workers->max_workers)
                goto out;
 
+       workers->num_workers_starting += 1;
        spin_unlock_irqrestore(&workers->lock, flags);
-       btrfs_start_workers(workers, 1);
+       start_new_worker(workers);
        return;
 
 out:
@@ -390,9 +437,11 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
 /*
  * simple init on struct btrfs_workers
  */
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
+                       struct btrfs_workers *async_helper)
 {
        workers->num_workers = 0;
+       workers->num_workers_starting = 0;
        INIT_LIST_HEAD(&workers->worker_list);
        INIT_LIST_HEAD(&workers->idle_list);
        INIT_LIST_HEAD(&workers->order_list);
@@ -404,14 +453,15 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
        workers->name = name;
        workers->ordered = 0;
        workers->atomic_start_pending = 0;
-       workers->atomic_worker_start = 0;
+       workers->atomic_worker_start = async_helper;
 }
 
 /*
  * starts new worker threads.  This does not enforce the max worker
  * count in case you need to temporarily go past it.
  */
-int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+static int __btrfs_start_workers(struct btrfs_workers *workers,
+                                int num_workers)
 {
        struct btrfs_worker_thread *worker;
        int ret = 0;
@@ -444,6 +494,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
                list_add_tail(&worker->worker_list, &workers->idle_list);
                worker->idle = 1;
                workers->num_workers++;
+               workers->num_workers_starting--;
+               WARN_ON(workers->num_workers_starting < 0);
                spin_unlock_irq(&workers->lock);
        }
        return 0;
@@ -452,6 +504,14 @@ fail:
        return ret;
 }
 
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+{      /* for callers that have not yet counted the threads as starting */
+       spin_lock_irq(&workers->lock);
+       workers->num_workers_starting += num_workers;   /* counted up front */
+       spin_unlock_irq(&workers->lock);
+       return __btrfs_start_workers(workers, num_workers); /* -1 per thread added */
+}
+
 /*
  * run through the list and find a worker thread that doesn't have a lot
  * to do right now.  This can return null if we aren't yet at the thread
@@ -461,7 +521,10 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
 {
        struct btrfs_worker_thread *worker;
        struct list_head *next;
-       int enforce_min = workers->num_workers < workers->max_workers;
+       int enforce_min;
+
+       enforce_min = (workers->num_workers + workers->num_workers_starting) <
+               workers->max_workers;
 
        /*
         * if we find an idle thread, don't move it to the end of the
@@ -509,15 +572,17 @@ again:
        worker = next_worker(workers);
 
        if (!worker) {
-               if (workers->num_workers >= workers->max_workers) {
+               if (workers->num_workers + workers->num_workers_starting >=
+                   workers->max_workers) {
                        goto fallback;
                } else if (workers->atomic_worker_start) {
                        workers->atomic_start_pending = 1;
                        goto fallback;
                } else {
+                       workers->num_workers_starting++;
                        spin_unlock_irqrestore(&workers->lock, flags);
                        /* we're below the limit, start another worker */
-                       btrfs_start_workers(workers, 1);
+                       __btrfs_start_workers(workers, 1);
                        goto again;
                }
        }
index fc089b95ec14f24c9a971b44f3718c2029f241ef..5077746cf85e049e87bcd8ded49b592ecc271605 100644 (file)
@@ -64,6 +64,8 @@ struct btrfs_workers {
        /* current number of running workers */
        int num_workers;
 
+       int num_workers_starting;
+
        /* max number of workers allowed.  changed by btrfs_start_workers */
        int max_workers;
 
@@ -78,9 +80,10 @@ struct btrfs_workers {
 
        /*
         * are we allowed to sleep while starting workers or are we required
-        * to start them at a later time?
+        * to start them at a later time?  If we can't sleep, this indicates
+        * which queue we need to use to schedule thread creation.
         */
-       int atomic_worker_start;
+       struct btrfs_workers *atomic_worker_start;
 
        /* list with all the work threads.  The workers on the idle thread
         * may be actively servicing jobs, but they haven't yet hit the
@@ -109,7 +112,8 @@ struct btrfs_workers {
 int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
 int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
 int btrfs_stop_workers(struct btrfs_workers *workers);
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
+                       struct btrfs_workers *async_starter);
 int btrfs_requeue_work(struct btrfs_work *work);
 void btrfs_set_work_high_prio(struct btrfs_work *work);
 #endif
index a54d354cefcb71894a89975fae23ff94a9b939ad..c71abec0ab9021c9e56269e7fce81267e4ab9100 100644 (file)
@@ -128,12 +128,14 @@ struct btrfs_inode {
        u64 last_unlink_trans;
 
        /*
-        * These two counters are for delalloc metadata reservations.  We keep
-        * track of how many extents we've accounted for vs how many extents we
-        * have.
+        * Counters to keep track of the number of extent items we may use due
+        * to delalloc and such.  outstanding_extents is the number of extent
+        * items we think we'll end up using, and reserved_extents is the number
+        * of extent items we've reserved metadata for.
         */
-       int delalloc_reserved_extents;
-       int delalloc_extents;
+       spinlock_t accounting_lock;
+       int reserved_extents;
+       int outstanding_extents;
 
        /*
         * ordered_data_close is set by truncate when a file that used
index dd8ced9814c4a5d0bc35e9bd8ef12cd8745382b9..1bb897ecdeebd5e4d5357ffd2bae010e6d846d3a 100644 (file)
@@ -691,14 +691,17 @@ struct btrfs_space_info {
 
        struct list_head list;
 
+       /* for controlling how we free up space for allocations */
+       wait_queue_head_t allocate_wait;
+       wait_queue_head_t flush_wait;
+       int allocating_chunk;
+       int flushing;
+
        /* for block groups in our same type */
        struct list_head block_groups;
        spinlock_t lock;
        struct rw_semaphore groups_sem;
        atomic_t caching_threads;
-
-       int allocating_chunk;
-       wait_queue_head_t wait;
 };
 
 /*
@@ -907,6 +910,7 @@ struct btrfs_fs_info {
         * A third pool does submit_bio to avoid deadlocking with the other
         * two
         */
+       struct btrfs_workers generic_worker;
        struct btrfs_workers workers;
        struct btrfs_workers delalloc_workers;
        struct btrfs_workers endio_workers;
@@ -914,6 +918,7 @@ struct btrfs_fs_info {
        struct btrfs_workers endio_meta_write_workers;
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers submit_workers;
+       struct btrfs_workers enospc_workers;
        /*
         * fixup workers take dirty pages that didn't properly go through
         * the cow mechanism and make them safe to write.  It happens
@@ -1005,6 +1010,8 @@ struct btrfs_root {
        atomic_t log_commit[2];
        unsigned long log_transid;
        unsigned long log_batch;
+       pid_t log_start_pid;
+       bool log_multiple_pids;
 
        u64 objectid;
        u64 last_trans;
@@ -2323,7 +2330,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
 void btrfs_orphan_cleanup(struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t size);
 int btrfs_invalidate_inodes(struct btrfs_root *root);
-extern struct dentry_operations btrfs_dentry_operations;
+extern const struct dentry_operations btrfs_dentry_operations;
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
index af0435f79fa605e96f0461efd496599c576eb7e2..100551a66c46dc7d3ad51eb341039ee362724d20 100644 (file)
@@ -1746,21 +1746,25 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                err = -EINVAL;
                goto fail_iput;
        }
-printk("thread pool is %d\n", fs_info->thread_pool_size);
-       /*
-        * we need to start all the end_io workers up front because the
-        * queue work function gets called at interrupt time, and so it
-        * cannot dynamically grow.
-        */
+
+       btrfs_init_workers(&fs_info->generic_worker,
+                          "genwork", 1, NULL);
+
        btrfs_init_workers(&fs_info->workers, "worker",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
 
        btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
 
        btrfs_init_workers(&fs_info->submit_workers, "submit",
                           min_t(u64, fs_devices->num_devices,
-                          fs_info->thread_pool_size));
+                          fs_info->thread_pool_size),
+                          &fs_info->generic_worker);
+       btrfs_init_workers(&fs_info->enospc_workers, "enospc",
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
 
        /* a higher idle thresh on the submit workers makes it much more
         * likely that bios will be send down in a sane order to the
@@ -1774,15 +1778,20 @@ printk("thread pool is %d\n", fs_info->thread_pool_size);
        fs_info->delalloc_workers.idle_thresh = 2;
        fs_info->delalloc_workers.ordered = 1;
 
-       btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
+       btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_workers, "endio",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_meta_write_workers,
-                          "endio-meta-write", fs_info->thread_pool_size);
+                          "endio-meta-write", fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
 
        /*
         * endios are largely parallel and should have a very
@@ -1794,12 +1803,8 @@ printk("thread pool is %d\n", fs_info->thread_pool_size);
        fs_info->endio_write_workers.idle_thresh = 2;
        fs_info->endio_meta_write_workers.idle_thresh = 2;
 
-       fs_info->endio_workers.atomic_worker_start = 1;
-       fs_info->endio_meta_workers.atomic_worker_start = 1;
-       fs_info->endio_write_workers.atomic_worker_start = 1;
-       fs_info->endio_meta_write_workers.atomic_worker_start = 1;
-
        btrfs_start_workers(&fs_info->workers, 1);
+       btrfs_start_workers(&fs_info->generic_worker, 1);
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
@@ -1807,6 +1812,7 @@ printk("thread pool is %d\n", fs_info->thread_pool_size);
        btrfs_start_workers(&fs_info->endio_meta_workers, 1);
        btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
+       btrfs_start_workers(&fs_info->enospc_workers, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2012,6 +2018,7 @@ fail_chunk_root:
        free_extent_buffer(chunk_root->node);
        free_extent_buffer(chunk_root->commit_root);
 fail_sb_buffer:
+       btrfs_stop_workers(&fs_info->generic_worker);
        btrfs_stop_workers(&fs_info->fixup_workers);
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
@@ -2020,6 +2027,7 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
+       btrfs_stop_workers(&fs_info->enospc_workers);
 fail_iput:
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
@@ -2437,6 +2445,7 @@ int close_ctree(struct btrfs_root *root)
 
        iput(fs_info->btree_inode);
 
+       btrfs_stop_workers(&fs_info->generic_worker);
        btrfs_stop_workers(&fs_info->fixup_workers);
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
@@ -2445,6 +2454,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
+       btrfs_stop_workers(&fs_info->enospc_workers);
 
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
index 359a754c782cd8338f0a1bf0d456306e3240c32c..d0c4d584efadd9cf92050bf0da946e364da2dc8a 100644 (file)
@@ -2824,14 +2824,17 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
                                           num_items);
 
        spin_lock(&meta_sinfo->lock);
-       if (BTRFS_I(inode)->delalloc_reserved_extents <=
-           BTRFS_I(inode)->delalloc_extents) {
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       if (BTRFS_I(inode)->reserved_extents <=
+           BTRFS_I(inode)->outstanding_extents) {
+               spin_unlock(&BTRFS_I(inode)->accounting_lock);
                spin_unlock(&meta_sinfo->lock);
                return 0;
        }
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-       BTRFS_I(inode)->delalloc_reserved_extents--;
-       BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
+       BTRFS_I(inode)->reserved_extents--;
+       BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
 
        if (meta_sinfo->bytes_delalloc < num_bytes) {
                bug = true;
@@ -2864,6 +2867,107 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
                meta_sinfo->force_delalloc = 0;
 }
 
+struct async_flush {
+       struct btrfs_root *root;        /* root whose delalloc is flushed */
+       struct btrfs_space_info *info;  /* space info whose waiters are woken */
+       struct btrfs_work work;         /* item queued on enospc_workers */
+};
+
+static noinline void flush_delalloc_async(struct btrfs_work *work)
+{      /* worker-side half of flush_delalloc(); frees the request at the end */
+       struct async_flush *async;
+       struct btrfs_root *root;
+       struct btrfs_space_info *info;
+
+       async = container_of(work, struct async_flush, work);
+       root = async->root;
+       info = async->info;
+
+       btrfs_start_delalloc_inodes(root);
+       wake_up(&info->flush_wait);     /* early wake: waiters recheck space */
+       btrfs_wait_ordered_extents(root, 0);
+
+       spin_lock(&info->lock);
+       info->flushing = 0;     /* flush finished; wait_on_flush() may exit */
+       spin_unlock(&info->lock);
+       wake_up(&info->flush_wait);
+
+       kfree(async);   /* allocated in flush_delalloc() */
+}
+
+static void wait_on_flush(struct btrfs_space_info *info)
+{      /* sleep until the flush ends or usage drops below total_bytes */
+       DEFINE_WAIT(wait);
+       u64 used;
+
+       while (1) {
+               prepare_to_wait(&info->flush_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               spin_lock(&info->lock);
+               if (!info->flushing) {  /* flusher is done */
+                       spin_unlock(&info->lock);
+                       break;
+               }
+
+               used = info->bytes_used + info->bytes_reserved +
+                       info->bytes_pinned + info->bytes_readonly +
+                       info->bytes_super + info->bytes_root +
+                       info->bytes_may_use + info->bytes_delalloc;
+               if (used < info->total_bytes) { /* room again: stop waiting */
+                       spin_unlock(&info->lock);
+                       break;
+               }
+               spin_unlock(&info->lock);
+               schedule();     /* woken by wake_up() on flush_wait */
+       }
+       finish_wait(&info->flush_wait, &wait);
+}
+
+static void flush_delalloc(struct btrfs_root *root,
+                                struct btrfs_space_info *info)
+{      /* only one flush runs at a time; other callers just wait on it */
+       struct async_flush *async;
+       bool wait = false;
+
+       spin_lock(&info->lock);
+       /* NOTE(review): init below may race with a waiter still queued - confirm */
+       if (!info->flushing) {
+               info->flushing = 1;     /* this caller starts the flush */
+               init_waitqueue_head(&info->flush_wait);
+       } else {
+               wait = true;
+       }
+
+       spin_unlock(&info->lock);
+
+       if (wait) {
+               wait_on_flush(info);
+               return;
+       }
+
+       async = kzalloc(sizeof(*async), GFP_NOFS);
+       if (!async)
+               goto flush;     /* no memory for a request: flush inline */
+
+       async->root = root;
+       async->info = info;
+       async->work.func = flush_delalloc_async;
+
+       btrfs_queue_worker(&root->fs_info->enospc_workers,
+                          &async->work);       /* callback frees async */
+       wait_on_flush(info);
+       return;
+
+flush:
+       btrfs_start_delalloc_inodes(root);
+       btrfs_wait_ordered_extents(root, 0);
+
+       spin_lock(&info->lock);
+       info->flushing = 0;     /* same completion steps as the async path */
+       spin_unlock(&info->lock);
+       wake_up(&info->flush_wait);
+}
+
 static int maybe_allocate_chunk(struct btrfs_root *root,
                                 struct btrfs_space_info *info)
 {
@@ -2894,7 +2998,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
        if (!info->allocating_chunk) {
                info->force_alloc = 1;
                info->allocating_chunk = 1;
-               init_waitqueue_head(&info->wait);
+               init_waitqueue_head(&info->allocate_wait);
        } else {
                wait = true;
        }
@@ -2902,7 +3006,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
        spin_unlock(&info->lock);
 
        if (wait) {
-               wait_event(info->wait,
+               wait_event(info->allocate_wait,
                           !info->allocating_chunk);
                return 1;
        }
@@ -2923,7 +3027,7 @@ out:
        spin_lock(&info->lock);
        info->allocating_chunk = 0;
        spin_unlock(&info->lock);
-       wake_up(&info->wait);
+       wake_up(&info->allocate_wait);
 
        if (ret)
                return 0;
@@ -2981,21 +3085,20 @@ again:
                        filemap_flush(inode->i_mapping);
                        goto again;
                } else if (flushed == 3) {
-                       btrfs_start_delalloc_inodes(root);
-                       btrfs_wait_ordered_extents(root, 0);
+                       flush_delalloc(root, meta_sinfo);
                        goto again;
                }
                spin_lock(&meta_sinfo->lock);
                meta_sinfo->bytes_delalloc -= num_bytes;
                spin_unlock(&meta_sinfo->lock);
                printk(KERN_ERR "enospc, has %d, reserved %d\n",
-                      BTRFS_I(inode)->delalloc_extents,
-                      BTRFS_I(inode)->delalloc_reserved_extents);
+                      BTRFS_I(inode)->outstanding_extents,
+                      BTRFS_I(inode)->reserved_extents);
                dump_space_info(meta_sinfo, 0, 0);
                return -ENOSPC;
        }
 
-       BTRFS_I(inode)->delalloc_reserved_extents++;
+       BTRFS_I(inode)->reserved_extents++;
        check_force_delalloc(meta_sinfo);
        spin_unlock(&meta_sinfo->lock);
 
@@ -3094,8 +3197,7 @@ again:
                }
 
                if (retries == 2) {
-                       btrfs_start_delalloc_inodes(root);
-                       btrfs_wait_ordered_extents(root, 0);
+                       flush_delalloc(root, meta_sinfo);
                        goto again;
                }
                spin_lock(&meta_sinfo->lock);
@@ -4029,6 +4131,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        int loop = 0;
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
+       bool failed_alloc = false;
 
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -4233,14 +4336,23 @@ refill_cluster:
 
                offset = btrfs_find_space_for_alloc(block_group, search_start,
                                                    num_bytes, empty_size);
-               if (!offset && (cached || (!cached &&
-                                          loop == LOOP_CACHING_NOWAIT))) {
-                       goto loop;
-               } else if (!offset && (!cached &&
-                                      loop > LOOP_CACHING_NOWAIT)) {
+               /*
+                * If we didn't find a chunk, and we haven't failed on this
+                * block group before, and this block group is in the middle of
+                * caching and we are ok with waiting, then go ahead and wait
+                * for progress to be made, and set failed_alloc to true.
+                *
+                * If failed_alloc is true then we've already waited on this
+                * block group once and should move on to the next block group.
+                */
+               if (!offset && !failed_alloc && !cached &&
+                   loop > LOOP_CACHING_NOWAIT) {
                        wait_block_group_cache_progress(block_group,
-                                       num_bytes + empty_size);
+                                               num_bytes + empty_size);
+                       failed_alloc = true;
                        goto have_block_group;
+               } else if (!offset) {
+                       goto loop;
                }
 checks:
                search_start = stripe_align(root, offset);
@@ -4288,6 +4400,7 @@ checks:
                break;
 loop:
                failed_cluster_refill = false;
+               failed_alloc = false;
                btrfs_put_block_group(block_group);
        }
        up_read(&space_info->groups_sem);
@@ -4799,6 +4912,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
        u64 bytenr;
        u64 generation;
        u64 refs;
+       u64 flags;
        u64 last = 0;
        u32 nritems;
        u32 blocksize;
@@ -4836,15 +4950,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
                    generation <= root->root_key.offset)
                        continue;
 
+               /* We don't lock the tree block, it's OK to be racy here */
+               ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+                                              &refs, &flags);
+               BUG_ON(ret);
+               BUG_ON(refs == 0);
+
                if (wc->stage == DROP_REFERENCE) {
-                       ret = btrfs_lookup_extent_info(trans, root,
-                                               bytenr, blocksize,
-                                               &refs, NULL);
-                       BUG_ON(ret);
-                       BUG_ON(refs == 0);
                        if (refs == 1)
                                goto reada;
 
+                       if (wc->level == 1 &&
+                           (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                               continue;
                        if (!wc->update_ref ||
                            generation <= root->root_key.offset)
                                continue;
@@ -4853,6 +4971,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
                                                  &wc->update_progress);
                        if (ret < 0)
                                continue;
+               } else {
+                       if (wc->level == 1 &&
+                           (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                               continue;
                }
 reada:
                ret = readahead_tree_block(root, bytenr, blocksize,
@@ -4876,7 +4998,7 @@ reada:
 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_path *path,
-                                  struct walk_control *wc)
+                                  struct walk_control *wc, int lookup_info)
 {
        int level = wc->level;
        struct extent_buffer *eb = path->nodes[level];
@@ -4891,8 +5013,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
         * when reference count of tree block is 1, it won't increase
         * again. once full backref flag is set, we never clear it.
         */
-       if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
-           (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+       if (lookup_info &&
+           ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+            (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
                BUG_ON(!path->locks[level]);
                ret = btrfs_lookup_extent_info(trans, root,
                                               eb->start, eb->len,
@@ -4953,7 +5076,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
-                                struct walk_control *wc)
+                                struct walk_control *wc, int *lookup_info)
 {
        u64 bytenr;
        u64 generation;
@@ -4973,8 +5096,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
         * for the subtree
         */
        if (wc->stage == UPDATE_BACKREF &&
-           generation <= root->root_key.offset)
+           generation <= root->root_key.offset) {
+               *lookup_info = 1;
                return 1;
+       }
 
        bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
        blocksize = btrfs_level_size(root, level - 1);
@@ -4987,14 +5112,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        btrfs_tree_lock(next);
        btrfs_set_lock_blocking(next);
 
-       if (wc->stage == DROP_REFERENCE) {
-               ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
-                                              &wc->refs[level - 1],
-                                              &wc->flags[level - 1]);
-               BUG_ON(ret);
-               BUG_ON(wc->refs[level - 1] == 0);
+       ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+                                      &wc->refs[level - 1],
+                                      &wc->flags[level - 1]);
+       BUG_ON(ret);
+       BUG_ON(wc->refs[level - 1] == 0);
+       *lookup_info = 0;
 
+       if (wc->stage == DROP_REFERENCE) {
                if (wc->refs[level - 1] > 1) {
+                       if (level == 1 &&
+                           (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                               goto skip;
+
                        if (!wc->update_ref ||
                            generation <= root->root_key.offset)
                                goto skip;
@@ -5008,12 +5138,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                        wc->stage = UPDATE_BACKREF;
                        wc->shared_level = level - 1;
                }
+       } else {
+               if (level == 1 &&
+                   (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
+                       goto skip;
        }
 
        if (!btrfs_buffer_uptodate(next, generation)) {
                btrfs_tree_unlock(next);
                free_extent_buffer(next);
                next = NULL;
+               *lookup_info = 1;
        }
 
        if (!next) {
@@ -5036,21 +5171,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 skip:
        wc->refs[level - 1] = 0;
        wc->flags[level - 1] = 0;
+       if (wc->stage == DROP_REFERENCE) {
+               if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+                       parent = path->nodes[level]->start;
+               } else {
+                       BUG_ON(root->root_key.objectid !=
+                              btrfs_header_owner(path->nodes[level]));
+                       parent = 0;
+               }
 
-       if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
-               parent = path->nodes[level]->start;
-       } else {
-               BUG_ON(root->root_key.objectid !=
-                      btrfs_header_owner(path->nodes[level]));
-               parent = 0;
+               ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+                                       root->root_key.objectid, level - 1, 0);
+               BUG_ON(ret);
        }
-
-       ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-                               root->root_key.objectid, level - 1, 0);
-       BUG_ON(ret);
-
        btrfs_tree_unlock(next);
        free_extent_buffer(next);
+       *lookup_info = 1;
        return 1;
 }
 
@@ -5164,6 +5300,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
                                   struct walk_control *wc)
 {
        int level = wc->level;
+       int lookup_info = 1;
        int ret;
 
        while (level >= 0) {
@@ -5171,14 +5308,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
                    btrfs_header_nritems(path->nodes[level]))
                        break;
 
-               ret = walk_down_proc(trans, root, path, wc);
+               ret = walk_down_proc(trans, root, path, wc, lookup_info);
                if (ret > 0)
                        break;
 
                if (level == 0)
                        break;
 
-               ret = do_walk_down(trans, root, path, wc);
+               ret = do_walk_down(trans, root, path, wc, &lookup_info);
                if (ret > 0) {
                        path->slots[level]++;
                        continue;
index de1793ba004aa472afc8c983521cea7380ef4720..96577e8bf9fdb62819ab2dbd5f9da91200624596 100644 (file)
@@ -460,7 +460,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
                            struct extent_state *state, int bits, int wake,
                            int delete)
 {
-       int ret = state->state & bits;
+       int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
+       int ret = state->state & bits_to_clear;
 
        if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
@@ -468,7 +469,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
                tree->dirty_bytes -= range;
        }
        clear_state_cb(tree, state, bits);
-       state->state &= ~bits;
+       state->state &= ~bits_to_clear;
        if (wake)
                wake_up(&state->wq);
        if (delete || state->state == 0) {
@@ -956,7 +957,8 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                       gfp_t mask)
 {
        return clear_extent_bit(tree, start, end,
-                               EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
+                               EXTENT_DIRTY | EXTENT_DELALLOC |
+                               EXTENT_DO_ACCOUNTING, 0, 0,
                                NULL, mask);
 }
 
@@ -1401,12 +1403,7 @@ out_failed:
 int extent_clear_unlock_delalloc(struct inode *inode,
                                struct extent_io_tree *tree,
                                u64 start, u64 end, struct page *locked_page,
-                               int unlock_pages,
-                               int clear_unlock,
-                               int clear_delalloc, int clear_dirty,
-                               int set_writeback,
-                               int end_writeback,
-                               int set_private2)
+                               unsigned long op)
 {
        int ret;
        struct page *pages[16];
@@ -1416,17 +1413,21 @@ int extent_clear_unlock_delalloc(struct inode *inode,
        int i;
        int clear_bits = 0;
 
-       if (clear_unlock)
+       if (op & EXTENT_CLEAR_UNLOCK)
                clear_bits |= EXTENT_LOCKED;
-       if (clear_dirty)
+       if (op & EXTENT_CLEAR_DIRTY)
                clear_bits |= EXTENT_DIRTY;
 
-       if (clear_delalloc)
+       if (op & EXTENT_CLEAR_DELALLOC)
                clear_bits |= EXTENT_DELALLOC;
 
+       if (op & EXTENT_CLEAR_ACCOUNTING)
+               clear_bits |= EXTENT_DO_ACCOUNTING;
+
        clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
-       if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
-             set_private2))
+       if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
+                   EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
+                   EXTENT_SET_PRIVATE2)))
                return 0;
 
        while (nr_pages > 0) {
@@ -1435,20 +1436,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                     nr_pages, ARRAY_SIZE(pages)), pages);
                for (i = 0; i < ret; i++) {
 
-                       if (set_private2)
+                       if (op & EXTENT_SET_PRIVATE2)
                                SetPagePrivate2(pages[i]);
 
                        if (pages[i] == locked_page) {
                                page_cache_release(pages[i]);
                                continue;
                        }
-                       if (clear_dirty)
+                       if (op & EXTENT_CLEAR_DIRTY)
                                clear_page_dirty_for_io(pages[i]);
-                       if (set_writeback)
+                       if (op & EXTENT_SET_WRITEBACK)
                                set_page_writeback(pages[i]);
-                       if (end_writeback)
+                       if (op & EXTENT_END_WRITEBACK)
                                end_page_writeback(pages[i]);
-                       if (unlock_pages)
+                       if (op & EXTENT_CLEAR_UNLOCK_PAGE)
                                unlock_page(pages[i]);
                        page_cache_release(pages[i]);
                }
@@ -2714,7 +2715,8 @@ int extent_invalidatepage(struct extent_io_tree *tree,
        lock_extent(tree, start, end, GFP_NOFS);
        wait_on_page_writeback(page);
        clear_extent_bit(tree, start, end,
-                        EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+                        EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
+                        EXTENT_DO_ACCOUNTING,
                         1, 1, NULL, GFP_NOFS);
        return 0;
 }
index 4794ec891fed31a3998098cb1999818627aacb12..36de250a7b2bce5ef6f36d35ded4f1f88a2dfc58 100644 (file)
@@ -15,6 +15,7 @@
 #define EXTENT_BUFFER_FILLED (1 << 8)
 #define EXTENT_BOUNDARY (1 << 9)
 #define EXTENT_NODATASUM (1 << 10)
+#define EXTENT_DO_ACCOUNTING (1 << 11)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 
 /* flags for bio submission */
 #define EXTENT_BUFFER_BLOCKING 1
 #define EXTENT_BUFFER_DIRTY 2
 
+/* these are flags for extent_clear_unlock_delalloc */
+#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
+#define EXTENT_CLEAR_UNLOCK     0x2
+#define EXTENT_CLEAR_DELALLOC   0x4
+#define EXTENT_CLEAR_DIRTY      0x8
+#define EXTENT_SET_WRITEBACK    0x10
+#define EXTENT_END_WRITEBACK    0x20
+#define EXTENT_SET_PRIVATE2     0x40
+#define EXTENT_CLEAR_ACCOUNTING  0x80
+
 /*
  * page->private values.  Every page that is controlled by the extent
  * map has page->private set to one.
@@ -288,10 +299,5 @@ int extent_range_uptodate(struct extent_io_tree *tree,
 int extent_clear_unlock_delalloc(struct inode *inode,
                                struct extent_io_tree *tree,
                                u64 start, u64 end, struct page *locked_page,
-                               int unlock_page,
-                               int clear_unlock,
-                               int clear_delalloc, int clear_dirty,
-                               int set_writeback,
-                               int end_writeback,
-                               int set_private2);
+                               unsigned long op);
 #endif
index f19e1259a971d52d62a9979ba311d3f136289e05..2d623aa0625f8fb6901b983233b08def66a8574d 100644 (file)
@@ -878,7 +878,8 @@ again:
                        btrfs_put_ordered_extent(ordered);
 
                clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
-                                 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
+                                 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
+                                 EXTENT_DO_ACCOUNTING,
                                  GFP_NOFS);
                unlock_extent(&BTRFS_I(inode)->io_tree,
                              start_pos, last_pos - 1, GFP_NOFS);
index 112e5aa85892c35dbaf921ba16bba119d4c2c49e..9e138b793dc78d25825bef6e003d2156cbfa172c 100644 (file)
@@ -424,9 +424,12 @@ again:
                         * and free up our temp pages.
                         */
                        extent_clear_unlock_delalloc(inode,
-                                                    &BTRFS_I(inode)->io_tree,
-                                                    start, end, NULL, 1, 0,
-                                                    0, 1, 1, 1, 0);
+                            &BTRFS_I(inode)->io_tree,
+                            start, end, NULL,
+                            EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
+                            EXTENT_CLEAR_DELALLOC |
+                            EXTENT_CLEAR_ACCOUNTING |
+                            EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
                        ret = 0;
                        goto free_pages_out;
                }
@@ -637,11 +640,14 @@ static noinline int submit_compressed_extents(struct inode *inode,
                 * clear dirty, set writeback and unlock the pages.
                 */
                extent_clear_unlock_delalloc(inode,
-                                            &BTRFS_I(inode)->io_tree,
-                                            async_extent->start,
-                                            async_extent->start +
-                                            async_extent->ram_size - 1,
-                                            NULL, 1, 1, 0, 1, 1, 0, 0);
+                               &BTRFS_I(inode)->io_tree,
+                               async_extent->start,
+                               async_extent->start +
+                               async_extent->ram_size - 1,
+                               NULL, EXTENT_CLEAR_UNLOCK_PAGE |
+                               EXTENT_CLEAR_UNLOCK |
+                               EXTENT_CLEAR_DELALLOC |
+                               EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
 
                ret = btrfs_submit_compressed_write(inode,
                                    async_extent->start,
@@ -712,9 +718,15 @@ static noinline int cow_file_range(struct inode *inode,
                                            start, end, 0, NULL);
                if (ret == 0) {
                        extent_clear_unlock_delalloc(inode,
-                                                    &BTRFS_I(inode)->io_tree,
-                                                    start, end, NULL, 1, 1,
-                                                    1, 1, 1, 1, 0);
+                                    &BTRFS_I(inode)->io_tree,
+                                    start, end, NULL,
+                                    EXTENT_CLEAR_UNLOCK_PAGE |
+                                    EXTENT_CLEAR_UNLOCK |
+                                    EXTENT_CLEAR_DELALLOC |
+                                    EXTENT_CLEAR_ACCOUNTING |
+                                    EXTENT_CLEAR_DIRTY |
+                                    EXTENT_SET_WRITEBACK |
+                                    EXTENT_END_WRITEBACK);
                        *nr_written = *nr_written +
                             (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                        *page_started = 1;
@@ -738,6 +750,8 @@ static noinline int cow_file_range(struct inode *inode,
        btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 
        while (disk_num_bytes > 0) {
+               unsigned long op;
+
                cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
                ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
                                           root->sectorsize, 0, alloc_hint,
@@ -789,10 +803,13 @@ static noinline int cow_file_range(struct inode *inode,
                 * Do set the Private2 bit so we know this page was properly
                 * setup for writepage
                 */
+               op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
+               op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
+                       EXTENT_SET_PRIVATE2;
+
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                             start, start + ram_size - 1,
-                                            locked_page, unlock, 1,
-                                            1, 0, 0, 0, 1);
+                                            locked_page, op);
                disk_num_bytes -= cur_alloc_size;
                num_bytes -= cur_alloc_size;
                alloc_hint = ins.objectid + ins.offset;
@@ -864,8 +881,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
        u64 cur_end;
        int limit = 10 * 1024 * 1042;
 
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
-                        EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
+                        1, 0, NULL, GFP_NOFS);
        while (start < end) {
                async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
                async_cow->inode = inode;
@@ -1006,6 +1023,7 @@ next_slot:
 
                if (found_key.offset > cur_offset) {
                        extent_end = found_key.offset;
+                       extent_type = 0;
                        goto out_check;
                }
 
@@ -1112,8 +1130,10 @@ out_check:
                BUG_ON(ret);
 
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                       cur_offset, cur_offset + num_bytes - 1,
-                                       locked_page, 1, 1, 1, 0, 0, 0, 1);
+                               cur_offset, cur_offset + num_bytes - 1,
+                               locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
+                               EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
+                               EXTENT_SET_PRIVATE2);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@ -1178,15 +1198,17 @@ static int btrfs_split_extent_hook(struct inode *inode,
                                        root->fs_info->max_extent);
 
                /*
-                * if we break a large extent up then leave delalloc_extents be,
-                * since we've already accounted for the large extent.
+                * if we break a large extent up then leave outstanding_extents
+                * be, since we've already accounted for the large extent.
                 */
                if (div64_u64(new_size + root->fs_info->max_extent - 1,
                              root->fs_info->max_extent) < num_extents)
                        return 0;
        }
 
-       BTRFS_I(inode)->delalloc_extents++;
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       BTRFS_I(inode)->outstanding_extents++;
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
        return 0;
 }
@@ -1217,7 +1239,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
 
        /* we're not bigger than the max, unreserve the space and go */
        if (new_size <= root->fs_info->max_extent) {
-               BTRFS_I(inode)->delalloc_extents--;
+               spin_lock(&BTRFS_I(inode)->accounting_lock);
+               BTRFS_I(inode)->outstanding_extents--;
+               spin_unlock(&BTRFS_I(inode)->accounting_lock);
                return 0;
        }
 
@@ -1231,7 +1255,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
                      root->fs_info->max_extent) > num_extents)
                return 0;
 
-       BTRFS_I(inode)->delalloc_extents--;
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
        return 0;
 }
@@ -1253,7 +1279,9 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
        if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
 
-               BTRFS_I(inode)->delalloc_extents++;
+               spin_lock(&BTRFS_I(inode)->accounting_lock);
+               BTRFS_I(inode)->outstanding_extents++;
+               spin_unlock(&BTRFS_I(inode)->accounting_lock);
                btrfs_delalloc_reserve_space(root, inode, end - start + 1);
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += end - start + 1;
@@ -1281,8 +1309,12 @@ static int btrfs_clear_bit_hook(struct inode *inode,
        if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
 
-               BTRFS_I(inode)->delalloc_extents--;
-               btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+               if (bits & EXTENT_DO_ACCOUNTING) {
+                       spin_lock(&BTRFS_I(inode)->accounting_lock);
+                       BTRFS_I(inode)->outstanding_extents--;
+                       spin_unlock(&BTRFS_I(inode)->accounting_lock);
+                       btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+               }
 
                spin_lock(&root->fs_info->delalloc_lock);
                if (state->end - state->start + 1 >
@@ -3598,12 +3630,14 @@ static int btrfs_dentry_delete(struct dentry *dentry)
 {
        struct btrfs_root *root;
 
-       if (!dentry->d_inode)
-               return 0;
+       if (!dentry->d_inode && !IS_ROOT(dentry))
+               dentry = dentry->d_parent;
 
-       root = BTRFS_I(dentry->d_inode)->root;
-       if (btrfs_root_refs(&root->root_item) == 0)
-               return 1;
+       if (dentry->d_inode) {
+               root = BTRFS_I(dentry->d_inode)->root;
+               if (btrfs_root_refs(&root->root_item) == 0)
+                       return 1;
+       }
        return 0;
 }
 
@@ -4808,7 +4842,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                 */
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
+                                NULL, GFP_NOFS);
                /*
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
@@ -4821,8 +4856,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                lock_extent(tree, page_start, page_end, GFP_NOFS);
        }
        clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
-                1, 1, NULL, GFP_NOFS);
+                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
+                EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);
 
        ClearPageChecked(page);
@@ -4917,7 +4952,8 @@ again:
         * prepare_pages in the normal write path.
         */
        clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                         EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS);
+                         EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
+                         GFP_NOFS);
 
        ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
        if (ret) {
@@ -5065,8 +5101,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
                return NULL;
        ei->last_trans = 0;
        ei->logged_trans = 0;
-       ei->delalloc_extents = 0;
-       ei->delalloc_reserved_extents = 0;
+       ei->outstanding_extents = 0;
+       ei->reserved_extents = 0;
+       spin_lock_init(&ei->accounting_lock);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->i_orphan);
        INIT_LIST_HEAD(&ei->ordered_operations);
@@ -5805,6 +5842,6 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
        .removexattr    = btrfs_removexattr,
 };
 
-struct dentry_operations btrfs_dentry_operations = {
+const struct dentry_operations btrfs_dentry_operations = {
        .d_delete       = btrfs_dentry_delete,
 };
index 9a780c8d0ac83df7bc2444d90830e3214649e8a4..cdbb054102b9f860ee4d119dfff39e891ac29438 100644 (file)
@@ -830,6 +830,7 @@ out_up_write:
 out_unlock:
        mutex_unlock(&inode->i_mutex);
        if (!err) {
+               shrink_dcache_sb(root->fs_info->sb);
                btrfs_invalidate_inodes(dest);
                d_delete(dentry);
        }
@@ -1122,8 +1123,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                                        datao += off - key.offset;
                                        datal -= off - key.offset;
                                }
-                               if (key.offset + datao + datal > off + len)
-                                       datal = off + len - key.offset - datao;
+
+                               if (key.offset + datal > off + len)
+                                       datal = off + len - key.offset;
+
                                /* disko == 0 means it's a hole */
                                if (!disko)
                                        datao = 0;
index 897fba835f897a0d02f1d4677447fdcf70d115d7..5799bc46a30993a1cb477fb3a4dd9db23dcad6da 100644 (file)
@@ -306,6 +306,12 @@ int btrfs_remove_ordered_extent(struct inode *inode,
        tree->last = NULL;
        set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
 
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
+       btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
+                                             inode, 1);
+
        spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
        list_del_init(&entry->root_extent_list);
 
index 361ad323faaceb4ed3313bd1e2904ea29ea7d2f3..cfcc93c93a7b4db99b87ef3b91b30cc8d2f7e36b 100644 (file)
@@ -3518,7 +3518,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        BUG_ON(!rc->block_group);
 
        btrfs_init_workers(&rc->workers, "relocate",
-                          fs_info->thread_pool_size);
+                          fs_info->thread_pool_size, NULL);
 
        rc->extent_root = extent_root;
        btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
@@ -3701,7 +3701,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        mapping_tree_init(&rc->reloc_root_tree);
        INIT_LIST_HEAD(&rc->reloc_roots);
        btrfs_init_workers(&rc->workers, "relocate",
-                          root->fs_info->thread_pool_size);
+                          root->fs_info->thread_pool_size, NULL);
        rc->extent_root = root->fs_info->extent_root;
 
        set_reloc_control(rc);
index 7827841b55cbd5399606cd02820055f2ff6eded2..4edfdc2acc5f23cf09c4f3ddab47664887f8cc1d 100644 (file)
@@ -137,11 +137,20 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
 
        mutex_lock(&root->log_mutex);
        if (root->log_root) {
+               if (!root->log_start_pid) {
+                       root->log_start_pid = current->pid;
+                       root->log_multiple_pids = false;
+               } else if (root->log_start_pid != current->pid) {
+                       root->log_multiple_pids = true;
+               }
+
                root->log_batch++;
                atomic_inc(&root->log_writers);
                mutex_unlock(&root->log_mutex);
                return 0;
        }
+       root->log_multiple_pids = false;
+       root->log_start_pid = current->pid;
        mutex_lock(&root->fs_info->tree_log_mutex);
        if (!root->fs_info->log_root_tree) {
                ret = btrfs_init_log_root_tree(trans, root->fs_info);
@@ -1985,7 +1994,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
                wait_log_commit(trans, root, root->log_transid - 1);
 
-       while (1) {
+       while (root->log_multiple_pids) {
                unsigned long batch = root->log_batch;
                mutex_unlock(&root->log_mutex);
                schedule_timeout_uninterruptible(1);
@@ -2011,6 +2020,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        root->log_batch = 0;
        root->log_transid++;
        log->log_transid = root->log_transid;
+       root->log_start_pid = 0;
        smp_mb();
        /*
         * log tree has been flushed to disk, new modifications of