Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs...
author     Chris Mason <chris.mason@oracle.com>
           Fri, 11 Sep 2009 23:07:25 +0000 (19:07 -0400)
committer  Chris Mason <chris.mason@oracle.com>
           Fri, 11 Sep 2009 23:07:25 +0000 (19:07 -0400)
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
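
The common thread through the compression.c, disk-io.c, file.c, and inode.c hunks below is that the extent_map tree's lock becomes a rwlock: lookup_extent_mapping() callers now take read_lock()/read_unlock() on em_tree->lock, while add_extent_mapping() and the removal paths take write_lock()/write_unlock(). The following is a minimal userspace analogue of that reader/writer split, using pthread rwlocks and invented names (em_cache, em_lookup, em_insert) rather than the kernel API:

    #include <pthread.h>
    #include <stddef.h>

    /*
     * Toy stand-in for the extent map tree: a small array guarded by a rwlock.
     * Initialize the lock with PTHREAD_RWLOCK_INITIALIZER or pthread_rwlock_init().
     */
    struct em_cache {
            pthread_rwlock_t lock;
            unsigned long start[64];
            unsigned long len[64];
            int nr;
    };

    /* Lookups only need shared access, mirroring read_lock(&em_tree->lock). */
    static int em_lookup(struct em_cache *c, unsigned long off, unsigned long *len_out)
    {
            int i, found = -1;

            pthread_rwlock_rdlock(&c->lock);
            for (i = 0; i < c->nr; i++) {
                    if (off >= c->start[i] && off < c->start[i] + c->len[i]) {
                            *len_out = c->len[i];
                            found = i;
                            break;
                    }
            }
            pthread_rwlock_unlock(&c->lock);
            return found;
    }

    /* Insertions still take exclusive access, mirroring write_lock(&em_tree->lock). */
    static int em_insert(struct em_cache *c, unsigned long start, unsigned long len)
    {
            int ret = -1;

            pthread_rwlock_wrlock(&c->lock);
            if (c->nr < 64) {
                    c->start[c->nr] = start;
                    c->len[c->nr] = len;
                    c->nr++;
                    ret = 0;
            }
            pthread_rwlock_unlock(&c->lock);
            return ret;
    }
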

diff --combined fs/btrfs/compression.c
index 9d8ba4d54a37c3f96e9585de46b8e99d701410bc,78451a58f209ce71a4ae846bab006ddc16367f40..a11a32058b50a4993f072fd1baddc6e9dafb52a8
@@@ -26,6 -26,7 +26,6 @@@
  #include <linux/time.h>
  #include <linux/init.h>
  #include <linux/string.h>
 -#include <linux/smp_lock.h>
  #include <linux/backing-dev.h>
  #include <linux/mpage.h>
  #include <linux/swap.h>
@@@ -506,10 -507,10 +506,10 @@@ static noinline int add_ra_bio_pages(st
                 */
                set_page_extent_mapped(page);
                lock_extent(tree, last_offset, end, GFP_NOFS);
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, last_offset,
                                           PAGE_CACHE_SIZE);
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
  
                if (!em || last_offset < em->start ||
                    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@@ -593,11 -594,11 +593,11 @@@ int btrfs_submit_compressed_read(struc
        em_tree = &BTRFS_I(inode)->extent_tree;
  
        /* we need the actual starting offset of this extent in the file */
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree,
                                   page_offset(bio->bi_io_vec->bv_page),
                                   PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
  
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
diff --combined fs/btrfs/ctree.h
index 837435ce84caa104dcb00ba9830df29a47e95923,1ceab8b4d6dc41adf466b24f477060a8a96ddaad..732d5b884aa7aa5b236df4c95a8a018730d7e080
@@@ -41,6 -41,8 +41,6 @@@ struct btrfs_ordered_sum
  
  #define BTRFS_MAGIC "_BHRfS_M"
  
 -#define BTRFS_ACL_NOT_CACHED    ((void *)-1)
 -
  #define BTRFS_MAX_LEVEL 8
  
  #define BTRFS_COMPAT_EXTENT_TREE_V0
@@@ -2290,7 -2292,7 +2290,7 @@@ extern struct file_operations btrfs_fil
  int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 locked_end,
-                      u64 inline_limit, u64 *hint_block);
+                      u64 inline_limit, u64 *hint_block, int drop_cache);
  int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct inode *inode, u64 start, u64 end);
diff --combined fs/btrfs/disk-io.c
index e83be2e4602c256f959a2b9daf0c3da2b8e9bcab,b6cfdd9164e251abc3b09e711bdf8f9a775c1b51..253da7e01ab3e9e4f4c0a223096fb6dd97b72fcf
@@@ -42,8 -42,6 +42,8 @@@
  static struct extent_io_ops btree_extent_io_ops;
  static void end_workqueue_fn(struct btrfs_work *work);
  
 +static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
 +
  /*
   * end_io_wq structs are used to do processing in task context when an IO is
   * complete.  This is used during reads to verify checksums, and it is used
@@@ -123,15 -121,15 +123,15 @@@ static struct extent_map *btree_get_ext
        struct extent_map *em;
        int ret;
  
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em) {
                em->bdev =
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
                goto out;
        }
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
  
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
        em->block_start = 0;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
  
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        if (ret == -EEXIST) {
                u64 failed_start = em->start;
                free_extent_map(em);
                em = NULL;
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
  
        if (ret)
                em = ERR_PTR(ret);
@@@ -1325,9 -1323,9 +1325,9 @@@ static void btrfs_unplug_io_fn(struct b
        offset = page_offset(page);
  
        em_tree = &BTRFS_I(inode)->extent_tree;
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        if (!em) {
                __unplug_io_fn(bdi, page);
                return;
        free_extent_map(em);
  }
  
 +/*
 + * If this fails, caller must call bdi_destroy() to get rid of the
 + * bdi again.
 + */
  static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
  {
 -      bdi_init(bdi);
 +      int err;
 +
 +      bdi->capabilities = BDI_CAP_MAP_COPY;
 +      err = bdi_init(bdi);
 +      if (err)
 +              return err;
 +
 +      err = bdi_register(bdi, NULL, "btrfs-%d",
 +                              atomic_inc_return(&btrfs_bdi_num));
 +      if (err)
 +              return err;
 +
        bdi->ra_pages   = default_backing_dev_info.ra_pages;
 -      bdi->state              = 0;
 -      bdi->capabilities       = default_backing_dev_info.capabilities;
        bdi->unplug_io_fn       = btrfs_unplug_io_fn;
        bdi->unplug_io_data     = info;
        bdi->congested_fn       = btrfs_congested_fn;
@@@ -1584,8 -1569,7 +1584,8 @@@ struct btrfs_root *open_ctree(struct su
        fs_info->sb = sb;
        fs_info->max_extent = (u64)-1;
        fs_info->max_inline = 8192 * 1024;
 -      setup_bdi(fs_info, &fs_info->bdi);
 +      if (setup_bdi(fs_info, &fs_info->bdi))
 +              goto fail_bdi;
        fs_info->btree_inode = new_inode(sb);
        fs_info->btree_inode->i_ino = 1;
        fs_info->btree_inode->i_nlink = 1;
                err = -EINVAL;
                goto fail_iput;
        }
+ printk("thread pool is %d\n", fs_info->thread_pool_size);
        /*
         * we need to start all the end_io workers up front because the
         * queue work function gets called at interrupt time, and so it
        fs_info->endio_workers.idle_thresh = 4;
        fs_info->endio_meta_workers.idle_thresh = 4;
  
-       fs_info->endio_write_workers.idle_thresh = 64;
-       fs_info->endio_meta_write_workers.idle_thresh = 64;
+       fs_info->endio_write_workers.idle_thresh = 2;
+       fs_info->endio_meta_write_workers.idle_thresh = 2;
+       fs_info->endio_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_workers.atomic_worker_start = 1;
+       fs_info->endio_write_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_write_workers.atomic_worker_start = 1;
  
        btrfs_start_workers(&fs_info->workers, 1);
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
-       btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_write_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_write_workers,
-                           fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->endio_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+       btrfs_start_workers(&fs_info->endio_write_workers, 1);
  
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@@ -1973,8 -1959,8 +1975,8 @@@ fail_iput
  
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 +fail_bdi:
        bdi_destroy(&fs_info->bdi);
 -
  fail:
        kfree(extent_root);
        kfree(tree_root);
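
With the disk-io.c hunks above, setup_bdi() can now fail partway through: both bdi_init() and bdi_register() (registered as "btrfs-%d" from btrfs_bdi_num) return errors, and the added comment makes the contract explicit: on failure the caller unwinds through the new fail_bdi label and bdi_destroy(). A small self-contained C sketch of that contract, with invented names (dev_init, dev_register, dev_destroy, setup_dev) standing in for the bdi calls:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical two-stage resource, loosely mirroring bdi_init() + bdi_register(). */
    struct dev_info {
            int inited;
            int registered;
            char name[32];
    };

    static int dev_init(struct dev_info *d)
    {
            d->inited = 1;
            d->registered = 0;
            return 0;
    }

    static int dev_register(struct dev_info *d, int id)
    {
            if (id < 0)
                    return -EINVAL;          /* simulate a registration failure */
            snprintf(d->name, sizeof(d->name), "btrfs-%d", id);
            d->registered = 1;
            return 0;
    }

    static void dev_destroy(struct dev_info *d)
    {
            /* Safe to call after a partial setup, like bdi_destroy(). */
            d->registered = 0;
            d->inited = 0;
    }

    /* Same contract as the patched setup_bdi(): on error the *caller* cleans up. */
    static int setup_dev(struct dev_info *d, int id)
    {
            int err;

            err = dev_init(d);
            if (err)
                    return err;
            return dev_register(d, id);
    }

    int main(void)
    {
            struct dev_info d;

            if (setup_dev(&d, -1)) {         /* forced failure for the demo */
                    dev_destroy(&d);         /* mirrors the fail_bdi: label */
                    fprintf(stderr, "setup failed, cleaned up\n");
                    return EXIT_FAILURE;
            }
            dev_destroy(&d);
            return 0;
    }
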
diff --combined fs/btrfs/file.c
index 4b833972273a75218eb775cf8caf64dc1be80ed7,4123db9d51410b9de5e68724af45b0069d3ef76a..571ad3c13b47be3de640f4ce84253f3ce593b26d
@@@ -22,6 -22,7 +22,6 @@@
  #include <linux/time.h>
  #include <linux/init.h>
  #include <linux/string.h>
 -#include <linux/smp_lock.h>
  #include <linux/backing-dev.h>
  #include <linux/mpage.h>
  #include <linux/swap.h>
@@@ -112,8 -113,6 +112,6 @@@ static noinline int dirty_and_release_p
        int err = 0;
        int i;
        struct inode *inode = fdentry(file)->d_inode;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-       u64 hint_byte;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
                    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
  
        end_of_last_block = start_pos + num_bytes - 1;
-       lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-       trans = btrfs_join_transaction(root, 1);
-       if (!trans) {
-               err = -ENOMEM;
-               goto out_unlock;
-       }
-       btrfs_set_trans_block_group(trans, inode);
-       hint_byte = 0;
-       set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-       /* check for reserved extents on each page, we don't want
-        * to reset the delalloc bit on things that already have
-        * extents reserved.
-        */
        btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
        for (i = 0; i < num_pages; i++) {
                struct page *p = pages[i];
                 * at this time.
                 */
        }
-       err = btrfs_end_transaction(trans, root);
- out_unlock:
-       unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
        return err;
  }
  
@@@ -189,18 -169,18 +168,18 @@@ int btrfs_drop_extent_cache(struct inod
                if (!split2)
                        split2 = alloc_extent_map(GFP_NOFS);
  
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, len);
                if (!em) {
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        break;
                }
                flags = em->flags;
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
-                       spin_unlock(&em_tree->lock);
                        if (em->start <= start &&
                            (!testend || em->start + em->len >= start + len)) {
                                free_extent_map(em);
+                               write_unlock(&em_tree->lock);
                                break;
                        }
                        if (start < em->start) {
                                start = em->start + em->len;
                        }
                        free_extent_map(em);
+                       write_unlock(&em_tree->lock);
                        continue;
                }
                compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                        free_extent_map(split);
                        split = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
  
                /* once for us */
                free_extent_map(em);
  noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 locked_end,
-                      u64 inline_limit, u64 *hint_byte)
+                      u64 inline_limit, u64 *hint_byte, int drop_cache)
  {
        u64 extent_end = 0;
        u64 search_start = start;
        int ret;
  
        inline_limit = 0;
-       btrfs_drop_extent_cache(inode, start, end - 1, 0);
+       if (drop_cache)
+               btrfs_drop_extent_cache(inode, start, end - 1, 0);
  
        path = btrfs_alloc_path();
        if (!path)
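
btrfs_drop_extents() now takes a drop_cache argument, so a caller can keep the cached extent mapping for the range while the on-disk items are rewritten; insert_reserved_file_extent() passes 0 and the mapping is unpinned later in btrfs_finish_ordered_io(), while the other callers keep the old behaviour with 1. A toy userspace sketch of that calling convention (range_store and rewrite_range are invented names, not btrfs code):

    #include <stddef.h>
    #include <string.h>

    /* Toy backing store plus a cached copy of it. */
    struct range_store {
            unsigned char disk[4096];       /* "on disk" contents */
            unsigned char cache[4096];      /* cached copy */
            unsigned char cached[4096];     /* 1 = cache[i] is trusted */
    };

    /*
     * Rewrite [start, end) in the store.  The caller chooses whether the cached
     * copy is dropped first (drop_cache = 1), or left in place because it already
     * describes the data being written and will be revalidated later
     * (drop_cache = 0), which is the insert_reserved_file_extent() case.
     */
    static void rewrite_range(struct range_store *s, size_t start, size_t end,
                              const unsigned char *src, int drop_cache)
    {
            if (end > sizeof(s->disk))
                    end = sizeof(s->disk);
            if (start >= end)
                    return;
            if (drop_cache)
                    memset(s->cached + start, 0, end - start);
            memcpy(s->disk + start, src, end - start);
    }
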
diff --combined fs/btrfs/inode.c
index 59cba180fe833f08f815f4ca968956eaa7f33e18,88f9df7bfdaee77dcc65b223b8afbc0b61ee35d2..941f1b71cd2212b00d53109f83d73c1417e807fd
@@@ -26,6 -26,7 +26,6 @@@
  #include <linux/time.h>
  #include <linux/init.h>
  #include <linux/string.h>
 -#include <linux/smp_lock.h>
  #include <linux/backing-dev.h>
  #include <linux/mpage.h>
  #include <linux/swap.h>
@@@ -231,7 -232,8 +231,8 @@@ static noinline int cow_file_range_inli
        }
  
        ret = btrfs_drop_extents(trans, root, inode, start,
-                                aligned_end, aligned_end, start, &hint_byte);
+                                aligned_end, aligned_end, start,
+                                &hint_byte, 1);
        BUG_ON(ret);
  
        if (isize > actual_end)
                                   inline_len, compressed_size,
                                   compressed_pages);
        BUG_ON(ret);
-       btrfs_drop_extent_cache(inode, start, aligned_end, 0);
+       btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
        return 0;
  }
  
@@@ -425,7 -427,7 +426,7 @@@ again
                        extent_clear_unlock_delalloc(inode,
                                                     &BTRFS_I(inode)->io_tree,
                                                     start, end, NULL, 1, 0,
-                                                    0, 1, 1, 1);
+                                                    0, 1, 1, 1, 0);
                        ret = 0;
                        goto free_pages_out;
                }
@@@ -611,9 -613,9 +612,9 @@@ static noinline int submit_compressed_e
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
  
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
                                             async_extent->start,
                                             async_extent->start +
                                             async_extent->ram_size - 1,
-                                            NULL, 1, 1, 0, 1, 1, 0);
+                                            NULL, 1, 1, 0, 1, 1, 0, 0);
  
                ret = btrfs_submit_compressed_write(inode,
                                    async_extent->start,
@@@ -713,7 -715,7 +714,7 @@@ static noinline int cow_file_range(stru
                        extent_clear_unlock_delalloc(inode,
                                                     &BTRFS_I(inode)->io_tree,
                                                     start, end, NULL, 1, 1,
-                                                    1, 1, 1, 1);
+                                                    1, 1, 1, 1, 0);
                        *nr_written = *nr_written +
                             (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                        *page_started = 1;
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
  
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
                /* we're not doing compressed IO, don't unlock the first
                 * page (which the caller expects to stay locked), don't
                 * clear any dirty bits and don't set any writeback bits
+                *
+                * Do set the Private2 bit so we know this page was properly
+                * setup for writepage
                 */
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                             start, start + ram_size - 1,
                                             locked_page, unlock, 1,
-                                            1, 0, 0, 0);
+                                            1, 0, 0, 0, 1);
                disk_num_bytes -= cur_alloc_size;
                num_bytes -= cur_alloc_size;
                alloc_hint = ins.objectid + ins.offset;
@@@ -853,7 -858,7 +857,7 @@@ static int cow_file_range_async(struct 
        int limit = 10 * 1024 * 1042;
  
        clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
-                        EXTENT_DELALLOC, 1, 0, GFP_NOFS);
+                        EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
        while (start < end) {
                async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
                async_cow->inode = inode;
@@@ -1080,9 -1085,9 +1084,9 @@@ out_check
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
                        while (1) {
-                               spin_lock(&em_tree->lock);
+                               write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
-                               spin_unlock(&em_tree->lock);
+                               write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
                                        break;
  
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                        cur_offset, cur_offset + num_bytes - 1,
-                                       locked_page, 1, 1, 1, 0, 0, 0);
+                                       locked_page, 1, 1, 1, 0, 0, 0, 1);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@@ -1374,10 -1379,8 +1378,8 @@@ again
        lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
  
        /* already ordered? We're done */
-       if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                            EXTENT_ORDERED, 0)) {
+       if (PagePrivate2(page))
                goto out;
-       }
  
        ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
@@@ -1413,11 -1416,9 +1415,9 @@@ static int btrfs_writepage_start_hook(s
        struct inode *inode = page->mapping->host;
        struct btrfs_writepage_fixup *fixup;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret;
  
-       ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
-                            EXTENT_ORDERED, 0);
-       if (ret)
+       /* this page is properly in the ordered list */
+       if (TestClearPagePrivate2(page))
                return 0;
  
        if (PageChecked(page))
@@@ -1455,9 -1456,19 +1455,19 @@@ static int insert_reserved_file_extent(
        BUG_ON(!path);
  
        path->leave_spinning = 1;
+       /*
+        * we may be replacing one extent in the tree with another.
+        * The new extent is pinned in the extent map, and we don't want
+        * to drop it from the cache until it is completely in the btree.
+        *
+        * So, tell btrfs_drop_extents to leave this extent in the cache.
+        * the caller is expected to unpin it and allow it to be merged
+        * with the others.
+        */
        ret = btrfs_drop_extents(trans, root, inode, file_pos,
                                 file_pos + num_bytes, locked_end,
-                                file_pos, &hint);
+                                file_pos, &hint, 0);
        BUG_ON(ret);
  
        ins.objectid = inode->i_ino;
        btrfs_mark_buffer_dirty(leaf);
  
        inode_add_bytes(inode, num_bytes);
-       btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
  
        ins.objectid = disk_bytenr;
        ins.offset = disk_num_bytes;
@@@ -1596,6 -1606,9 +1605,9 @@@ static int btrfs_finish_ordered_io(stru
                                                ordered_extent->len,
                                                compressed, 0, 0,
                                                BTRFS_FILE_EXTENT_REG);
+               unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+                                  ordered_extent->file_offset,
+                                  ordered_extent->len);
                BUG_ON(ret);
        }
        unlock_extent(io_tree, ordered_extent->file_offset,
@@@ -1623,6 -1636,7 +1635,7 @@@ nocow
  static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
                                struct extent_state *state, int uptodate)
  {
+       ClearPagePrivate2(page);
        return btrfs_finish_ordered_io(page->mapping->host, start, end);
  }
  
@@@ -1669,13 -1683,13 +1682,13 @@@ static int btrfs_io_failed_hook(struct 
                failrec->last_mirror = 0;
                failrec->bio_flags = 0;
  
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, failrec->len);
                if (em->start > start || em->start + em->len < start) {
                        free_extent_map(em);
                        em = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
  
                if (!em || IS_ERR(em)) {
                        kfree(failrec);
@@@ -1794,7 -1808,7 +1807,7 @@@ static int btrfs_readpage_end_io_hook(s
                return 0;
  
        if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
-           test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
+           test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
                clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
                                  GFP_NOFS);
                return 0;
@@@ -2121,8 -2135,10 +2134,8 @@@ static void btrfs_read_locked_inode(str
         * any xattrs or acls
         */
        maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
 -      if (!maybe_acls) {
 -              BTRFS_I(inode)->i_acl = NULL;
 -              BTRFS_I(inode)->i_default_acl = NULL;
 -      }
 +      if (!maybe_acls)
 +              cache_no_acl(inode);
  
        BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
                                                alloc_group_block, 0);
@@@ -2319,6 -2335,7 +2332,6 @@@ err
        btrfs_update_inode(trans, root, dir);
        btrfs_drop_nlink(inode);
        ret = btrfs_update_inode(trans, root, inode);
 -      dir->i_sb->s_dirt = 1;
  out:
        return ret;
  }
@@@ -2802,6 -2819,7 +2815,6 @@@ error
                                      pending_del_nr);
        }
        btrfs_free_path(path);
 -      inode->i_sb->s_dirt = 1;
        return ret;
  }
  
@@@ -2935,7 -2953,7 +2948,7 @@@ int btrfs_cont_expand(struct inode *ino
                                                 cur_offset,
                                                 cur_offset + hole_size,
                                                 block_end,
-                                                cur_offset, &hint_byte);
+                                                cur_offset, &hint_byte, 1);
                        if (err)
                                break;
                        err = btrfs_insert_file_extent(trans, root,
@@@ -3099,12 -3117,8 +3112,12 @@@ static void inode_tree_add(struct inod
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_inode *entry;
 -      struct rb_node **p = &root->inode_tree.rb_node;
 -      struct rb_node *parent = NULL;
 +      struct rb_node **p;
 +      struct rb_node *parent;
 +
 +again:
 +      p = &root->inode_tree.rb_node;
 +      parent = NULL;
  
        spin_lock(&root->inode_lock);
        while (*p) {
                entry = rb_entry(parent, struct btrfs_inode, rb_node);
  
                if (inode->i_ino < entry->vfs_inode.i_ino)
 -                      p = &(*p)->rb_left;
 +                      p = &parent->rb_left;
                else if (inode->i_ino > entry->vfs_inode.i_ino)
 -                      p = &(*p)->rb_right;
 +                      p = &parent->rb_right;
                else {
                        WARN_ON(!(entry->vfs_inode.i_state &
                                  (I_WILL_FREE | I_FREEING | I_CLEAR)));
 -                      break;
 +                      rb_erase(parent, &root->inode_tree);
 +                      RB_CLEAR_NODE(parent);
 +                      spin_unlock(&root->inode_lock);
 +                      goto again;
                }
        }
        rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
@@@ -3133,18 -3144,21 +3146,18 @@@ static void inode_tree_del(struct inod
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
  
 +      spin_lock(&root->inode_lock);
        if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
 -              spin_lock(&root->inode_lock);
                rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
 -              spin_unlock(&root->inode_lock);
                RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
        }
 +      spin_unlock(&root->inode_lock);
  }
  
  static noinline void init_btrfs_i(struct inode *inode)
  {
        struct btrfs_inode *bi = BTRFS_I(inode);
  
 -      bi->i_acl = BTRFS_ACL_NOT_CACHED;
 -      bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
 -
        bi->generation = 0;
        bi->sequence = 0;
        bi->last_trans = 0;
@@@ -3768,6 -3782,7 +3781,6 @@@ static int btrfs_mknod(struct inode *di
                init_special_inode(inode, inode->i_mode, rdev);
                btrfs_update_inode(trans, root, inode);
        }
 -      dir->i_sb->s_dirt = 1;
        btrfs_update_inode_block_group(trans, inode);
        btrfs_update_inode_block_group(trans, dir);
  out_unlock:
@@@ -3832,6 -3847,7 +3845,6 @@@ static int btrfs_create(struct inode *d
                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
 -      dir->i_sb->s_dirt = 1;
        btrfs_update_inode_block_group(trans, inode);
        btrfs_update_inode_block_group(trans, dir);
  out_unlock:
@@@ -3878,6 -3894,7 +3891,6 @@@ static int btrfs_link(struct dentry *ol
        if (err)
                drop_inode = 1;
  
 -      dir->i_sb->s_dirt = 1;
        btrfs_update_inode_block_group(trans, dir);
        err = btrfs_update_inode(trans, root, inode);
  
@@@ -3959,6 -3976,7 +3972,6 @@@ static int btrfs_mkdir(struct inode *di
  
        d_instantiate(dentry, inode);
        drop_on_err = 0;
 -      dir->i_sb->s_dirt = 1;
        btrfs_update_inode_block_group(trans, inode);
        btrfs_update_inode_block_group(trans, dir);
  
@@@ -4064,11 -4082,11 +4077,11 @@@ struct extent_map *btrfs_get_extent(str
        int compressed;
  
  again:
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em)
                em->bdev = root->fs_info->fs_devices->latest_bdev;
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
  
        if (em) {
                if (em->start > start || em->start + em->len <= start)
                                map = kmap(page);
                                read_extent_buffer(leaf, map + pg_offset, ptr,
                                                   copy_size);
+                               if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+                                       memset(map + pg_offset + copy_size, 0,
+                                              PAGE_CACHE_SIZE - pg_offset -
+                                              copy_size);
+                               }
                                kunmap(page);
                        }
                        flush_dcache_page(page);
@@@ -4259,7 -4282,7 +4277,7 @@@ insert
        }
  
        err = 0;
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        /* it is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
                        err = 0;
                }
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
  out:
        if (path)
                btrfs_free_path(path);
@@@ -4398,13 -4421,21 +4416,21 @@@ static void btrfs_invalidatepage(struc
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
  
+       /*
+        * we have the page locked, so new writeback can't start,
+        * and the dirty bit won't be cleared while we are here.
+        *
+        * Wait for IO on this page so that we can safely clear
+        * the PagePrivate2 bit and do ordered accounting
+        */
        wait_on_page_writeback(page);
        tree = &BTRFS_I(page->mapping->host)->io_tree;
        if (offset) {
                btrfs_releasepage(page, GFP_NOFS);
                return;
        }
        lock_extent(tree, page_start, page_end, GFP_NOFS);
        ordered = btrfs_lookup_ordered_extent(page->mapping->host,
                                           page_offset(page));
                 */
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED, 1, 0, GFP_NOFS);
-               btrfs_finish_ordered_io(page->mapping->host,
-                                       page_start, page_end);
+                                EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+               /*
+                * whoever cleared the private bit is responsible
+                * for the finish_ordered_io
+                */
+               if (TestClearPagePrivate2(page)) {
+                       btrfs_finish_ordered_io(page->mapping->host,
+                                               page_start, page_end);
+               }
                btrfs_put_ordered_extent(ordered);
                lock_extent(tree, page_start, page_end, GFP_NOFS);
        }
        clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_ORDERED,
-                1, 1, GFP_NOFS);
+                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+                1, 1, NULL, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);
  
        ClearPageChecked(page);
@@@ -4521,11 -4557,14 +4552,14 @@@ again
        }
        ClearPageChecked(page);
        set_page_dirty(page);
+       SetPageUptodate(page);
  
        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
        unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
  
  out_unlock:
+       if (!ret)
+               return VM_FAULT_LOCKED;
        unlock_page(page);
  out:
        return ret;
@@@ -4642,6 -4681,8 +4676,6 @@@ struct inode *btrfs_alloc_inode(struct 
        ei->last_trans = 0;
        ei->logged_trans = 0;
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
 -      ei->i_acl = BTRFS_ACL_NOT_CACHED;
 -      ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
        INIT_LIST_HEAD(&ei->i_orphan);
        INIT_LIST_HEAD(&ei->ordered_operations);
        return &ei->vfs_inode;
@@@ -4655,6 -4696,13 +4689,6 @@@ void btrfs_destroy_inode(struct inode *
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
  
 -      if (BTRFS_I(inode)->i_acl &&
 -          BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
 -              posix_acl_release(BTRFS_I(inode)->i_acl);
 -      if (BTRFS_I(inode)->i_default_acl &&
 -          BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
 -              posix_acl_release(BTRFS_I(inode)->i_default_acl);
 -
        /*
         * Make sure we're properly removed from the ordered operation
         * lists.
@@@ -4977,6 -5025,7 +5011,6 @@@ static int btrfs_symlink(struct inode *
                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
 -      dir->i_sb->s_dirt = 1;
        btrfs_update_inode_block_group(trans, inode);
        btrfs_update_inode_block_group(trans, dir);
        if (drop_inode)
@@@ -5058,6 -5107,8 +5092,8 @@@ static int prealloc_file_range(struct b
                                                  0, 0, 0,
                                                  BTRFS_FILE_EXTENT_PREALLOC);
                BUG_ON(ret);
+               btrfs_drop_extent_cache(inode, cur_offset,
+                                       cur_offset + ins.offset -1, 0);
                num_bytes -= ins.offset;
                cur_offset += ins.offset;
                alloc_hint = ins.objectid + ins.offset;
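
The inode.c hunks above replace the EXTENT_ORDERED bit tests with the page-level Private2 flag: the delalloc paths set it when a page is properly set up for writepage, and, per the added comment, whoever clears the bit is responsible for the finish_ordered_io() accounting, so the work runs once even when the fixup worker, the end-io hook, and invalidatepage race. A userspace sketch of that test-and-clear ownership idiom with C11 atomics (page_state, mark_ordered, claim_ordered are invented names):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Minimal stand-in for a page with one flag bit. */
    struct page_state {
            atomic_bool private2;
    };

    /* Set when the page is handed to writeback, like SetPagePrivate2(). */
    static void mark_ordered(struct page_state *p)
    {
            atomic_store(&p->private2, true);
    }

    /*
     * Atomically test and clear, like TestClearPagePrivate2(): exactly one of
     * the racing paths sees 'true' and therefore runs the ordered accounting.
     */
    static bool claim_ordered(struct page_state *p)
    {
            return atomic_exchange(&p->private2, false);
    }

    int main(void)
    {
            struct page_state p = { .private2 = false };

            mark_ordered(&p);
            if (claim_ordered(&p))
                    puts("this path does the finish_ordered_io work");
            if (!claim_ordered(&p))
                    puts("a second caller sees the flag already cleared");
            return 0;
    }
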
diff --combined fs/btrfs/ioctl.c
index bd88f25889f7c5daf94bf0aec8645041bfb2ce60,e2d8e90259b05c2499a1f9cbc54ac9cf529ecbe0..ef0188fb3cc41f38452777eb173c7c22073f79b0
@@@ -27,6 -27,7 +27,6 @@@
  #include <linux/time.h>
  #include <linux/init.h>
  #include <linux/string.h>
 -#include <linux/smp_lock.h>
  #include <linux/backing-dev.h>
  #include <linux/mount.h>
  #include <linux/mpage.h>
@@@ -596,9 -597,8 +596,8 @@@ again
                clear_page_dirty_for_io(page);
  
                btrfs_set_extent_delalloc(inode, page_start, page_end);
-               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                set_page_dirty(page);
+               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                unlock_page(page);
                page_cache_release(page);
                balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@@ -976,7 -976,7 +975,7 @@@ static long btrfs_ioctl_clone(struct fi
  
        /* punch hole in destination first */
        btrfs_drop_extents(trans, root, inode, off, off + len,
-                          off + len, 0, &hint_byte);
+                          off + len, 0, &hint_byte, 1);
  
        /* clone data */
        key.objectid = src->i_ino;