Pileus Git - ~andy/linux/commitdiff
ext4: lookup block mapping in extent status tree
author Zheng Liu <wenqing.lz@taobao.com>
Mon, 18 Feb 2013 05:29:59 +0000 (00:29 -0500)
committer Theodore Ts'o <tytso@mit.edu>
Mon, 18 Feb 2013 05:29:59 +0000 (00:29 -0500)
After tracking the status of all extents, we already have an extent
cache in memory.  Every time we want to look up a block mapping, we can
first try to look it up in the extent status tree to avoid a potential
disk I/O.

A new function called ext4_es_lookup_extent is defined to do this
work.  When we try to look up a block mapping, we always call
ext4_map_blocks and/or ext4_da_map_blocks, so in these functions we
first try to look up the block mapping in the extent status tree.

A new flag, EXT4_GET_BLOCKS_NO_PUT_HOLE, is used in ext4_da_map_blocks
so that a hole is not put into the extent status tree, because this
hole will be converted to a delayed extent in the tree immediately.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/extents_status.c
fs/ext4/extents_status.h
fs/ext4/inode.c
include/trace/events/ext4.h

index 5c31d6ac9500f03c1d8214ef3aeddb0d49c85b87..329e7fba47d63cb5ac3e4a65a12a60eb65396ac5 100644 (file)
@@ -579,6 +579,8 @@ enum {
 #define EXT4_GET_BLOCKS_KEEP_SIZE              0x0080
        /* Do not take i_data_sem locking in ext4_map_blocks */
 #define EXT4_GET_BLOCKS_NO_LOCK                        0x0100
+       /* Do not put hole in extent cache */
+#define EXT4_GET_BLOCKS_NO_PUT_HOLE            0x0200
 
 /*
  * Flags used by ext4_free_blocks
index be0b1b3eed97dabc11ea230e623af664376dd0df..b9d7a2363736b133ffceb7f0ed089d7dce97909c 100644 (file)
@@ -2167,6 +2167,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
                                block,
                                le32_to_cpu(ex->ee_block),
                                 ext4_ext_get_actual_len(ex));
+               if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
+                       ext4_es_insert_extent(inode, lblock, len, ~0,
+                                             EXTENT_STATUS_HOLE);
        } else if (block >= le32_to_cpu(ex->ee_block)
                        + ext4_ext_get_actual_len(ex)) {
                ext4_lblk_t next;
@@ -2180,6 +2183,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
                                block);
                BUG_ON(next == lblock);
                len = next - lblock;
+               if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
+                       ext4_es_insert_extent(inode, lblock, len, ~0,
+                                             EXTENT_STATUS_HOLE);
        } else {
                lblock = len = 0;
                BUG();
@@ -4018,7 +4024,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                 * put just found gap into cache to speed up
                 * subsequent requests
                 */
-               ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+               if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
+                       ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
                goto out2;
        }
 
index 76f4351ea82183eaa8d420a2e4f9a9027eb12dab..eeb893122d8d05242fe66c65230ce537358de24c 100644 (file)
@@ -461,6 +461,66 @@ error:
        return err;
 }
 
+/*
+ * ext4_es_lookup_extent() looks up an extent in extent status tree.
+ *
+ * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
+ *
+ * Return: 1 on found, 0 on not
+ */
+int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
+                         struct extent_status *es)
+{
+       struct ext4_es_tree *tree;
+       struct extent_status *es1 = NULL;
+       struct rb_node *node;
+       int found = 0;
+
+       trace_ext4_es_lookup_extent_enter(inode, lblk);
+       es_debug("lookup extent in block %u\n", lblk);
+
+       tree = &EXT4_I(inode)->i_es_tree;
+       read_lock(&EXT4_I(inode)->i_es_lock);
+
+       /* find extent in cache firstly */
+       es->es_lblk = es->es_len = es->es_pblk = 0;
+       if (tree->cache_es) {
+               es1 = tree->cache_es;
+               if (in_range(lblk, es1->es_lblk, es1->es_len)) {
+                       es_debug("%u cached by [%u/%u)\n",
+                                lblk, es1->es_lblk, es1->es_len);
+                       found = 1;
+                       goto out;
+               }
+       }
+
+       node = tree->root.rb_node;
+       while (node) {
+               es1 = rb_entry(node, struct extent_status, rb_node);
+               if (lblk < es1->es_lblk)
+                       node = node->rb_left;
+               else if (lblk > ext4_es_end(es1))
+                       node = node->rb_right;
+               else {
+                       found = 1;
+                       break;
+               }
+       }
+
+out:
+       if (found) {
+               BUG_ON(!es1);
+               es->es_lblk = es1->es_lblk;
+               es->es_len = es1->es_len;
+               es->es_pblk = es1->es_pblk;
+       }
+
+       read_unlock(&EXT4_I(inode)->i_es_lock);
+
+       trace_ext4_es_lookup_extent_exit(inode, es, found);
+       return found;
+}
+
 static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
                                 ext4_lblk_t end)
 {
index 3f69d097c6e7f0c785aafb88d19e0b3278a80286..8ffc90c784fac340190366cfe0cb3057fbcbbd6c 100644 (file)
@@ -53,6 +53,8 @@ extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                                 ext4_lblk_t len);
 extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
                                        struct extent_status *es);
+extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
+                                struct extent_status *es);
 
 static inline int ext4_es_is_written(struct extent_status *es)
 {
index 576b586b61aa76b28ca90aaa0292a1411f047356..95a0c62c568336ce60583465e7f40d090d43aa37 100644 (file)
@@ -507,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 int ext4_map_blocks(handle_t *handle, struct inode *inode,
                    struct ext4_map_blocks *map, int flags)
 {
+       struct extent_status es;
        int retval;
 
        map->m_flags = 0;
        ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
                  "logical block %lu\n", inode->i_ino, flags, map->m_len,
                  (unsigned long) map->m_lblk);
+
+       /* Lookup extent status tree firstly */
+       if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+               if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
+                       map->m_pblk = ext4_es_pblock(&es) +
+                                       map->m_lblk - es.es_lblk;
+                       map->m_flags |= ext4_es_is_written(&es) ?
+                                       EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
+                       retval = es.es_len - (map->m_lblk - es.es_lblk);
+                       if (retval > map->m_len)
+                               retval = map->m_len;
+                       map->m_len = retval;
+               } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+                       retval = 0;
+               } else {
+                       BUG_ON(1);
+               }
+               goto found;
+       }
+
        /*
         * Try to see if we can get the block without requesting a new
         * file system block.
@@ -544,6 +565,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
                up_read((&EXT4_I(inode)->i_data_sem));
 
+found:
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                int ret = check_block_validity(inode, map);
                if (ret != 0)
@@ -1743,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                              struct ext4_map_blocks *map,
                              struct buffer_head *bh)
 {
+       struct extent_status es;
        int retval;
        sector_t invalid_block = ~((sector_t) 0xffff);
 
@@ -1753,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
        ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
                  "logical block %lu\n", inode->i_ino, map->m_len,
                  (unsigned long) map->m_lblk);
+
+       /* Lookup extent status tree firstly */
+       if (ext4_es_lookup_extent(inode, iblock, &es)) {
+
+               if (ext4_es_is_hole(&es)) {
+                       retval = 0;
+                       down_read((&EXT4_I(inode)->i_data_sem));
+                       goto add_delayed;
+               }
+
+               /*
+                * Delayed extent could be allocated by fallocate.
+                * So we need to check it.
+                */
+               if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
+                       map_bh(bh, inode->i_sb, invalid_block);
+                       set_buffer_new(bh);
+                       set_buffer_delay(bh);
+                       return 0;
+               }
+
+               map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
+               retval = es.es_len - (iblock - es.es_lblk);
+               if (retval > map->m_len)
+                       retval = map->m_len;
+               map->m_len = retval;
+               if (ext4_es_is_written(&es))
+                       map->m_flags |= EXT4_MAP_MAPPED;
+               else if (ext4_es_is_unwritten(&es))
+                       map->m_flags |= EXT4_MAP_UNWRITTEN;
+               else
+                       BUG_ON(1);
+
+               return retval;
+       }
+
        /*
         * Try to see if we can get the block without requesting a new
         * file system block.
@@ -1771,10 +1830,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                        map->m_flags |= EXT4_MAP_FROM_CLUSTER;
                retval = 0;
        } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+               retval = ext4_ext_map_blocks(NULL, inode, map,
+                                            EXT4_GET_BLOCKS_NO_PUT_HOLE);
        else
-               retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+               retval = ext4_ind_map_blocks(NULL, inode, map,
+                                            EXT4_GET_BLOCKS_NO_PUT_HOLE);
 
+add_delayed:
        if (retval == 0) {
                int ret;
                /*
index c121cdf55ab3e93c8d55988aae90e0e21ae92300..1e590b68cec4f35802216d1faf0ce1f333915781 100644 (file)
@@ -2199,6 +2199,62 @@ TRACE_EVENT(ext4_es_find_delayed_extent_exit,
                  __entry->pblk, __entry->status)
 );
 
+TRACE_EVENT(ext4_es_lookup_extent_enter,
+       TP_PROTO(struct inode *inode, ext4_lblk_t lblk),
+
+       TP_ARGS(inode, lblk),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,          dev             )
+               __field(        ino_t,          ino             )
+               __field(        ext4_lblk_t,    lblk            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->lblk   = lblk;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->lblk)
+);
+
+TRACE_EVENT(ext4_es_lookup_extent_exit,
+       TP_PROTO(struct inode *inode, struct extent_status *es,
+                int found),
+
+       TP_ARGS(inode, es, found),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,          dev             )
+               __field(        ino_t,          ino             )
+               __field(        ext4_lblk_t,    lblk            )
+               __field(        ext4_lblk_t,    len             )
+               __field(        ext4_fsblk_t,   pblk            )
+               __field(        unsigned long long,     status  )
+               __field(        int,            found           )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->lblk   = es->es_lblk;
+               __entry->len    = es->es_len;
+               __entry->pblk   = ext4_es_pblock(es);
+               __entry->status = ext4_es_status(es);
+               __entry->found  = found;
+       ),
+
+       TP_printk("dev %d,%d ino %lu found %d [%u/%u) %llu %llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->found,
+                 __entry->lblk, __entry->len,
+                 __entry->found ? __entry->pblk : 0,
+                 __entry->found ? __entry->status : 0)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */