]> Pileus Git - ~andy/linux/blobdiff - fs/ext4/indirect.c
ext4: improve writepage credit estimate for files with indirect blocks
[~andy/linux] / fs / ext4 / indirect.c
index a04183127ef049190ad96c0b7052ea4495fd33e9..f7742f03cdcbea4a367115398ccd1d2df75dfa1e 100644 (file)
@@ -20,6 +20,7 @@
  *     (sct@redhat.com), 1993, 1998
  */
 
+#include <linux/aio.h>
 #include "ext4_jbd2.h"
 #include "truncate.h"
 #include "ext4_extents.h"      /* Needed for EXT_MAX_BLOCKS */
@@ -291,131 +292,6 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
        return count;
 }
 
-/**
- *     ext4_alloc_blocks: multiple allocate blocks needed for a branch
- *     @handle: handle for this transaction
- *     @inode: inode which needs allocated blocks
- *     @iblock: the logical block to start allocated at
- *     @goal: preferred physical block of allocation
- *     @indirect_blks: the number of blocks need to allocate for indirect
- *                     blocks
- *     @blks: number of desired blocks
- *     @new_blocks: on return it will store the new block numbers for
- *     the indirect blocks(if needed) and the first direct block,
- *     @err: on return it will store the error code
- *
- *     This function will return the number of blocks allocated as
- *     requested by the passed-in parameters.
- */
-static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
-                            ext4_lblk_t iblock, ext4_fsblk_t goal,
-                            int indirect_blks, int blks,
-                            ext4_fsblk_t new_blocks[4], int *err)
-{
-       struct ext4_allocation_request ar;
-       int target, i;
-       unsigned long count = 0, blk_allocated = 0;
-       int index = 0;
-       ext4_fsblk_t current_block = 0;
-       int ret = 0;
-
-       /*
-        * Here we try to allocate the requested multiple blocks at once,
-        * on a best-effort basis.
-        * To build a branch, we should allocate blocks for
-        * the indirect blocks(if not allocated yet), and at least
-        * the first direct block of this branch.  That's the
-        * minimum number of blocks need to allocate(required)
-        */
-       /* first we try to allocate the indirect blocks */
-       target = indirect_blks;
-       while (target > 0) {
-               count = target;
-               /* allocating blocks for indirect blocks and direct blocks */
-               current_block = ext4_new_meta_blocks(handle, inode, goal,
-                                                    0, &count, err);
-               if (*err)
-                       goto failed_out;
-
-               if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
-                       EXT4_ERROR_INODE(inode,
-                                        "current_block %llu + count %lu > %d!",
-                                        current_block, count,
-                                        EXT4_MAX_BLOCK_FILE_PHYS);
-                       *err = -EIO;
-                       goto failed_out;
-               }
-
-               target -= count;
-               /* allocate blocks for indirect blocks */
-               while (index < indirect_blks && count) {
-                       new_blocks[index++] = current_block++;
-                       count--;
-               }
-               if (count > 0) {
-                       /*
-                        * save the new block number
-                        * for the first direct block
-                        */
-                       new_blocks[index] = current_block;
-                       WARN(1, KERN_INFO "%s returned more blocks than "
-                                               "requested\n", __func__);
-                       break;
-               }
-       }
-
-       target = blks - count ;
-       blk_allocated = count;
-       if (!target)
-               goto allocated;
-       /* Now allocate data blocks */
-       memset(&ar, 0, sizeof(ar));
-       ar.inode = inode;
-       ar.goal = goal;
-       ar.len = target;
-       ar.logical = iblock;
-       if (S_ISREG(inode->i_mode))
-               /* enable in-core preallocation only for regular files */
-               ar.flags = EXT4_MB_HINT_DATA;
-
-       current_block = ext4_mb_new_blocks(handle, &ar, err);
-       if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
-               EXT4_ERROR_INODE(inode,
-                                "current_block %llu + ar.len %d > %d!",
-                                current_block, ar.len,
-                                EXT4_MAX_BLOCK_FILE_PHYS);
-               *err = -EIO;
-               goto failed_out;
-       }
-
-       if (*err && (target == blks)) {
-               /*
-                * if the allocation failed and we didn't allocate
-                * any blocks before
-                */
-               goto failed_out;
-       }
-       if (!*err) {
-               if (target == blks) {
-                       /*
-                        * save the new block number
-                        * for the first direct block
-                        */
-                       new_blocks[index] = current_block;
-               }
-               blk_allocated += ar.len;
-       }
-allocated:
-       /* total number of blocks allocated for direct blocks */
-       ret = blk_allocated;
-       *err = 0;
-       return ret;
-failed_out:
-       for (i = 0; i < index; i++)
-               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
-       return ret;
-}
-
 /**
  *     ext4_alloc_branch - allocate and set up a chain of blocks.
  *     @handle: handle for this transaction
@@ -448,60 +324,59 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
                             int *blks, ext4_fsblk_t goal,
                             ext4_lblk_t *offsets, Indirect *branch)
 {
-       int blocksize = inode->i_sb->s_blocksize;
-       int i, n = 0;
-       int err = 0;
-       struct buffer_head *bh;
-       int num;
-       ext4_fsblk_t new_blocks[4];
-       ext4_fsblk_t current_block;
+       struct ext4_allocation_request  ar;
+       struct buffer_head *            bh;
+       ext4_fsblk_t                    b, new_blocks[4];
+       __le32                          *p;
+       int                             i, j, err, len = 1;
 
-       num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
-                               *blks, new_blocks, &err);
-       if (err)
-               return err;
-
-       branch[0].key = cpu_to_le32(new_blocks[0]);
        /*
-        * metadata blocks and data blocks are allocated.
+        * Set up for the direct block allocation
         */
-       for (n = 1; n <= indirect_blks;  n++) {
-               /*
-                * Get buffer_head for parent block, zero it out
-                * and set the pointer to new one, then send
-                * parent to disk.
-                */
-               bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+       memset(&ar, 0, sizeof(ar));
+       ar.inode = inode;
+       ar.len = *blks;
+       ar.logical = iblock;
+       if (S_ISREG(inode->i_mode))
+               ar.flags = EXT4_MB_HINT_DATA;
+
+       for (i = 0; i <= indirect_blks; i++) {
+               if (i == indirect_blks) {
+                       ar.goal = goal;
+                       new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
+               } else
+                       goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
+                                                       goal, 0, NULL, &err);
+               if (err) {
+                       i--;
+                       goto failed;
+               }
+               branch[i].key = cpu_to_le32(new_blocks[i]);
+               if (i == 0)
+                       continue;
+
+               bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
                if (unlikely(!bh)) {
                        err = -ENOMEM;
                        goto failed;
                }
-
-               branch[n].bh = bh;
                lock_buffer(bh);
                BUFFER_TRACE(bh, "call get_create_access");
                err = ext4_journal_get_create_access(handle, bh);
                if (err) {
-                       /* Don't brelse(bh) here; it's done in
-                        * ext4_journal_forget() below */
                        unlock_buffer(bh);
                        goto failed;
                }
 
-               memset(bh->b_data, 0, blocksize);
-               branch[n].p = (__le32 *) bh->b_data + offsets[n];
-               branch[n].key = cpu_to_le32(new_blocks[n]);
-               *branch[n].p = branch[n].key;
-               if (n == indirect_blks) {
-                       current_block = new_blocks[n];
-                       /*
-                        * End of chain, update the last new metablock of
-                        * the chain to point to the new allocated
-                        * data blocks numbers
-                        */
-                       for (i = 1; i < num; i++)
-                               *(branch[n].p + i) = cpu_to_le32(++current_block);
-               }
+               memset(bh->b_data, 0, bh->b_size);
+               p = branch[i].p = (__le32 *) bh->b_data + offsets[i];
+               b = new_blocks[i];
+
+               if (i == indirect_blks)
+                       len = ar.len;
+               for (j = 0; j < len; j++)
+                       *p++ = cpu_to_le32(b++);
+
                BUFFER_TRACE(bh, "marking uptodate");
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
@@ -511,25 +386,16 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
                if (err)
                        goto failed;
        }
-       *blks = num;
-       return err;
+       *blks = ar.len;
+       return 0;
 failed:
-       /* Allocation failed, free what we already allocated */
-       ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
-       for (i = 1; i <= n ; i++) {
-               /*
-                * branch[i].bh is newly allocated, so there is no
-                * need to revoke the block, which is why we don't
-                * need to set EXT4_FREE_BLOCKS_METADATA.
-                */
-               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
-                                EXT4_FREE_BLOCKS_FORGET);
+       for (; i >= 0; i--) {
+               if (i != indirect_blks && branch[i].bh)
+                       ext4_forget(handle, 1, inode, branch[i].bh,
+                                   branch[i].bh->b_blocknr);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i],
+                                (i == indirect_blks) ? ar.len : 1, 0);
        }
-       for (i = n+1; i < indirect_blks; i++)
-               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
-
-       ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
-
        return err;
 }
 
@@ -913,27 +779,18 @@ int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
        return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
 }
 
-int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+/*
+ * Calculate number of indirect blocks touched by mapping @nrblocks logically
+ * contiguous blocks
+ */
+int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
 {
-       int indirects;
-
-       /* if nrblocks are contiguous */
-       if (chunk) {
-               /*
-                * With N contiguous data blocks, we need at most
-                * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
-                * 2 dindirect blocks, and 1 tindirect block
-                */
-               return DIV_ROUND_UP(nrblocks,
-                                   EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
-       }
        /*
-        * if nrblocks are not contiguous, worse case, each block touch
-        * a indirect block, and each indirect block touch a double indirect
-        * block, plus a triple indirect block
+        * With N contiguous data blocks, we need at most
+        * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+        * 2 dindirect blocks, and 1 tindirect block
         */
-       indirects = nrblocks * 2 + 1;
-       return indirects;
+       return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
 }
 
 /*
@@ -941,26 +798,9 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 * be able to restart the transaction at a convenient checkpoint to make
  * sure we don't overflow the journal.
  *
- * start_transaction gets us a new handle for a truncate transaction,
- * and extend_transaction tries to extend the existing one a bit.  If
+ * Try to extend this transaction for the purposes of truncation.  If
  * extend fails, we need to propagate the failure up and restart the
  * transaction in the top-level truncate loop. --sct
- */
-static handle_t *start_transaction(struct inode *inode)
-{
-       handle_t *result;
-
-       result = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
-                                   ext4_blocks_for_truncate(inode));
-       if (!IS_ERR(result))
-               return result;
-
-       ext4_std_error(inode->i_sb, PTR_ERR(result));
-       return result;
-}
-
-/*
- * Try to extend this transaction for the purposes of truncation.
  *
  * Returns 0 if we managed to create more room.  If we can't create more
  * room, and the transaction must be restarted we return 1.
@@ -1353,68 +1193,30 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
        }
 }
 
-void ext4_ind_truncate(struct inode *inode)
+void ext4_ind_truncate(handle_t *handle, struct inode *inode)
 {
-       handle_t *handle;
        struct ext4_inode_info *ei = EXT4_I(inode);
        __le32 *i_data = ei->i_data;
        int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-       struct address_space *mapping = inode->i_mapping;
        ext4_lblk_t offsets[4];
        Indirect chain[4];
        Indirect *partial;
        __le32 nr = 0;
        int n = 0;
        ext4_lblk_t last_block, max_block;
-       loff_t page_len;
        unsigned blocksize = inode->i_sb->s_blocksize;
-       int err;
-
-       handle = start_transaction(inode);
-       if (IS_ERR(handle))
-               return;         /* AKPM: return what? */
 
        last_block = (inode->i_size + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
        max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
-       if (inode->i_size % PAGE_CACHE_SIZE != 0) {
-               page_len = PAGE_CACHE_SIZE -
-                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
-
-               err = ext4_discard_partial_page_buffers(handle,
-                       mapping, inode->i_size, page_len, 0);
-
-               if (err)
-                       goto out_stop;
-       }
-
        if (last_block != max_block) {
                n = ext4_block_to_path(inode, last_block, offsets, NULL);
                if (n == 0)
-                       goto out_stop;  /* error */
+                       return;
        }
 
-       /*
-        * OK.  This truncate is going to happen.  We add the inode to the
-        * orphan list, so that if this truncate spans multiple transactions,
-        * and we crash, we will resume the truncate when the filesystem
-        * recovers.  It also marks the inode dirty, to catch the new size.
-        *
-        * Implication: the file must always be in a sane, consistent
-        * truncatable state while each transaction commits.
-        */
-       if (ext4_orphan_add(handle, inode))
-               goto out_stop;
-
-       /*
-        * From here we block out all ext4_get_block() callers who want to
-        * modify the block allocation tree.
-        */
-       down_write(&ei->i_data_sem);
-
-       ext4_discard_preallocations(inode);
        ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
 
        /*
@@ -1431,7 +1233,7 @@ void ext4_ind_truncate(struct inode *inode)
                 * It is unnecessary to free any data blocks if last_block is
                 * equal to the indirect block limit.
                 */
-               goto out_unlock;
+               return;
        } else if (n == 1) {            /* direct blocks */
                ext4_free_data(handle, inode, NULL, i_data+offsets[0],
                               i_data + EXT4_NDIR_BLOCKS);
@@ -1491,31 +1293,6 @@ do_indirects:
        case EXT4_TIND_BLOCK:
                ;
        }
-
-out_unlock:
-       up_write(&ei->i_data_sem);
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
-
-       /*
-        * In a multi-transaction truncate, we only make the final transaction
-        * synchronous
-        */
-       if (IS_SYNC(inode))
-               ext4_handle_sync(handle);
-out_stop:
-       /*
-        * If this was a simple ftruncate(), and the file will remain alive
-        * then we need to clear up the orphan record which we created above.
-        * However, if this was a real unlink then we were called by
-        * ext4_delete_inode(), and we allow that function to clean up the
-        * orphan info for us.
-        */
-       if (inode->i_nlink)
-               ext4_orphan_del(handle, inode);
-
-       ext4_journal_stop(handle);
-       trace_ext4_truncate_exit(inode);
 }
 
 static int free_hole_blocks(handle_t *handle, struct inode *inode,
@@ -1569,8 +1346,8 @@ err:
        return ret;
 }
 
-static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
-                                ext4_lblk_t first, ext4_lblk_t stop)
+int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+                         ext4_lblk_t first, ext4_lblk_t stop)
 {
        int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
        int level, ret = 0;
@@ -1604,157 +1381,3 @@ err:
        return ret;
 }
 
-int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
-       struct inode *inode = file_inode(file);
-       struct super_block *sb = inode->i_sb;
-       ext4_lblk_t first_block, stop_block;
-       struct address_space *mapping = inode->i_mapping;
-       handle_t *handle = NULL;
-       loff_t first_page, last_page, page_len;
-       loff_t first_page_offset, last_page_offset;
-       int err = 0;
-
-       /*
-        * Write out all dirty pages to avoid race conditions
-        * Then release them.
-        */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               err = filemap_write_and_wait_range(mapping,
-                       offset, offset + length - 1);
-               if (err)
-                       return err;
-       }
-
-       mutex_lock(&inode->i_mutex);
-       /* It's not possible punch hole on append only file */
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-               err = -EPERM;
-               goto out_mutex;
-       }
-       if (IS_SWAPFILE(inode)) {
-               err = -ETXTBSY;
-               goto out_mutex;
-       }
-
-       /* No need to punch hole beyond i_size */
-       if (offset >= inode->i_size)
-               goto out_mutex;
-
-       /*
-        * If the hole extents beyond i_size, set the hole
-        * to end after the page that contains i_size
-        */
-       if (offset + length > inode->i_size) {
-               length = inode->i_size +
-                   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
-                   offset;
-       }
-
-       first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       last_page = (offset + length) >> PAGE_CACHE_SHIFT;
-
-       first_page_offset = first_page << PAGE_CACHE_SHIFT;
-       last_page_offset = last_page << PAGE_CACHE_SHIFT;
-
-       /* Now release the pages */
-       if (last_page_offset > first_page_offset) {
-               truncate_pagecache_range(inode, first_page_offset,
-                                        last_page_offset - 1);
-       }
-
-       /* Wait all existing dio works, newcomers will block on i_mutex */
-       inode_dio_wait(inode);
-
-       handle = start_transaction(inode);
-       if (IS_ERR(handle))
-               goto out_mutex;
-
-       /*
-        * Now we need to zero out the non-page-aligned data in the
-        * pages at the start and tail of the hole, and unmap the buffer
-        * heads for the block aligned regions of the page that were
-        * completely zerod.
-        */
-       if (first_page > last_page) {
-               /*
-                * If the file space being truncated is contained within a page
-                * just zero out and unmap the middle of that page
-                */
-               err = ext4_discard_partial_page_buffers(handle,
-                       mapping, offset, length, 0);
-               if (err)
-                       goto out;
-       } else {
-               /*
-                * Zero out and unmap the paritial page that contains
-                * the start of the hole
-                */
-               page_len = first_page_offset - offset;
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle, mapping,
-                                                       offset, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-
-               /*
-                * Zero out and unmap the partial page that contains
-                * the end of the hole
-                */
-               page_len = offset + length - last_page_offset;
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle, mapping,
-                                               last_page_offset, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-       }
-
-       /*
-        * If i_size contained in the last page, we need to
-        * unmap and zero the paritial page after i_size
-        */
-       if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
-           inode->i_size % PAGE_CACHE_SIZE != 0) {
-               page_len = PAGE_CACHE_SIZE -
-                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle,
-                               mapping, inode->i_size, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-       }
-
-       first_block = (offset + sb->s_blocksize - 1) >>
-               EXT4_BLOCK_SIZE_BITS(sb);
-       stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
-       if (first_block >= stop_block)
-               goto out;
-
-       down_write(&EXT4_I(inode)->i_data_sem);
-       ext4_discard_preallocations(inode);
-
-       err = ext4_es_remove_extent(inode, first_block,
-                                   stop_block - first_block);
-       err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
-
-       ext4_discard_preallocations(inode);
-
-       if (IS_SYNC(inode))
-               ext4_handle_sync(handle);
-
-       up_write(&EXT4_I(inode)->i_data_sem);
-
-out:
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
-       ext4_journal_stop(handle);
-
-out_mutex:
-       mutex_unlock(&inode->i_mutex);
-
-       return err;
-}