Pileus Git - ~andy/linux/commitdiff
ext4: fix lost truncate due to race with writeback
author Jan Kara <jack@suse.cz>
Sat, 17 Aug 2013 14:09:31 +0000 (10:09 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Sat, 17 Aug 2013 14:09:31 +0000 (10:09 -0400)
The following race can lead to a loss of i_disksize update from truncate
thus resulting in a wrong inode size if the inode size isn't updated
again before inode is reclaimed:

ext4_setattr()                          mpage_map_and_submit_extent()
  EXT4_I(inode)->i_disksize = attr->ia_size;
  ...                                     ...
                                          disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT
                                          /* False because i_size isn't
                                           * updated yet */
                                          if (disksize > i_size_read(inode))
                                          /* True, because i_disksize is
                                           * already truncated */
                                          if (disksize > EXT4_I(inode)->i_disksize)
                                            /* Overwrite i_disksize
                                             * update from truncate */
                                            ext4_update_i_disksize()
  i_size_write(inode, attr->ia_size);

For other places updating i_disksize such race cannot happen because
i_mutex prevents these races. Writeback is the only place where we do
not hold i_mutex and we cannot grab it there because of lock ordering.

We fix the race by doing both i_disksize and i_size update in truncate
atomically under i_data_sem and in mpage_map_and_submit_extent() we move
the check against i_size under i_data_sem as well.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
fs/ext4/ext4.h
fs/ext4/inode.c

index 58dede76f75ff5bcb89ac510e1367be9070fa14b..3dbc56eb4849d624ad38e768ff350ea1af463af0 100644 (file)
@@ -2432,16 +2432,32 @@ do {                                                            \
 #define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
+/* Update i_disksize. Requires i_mutex to avoid races with truncate */
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 {
-       /*
-        * XXX: replace with spinlock if seen contended -bzzz
-        */
+       WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
+                    !mutex_is_locked(&inode->i_mutex));
+       down_write(&EXT4_I(inode)->i_data_sem);
+       if (newsize > EXT4_I(inode)->i_disksize)
+               EXT4_I(inode)->i_disksize = newsize;
+       up_write(&EXT4_I(inode)->i_data_sem);
+}
+
+/*
+ * Update i_disksize after writeback has been started. Races with truncate
+ * are avoided by checking i_size under i_data_sem.
+ */
+static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
+{
+       loff_t i_size;
+
        down_write(&EXT4_I(inode)->i_data_sem);
+       i_size = i_size_read(inode);
+       if (newsize > i_size)
+               newsize = i_size;
        if (newsize > EXT4_I(inode)->i_disksize)
                EXT4_I(inode)->i_disksize = newsize;
        up_write(&EXT4_I(inode)->i_data_sem);
-       return ;
 }
 
 struct ext4_group_info {
index 38f430119fefb5d2d1c5f0901c0fa9d7241cec4e..fc4051eb4e0fc3ba10bd56850086614e8e4ca73e 100644 (file)
@@ -2237,12 +2237,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
 
        /* Update on-disk size after IO is submitted */
        disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
-       if (disksize > i_size_read(inode))
-               disksize = i_size_read(inode);
        if (disksize > EXT4_I(inode)->i_disksize) {
                int err2;
 
-               ext4_update_i_disksize(inode, disksize);
+               ext4_wb_update_i_disksize(inode, disksize);
                err2 = ext4_mark_inode_dirty(handle, inode);
                if (err2)
                        ext4_error(inode->i_sb,
@@ -4627,18 +4625,27 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                                error = ext4_orphan_add(handle, inode);
                                orphan = 1;
                        }
+                       down_write(&EXT4_I(inode)->i_data_sem);
                        EXT4_I(inode)->i_disksize = attr->ia_size;
                        rc = ext4_mark_inode_dirty(handle, inode);
                        if (!error)
                                error = rc;
+                       /*
+                        * We have to update i_size under i_data_sem together
+                        * with i_disksize to avoid races with writeback code
+                        * running ext4_wb_update_i_disksize().
+                        */
+                       if (!error)
+                               i_size_write(inode, attr->ia_size);
+                       up_write(&EXT4_I(inode)->i_data_sem);
                        ext4_journal_stop(handle);
                        if (error) {
                                ext4_orphan_del(NULL, inode);
                                goto err_out;
                        }
-               }
+               } else
+                       i_size_write(inode, attr->ia_size);
 
-               i_size_write(inode, attr->ia_size);
                /*
                 * Blocks are going to be removed from the inode. Wait
                 * for dio in flight.  Temporarily disable