diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 4821b8563c4120b0e488b9e803589a1b9c5f48a4..67f22b2b44b3c41c51ce3ac6c1f911f5db47b181 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -57,8 +57,6 @@ xfs_iomap_enter_trace(
        xfs_off_t       offset,
        ssize_t         count)
 {
-       xfs_iocore_t    *io = &ip->i_iocore;
-
        if (!ip->i_rwtrace)
                return;
 
@@ -70,8 +68,8 @@ xfs_iomap_enter_trace(
                (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
                (void *)((unsigned long)(offset & 0xffffffff)),
                (void *)((unsigned long)count),
-               (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
-               (void *)((unsigned long)(io->io_new_size & 0xffffffff)),
+               (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
                (void *)((unsigned long)current_pid()),
                (void *)NULL,
                (void *)NULL,
@@ -143,7 +141,7 @@ xfs_imap_to_bmap(
                iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
                iomapp->iomap_flags = flags;
 
-               if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
+               if (XFS_IS_REALTIME_INODE(ip)) {
                        iomapp->iomap_flags |= IOMAP_REALTIME;
                        iomapp->iomap_target = mp->m_rtdev_targp;
                } else {
@@ -186,8 +184,6 @@ xfs_iomap(
        int             iomap_flags = 0;
 
        ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-       ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
-              ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
 
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
@@ -200,14 +196,14 @@ xfs_iomap(
                break;
        case BMAPI_WRITE:
                xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
-               lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
+               lockmode = XFS_ILOCK_EXCL;
                if (flags & BMAPI_IGNSTATE)
                        bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
                xfs_ilock(ip, lockmode);
                break;
        case BMAPI_ALLOCATE:
                xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
-               lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
+               lockmode = XFS_ILOCK_SHARED;
                bmapi_flags = XFS_BMAPI_ENTIRE;
 
                /* Attempt non-blocking lock */
@@ -302,7 +298,7 @@ xfs_iomap_eof_align_last_fsb(
        xfs_extlen_t    align;
        int             eof, error;
 
-       if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
+       if (XFS_IS_REALTIME_INODE(ip))
                ;
        /*
         * If mounted with the "-o swalloc" option, roundup the allocation
@@ -402,7 +398,6 @@ xfs_iomap_write_direct(
        int             found)
 {
        xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
        xfs_filblks_t   count_fsb, resaligned;
@@ -432,8 +427,8 @@ xfs_iomap_write_direct(
        extsz = xfs_get_extsz_hint(ip);
 
        isize = ip->i_size;
-       if (io->io_new_size > isize)
-               isize = io->io_new_size;
+       if (ip->i_new_size > isize)
+               isize = ip->i_new_size;
 
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
        last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
@@ -528,7 +523,7 @@ xfs_iomap_write_direct(
                goto error_out;
        }
 
-       if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) {
+       if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
                error = xfs_cmn_err_fsblock_zero(ip, &imap);
                goto error_out;
        }
@@ -616,7 +611,6 @@ xfs_iomap_write_delay(
        int             *nmaps)
 {
        xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
        xfs_off_t       aligned_offset;
@@ -629,7 +623,7 @@ xfs_iomap_write_delay(
        int             prealloc, fsynced = 0;
        int             error;
 
-       ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
        /*
         * Make sure that the dquots are there. This doesn't hold
@@ -644,8 +638,8 @@ xfs_iomap_write_delay(
 
 retry:
        isize = ip->i_size;
-       if (io->io_new_size > isize)
-               isize = io->io_new_size;
+       if (ip->i_new_size > isize)
+               isize = ip->i_new_size;
 
        error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count,
                                ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
@@ -691,7 +685,7 @@ retry:
                goto retry;
        }
 
-       if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT)))
+       if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
                return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
 
        *ret_imap = imap[0];
@@ -706,6 +700,9 @@ retry:
  * the originating caller's request.
  *
  * Called without a lock on the inode.
+ *
+ * We no longer bother to look at the incoming map - all we have to
+ * guarantee is that whatever we allocate fills the required range.
  */
 int
 xfs_iomap_write_allocate(
@@ -716,15 +713,14 @@ xfs_iomap_write_allocate(
        int             *retmap)
 {
        xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb, last_block;
        xfs_fileoff_t   end_fsb, map_start_fsb;
        xfs_fsblock_t   first_block;
        xfs_bmap_free_t free_list;
        xfs_filblks_t   count_fsb;
-       xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS];
+       xfs_bmbt_irec_t imap;
        xfs_trans_t     *tp;
-       int             i, nimaps, committed;
+       int             nimaps, committed;
        int             error = 0;
        int             nres;
 
@@ -771,16 +767,44 @@ xfs_iomap_write_allocate(
 
                        XFS_BMAP_INIT(&free_list, &first_block);
 
-                       nimaps = XFS_STRAT_WRITE_IMAPS;
                        /*
-                        * Ensure we don't go beyond eof - it is possible
-                        * the extents changed since we did the read call,
-                        * we dropped the ilock in the interim.
+                        * it is possible that the extents have changed since
+                        * we did the read call as we dropped the ilock for a
+                        * while. We have to be careful about truncates or hole
+                        * punches here - we are not allowed to allocate
+                        * non-delalloc blocks here.
+                        *
+                        * The only protection against truncation is the pages
+                        * for the range we are being asked to convert are
+                        * locked and hence a truncate will block on them
+                        * first.
+                        *
+                        * As a result, if we go beyond the range we really
+                        * need and hit a delalloc extent boundary followed by
+                        * a hole while we have excess blocks in the map, we
+                        * will fill the hole incorrectly and overrun the
+                        * transaction reservation.
+                        *
+                        * Using a single map prevents this as we are forced to
+                        * check each map we look at for overlap with the desired
+                        * range and abort as soon as we find it. Also, given
+                        * that we only return a single map, having one beyond
+                        * what we can return is probably a bit silly.
+                        *
+                        * We also need to check that we don't go beyond EOF;
+                        * this is a truncate optimisation as a truncate sets
+                        * the new file size before blocking on the pages we
+                        * currently have locked under writeback. Because they
+                        * are about to be tossed, we don't need to write them
+                        * back....
                         */
-
+                       nimaps = 1;
                        end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
-                       xfs_bmap_last_offset(NULL, ip, &last_block,
-                               XFS_DATA_FORK);
+                       error = xfs_bmap_last_offset(NULL, ip, &last_block,
+                                                       XFS_DATA_FORK);
+                       if (error)
+                               goto trans_cancel;
+
                        last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
                        if ((map_start_fsb + count_fsb) > last_block) {
                                count_fsb = last_block - map_start_fsb;
@@ -793,7 +817,7 @@ xfs_iomap_write_allocate(
                        /* Go get the actual blocks */
                        error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
                                        XFS_BMAPI_WRITE, &first_block, 1,
-                                       imap, &nimaps, &free_list, NULL);
+                                       &imap, &nimaps, &free_list, NULL);
                        if (error)
                                goto trans_cancel;
 
@@ -812,27 +836,24 @@ xfs_iomap_write_allocate(
                 * See if we were able to allocate an extent that
                 * covers at least part of the caller's request
                 */
-               for (i = 0; i < nimaps; i++) {
-                       if (unlikely(!imap[i].br_startblock &&
-                                    !(io->io_flags & XFS_IOCORE_RT)))
-                               return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
-                       if ((offset_fsb >= imap[i].br_startoff) &&
-                           (offset_fsb < (imap[i].br_startoff +
-                                          imap[i].br_blockcount))) {
-                               *map = imap[i];
-                               *retmap = 1;
-                               XFS_STATS_INC(xs_xstrat_quick);
-                               return 0;
-                       }
-                       count_fsb -= imap[i].br_blockcount;
+               if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
+                       return xfs_cmn_err_fsblock_zero(ip, &imap);
+
+               if ((offset_fsb >= imap.br_startoff) &&
+                   (offset_fsb < (imap.br_startoff +
+                                  imap.br_blockcount))) {
+                       *map = imap;
+                       *retmap = 1;
+                       XFS_STATS_INC(xs_xstrat_quick);
+                       return 0;
                }
 
-               /* So far we have not mapped the requested part of the
+               /*
+                * So far we have not mapped the requested part of the
                 * file, just surrounding data, try again.
                 */
-               nimaps--;
-               map_start_fsb = imap[nimaps].br_startoff +
-                               imap[nimaps].br_blockcount;
+               count_fsb -= imap.br_blockcount;
+               map_start_fsb = imap.br_startoff + imap.br_blockcount;
        }
 
 trans_cancel:
@@ -850,7 +871,6 @@ xfs_iomap_write_unwritten(
        size_t          count)
 {
        xfs_mount_t     *mp = ip->i_mount;
-       xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_filblks_t   count_fsb;
        xfs_filblks_t   numblks_fsb;
@@ -869,6 +889,16 @@ xfs_iomap_write_unwritten(
        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
 
+       /*
+        * Reserve enough blocks in this transaction for two complete extent
+        * btree splits.  We may be converting the middle part of an unwritten
+        * extent and in this case we will insert two new extents in the btree
+        * each of which could cause a full split.
+        *
+        * This reservation amount will be used in the first call to
+        * xfs_bmbt_split() to select an AG with enough space to satisfy the
+        * rest of the operation.
+        */
        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
 
        do {
@@ -912,8 +942,7 @@ xfs_iomap_write_unwritten(
                if (error)
                        return XFS_ERROR(error);
 
-               if (unlikely(!imap.br_startblock &&
-                            !(io->io_flags & XFS_IOCORE_RT)))
+               if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
                        return xfs_cmn_err_fsblock_zero(ip, &imap);
 
                if ((numblks_fsb = imap.br_blockcount) == 0) {