Pileus Git - ~andy/linux/blobdiff - fs/xfs/xfs_inode.c
Merge branch 'rwsem-optimizations'
[~andy/linux] / fs / xfs / xfs_inode.c
index 202ce37e66cb8b605bca23d0ed1838aa20c453e8..558ef494720675aeb44bbfcf69792f2a8eae34eb 100644 (file)
@@ -44,6 +44,7 @@
 #include "xfs_quota.h"
 #include "xfs_filestream.h"
 #include "xfs_vnodeops.h"
+#include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 
@@ -866,6 +867,17 @@ xfs_dinode_from_disk(
        to->di_dmstate  = be16_to_cpu(from->di_dmstate);
        to->di_flags    = be16_to_cpu(from->di_flags);
        to->di_gen      = be32_to_cpu(from->di_gen);
+
+       if (to->di_version == 3) {
+               to->di_changecount = be64_to_cpu(from->di_changecount);
+               to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
+               to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
+               to->di_flags2 = be64_to_cpu(from->di_flags2);
+               to->di_ino = be64_to_cpu(from->di_ino);
+               to->di_lsn = be64_to_cpu(from->di_lsn);
+               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &from->di_uuid);
+       }
 }
 
 void
@@ -902,6 +914,17 @@ xfs_dinode_to_disk(
        to->di_dmstate = cpu_to_be16(from->di_dmstate);
        to->di_flags = cpu_to_be16(from->di_flags);
        to->di_gen = cpu_to_be32(from->di_gen);
+
+       if (from->di_version == 3) {
+               to->di_changecount = cpu_to_be64(from->di_changecount);
+               to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
+               to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+               to->di_flags2 = cpu_to_be64(from->di_flags2);
+               to->di_ino = cpu_to_be64(from->di_ino);
+               to->di_lsn = cpu_to_be64(from->di_lsn);
+               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &from->di_uuid);
+       }
 }
 
 STATIC uint
@@ -962,6 +985,47 @@ xfs_dic2xflags(
                                (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
 }
 
+static bool                            /* true if dip passes every check below */
+xfs_dinode_verify(
+       struct xfs_mount        *mp,    /* mount; m_sb supplies inode size and uuid */
+       struct xfs_inode        *ip,    /* in-core inode; only i_ino is read */
+       struct xfs_dinode       *dip)   /* inode image to check; not modified */
+{
+       if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
+               return false;   /* magic number mismatch */
+
+       /* versions below 3 pass on the magic number alone; v3+ get the checks below */
+       if (dip->di_version < 3)
+               return true;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return false;   /* v3+ inode but superblock fails the hascrc test */
+       if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
+                             offsetof(struct xfs_dinode, di_crc)))
+               return false;   /* checksum over sb_inodesize bytes did not verify */
+       if (be64_to_cpu(dip->di_ino) != ip->i_ino)
+               return false;   /* recorded inode number differs from ip->i_ino */
+       if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
+               return false;   /* recorded uuid differs from the superblock uuid */
+       return true;
+}
+
+void                                   /* result is stored in dip->di_crc */
+xfs_dinode_calc_crc(
+       struct xfs_mount        *mp,    /* mount; m_sb supplies sb_inodesize */
+       struct xfs_dinode       *dip)   /* inode image to checksum; di_crc is written */
+{
+       __uint32_t              crc;
+
+       if (dip->di_version < 3)
+               return;         /* versions below 3 are left untouched */
+
+       ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
+       crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
+                             offsetof(struct xfs_dinode, di_crc));
+       dip->di_crc = xfs_end_cksum(crc);       /* finalise and store the checksum */
+}
+
 /*
  * Read the disk inode attributes into the in-core inode structure.
  */
@@ -990,17 +1054,13 @@ xfs_iread(
        if (error)
                return error;
 
-       /*
-        * If we got something that isn't an inode it means someone
-        * (nfs or dmi) has a stale handle.
-        */
-       if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
-#ifdef DEBUG
-               xfs_alert(mp,
-                       "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
-                       __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
-#endif /* DEBUG */
-               error = XFS_ERROR(EINVAL);
+       /* even unallocated inodes are verified */
+       if (!xfs_dinode_verify(mp, ip, dip)) {
+               xfs_alert(mp, "%s: validation failed for inode %lld failed",
+                               __func__, ip->i_ino);
+
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
+               error = XFS_ERROR(EFSCORRUPTED);
                goto out_brelse;
        }
 
@@ -1022,10 +1082,20 @@ xfs_iread(
                        goto out_brelse;
                }
        } else {
+               /*
+                * Partial initialisation of the in-core inode. Just the bits
+                * that xfs_ialloc won't overwrite or relies on being correct.
+                */
                ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
                ip->i_d.di_version = dip->di_version;
                ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
                ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
+
+               if (dip->di_version == 3) {
+                       ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
+                       uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
+               }
+
                /*
                 * Make sure to pull in the mode here as well in
                 * case the inode is released without being used.
@@ -1161,6 +1231,7 @@ xfs_ialloc(
        xfs_buf_t       **ialloc_context,
        xfs_inode_t     **ipp)
 {
+       struct xfs_mount *mp = tp->t_mountp;
        xfs_ino_t       ino;
        xfs_inode_t     *ip;
        uint            flags;
@@ -1187,7 +1258,7 @@ xfs_ialloc(
         * This is because we're setting fields here we need
         * to prevent others from looking at until we're done.
         */
-       error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
+       error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
                         XFS_ILOCK_EXCL, &ip);
        if (error)
                return error;
@@ -1208,7 +1279,7 @@ xfs_ialloc(
         * the inode version number now.  This way we only do the conversion
         * here rather than here and in the flush/logging code.
         */
-       if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
+       if (xfs_sb_version_hasnlink(&mp->m_sb) &&
            ip->i_d.di_version == 1) {
                ip->i_d.di_version = 2;
                /*
@@ -1258,6 +1329,19 @@ xfs_ialloc(
        ip->i_d.di_dmevmask = 0;
        ip->i_d.di_dmstate = 0;
        ip->i_d.di_flags = 0;
+
+       if (ip->i_d.di_version == 3) {
+               ASSERT(ip->i_d.di_ino == ino);
+               ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
+               ip->i_d.di_crc = 0;
+               ip->i_d.di_changecount = 1;
+               ip->i_d.di_lsn = 0;
+               ip->i_d.di_flags2 = 0;
+               memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
+               ip->i_d.di_crtime = ip->i_d.di_mtime;
+       }
+
+
        flags = XFS_ILOG_CORE;
        switch (mode & S_IFMT) {
        case S_IFIFO:
@@ -2716,20 +2800,18 @@ abort_out:
 
 STATIC int
 xfs_iflush_int(
-       xfs_inode_t             *ip,
-       xfs_buf_t               *bp)
+       struct xfs_inode        *ip,
+       struct xfs_buf          *bp)
 {
-       xfs_inode_log_item_t    *iip;
-       xfs_dinode_t            *dip;
-       xfs_mount_t             *mp;
+       struct xfs_inode_log_item *iip = ip->i_itemp;
+       struct xfs_dinode       *dip;
+       struct xfs_mount        *mp = ip->i_mount;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(xfs_isiflocked(ip));
        ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
               ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
-
-       iip = ip->i_itemp;
-       mp = ip->i_mount;
+       ASSERT(iip != NULL && iip->ili_fields != 0);
 
        /* set *dip = inode's place in the buffer */
        dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -2790,9 +2872,9 @@ xfs_iflush_int(
        }
        /*
         * bump the flush iteration count, used to detect flushes which
-        * postdate a log record during recovery.
+        * postdate a log record during recovery. This is redundant as we now
+        * log every change and hence this can't happen. Still, it doesn't hurt.
         */
-
        ip->i_d.di_flushiter++;
 
        /*
@@ -2868,41 +2950,30 @@ xfs_iflush_int(
         * need the AIL lock, because it is a 64 bit value that cannot be read
         * atomically.
         */
-       if (iip != NULL && iip->ili_fields != 0) {
-               iip->ili_last_fields = iip->ili_fields;
-               iip->ili_fields = 0;
-               iip->ili_logged = 1;
+       iip->ili_last_fields = iip->ili_fields;
+       iip->ili_fields = 0;
+       iip->ili_logged = 1;
 
-               xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
-                                       &iip->ili_item.li_lsn);
+       xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
+                               &iip->ili_item.li_lsn);
 
-               /*
-                * Attach the function xfs_iflush_done to the inode's
-                * buffer.  This will remove the inode from the AIL
-                * and unlock the inode's flush lock when the inode is
-                * completely written to disk.
-                */
-               xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
+       /*
+        * Attach the function xfs_iflush_done to the inode's
+        * buffer.  This will remove the inode from the AIL
+        * and unlock the inode's flush lock when the inode is
+        * completely written to disk.
+        */
+       xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
 
-               ASSERT(bp->b_fspriv != NULL);
-               ASSERT(bp->b_iodone != NULL);
-       } else {
-               /*
-                * We're flushing an inode which is not in the AIL and has
-                * not been logged.  For this case we can immediately drop
-                * the inode flush lock because we can avoid the whole
-                * AIL state thing.  It's OK to drop the flush lock now,
-                * because we've already locked the buffer and to do anything
-                * you really need both.
-                */
-               if (iip != NULL) {
-                       ASSERT(iip->ili_logged == 0);
-                       ASSERT(iip->ili_last_fields == 0);
-                       ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
-               }
-               xfs_ifunlock(ip);
-       }
+       /* update the lsn in the on disk inode if required */
+       if (ip->i_d.di_version == 3)
+               dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
+
+       /* generate the checksum. */
+       xfs_dinode_calc_crc(mp, dip);
 
+       ASSERT(bp->b_fspriv != NULL);
+       ASSERT(bp->b_iodone != NULL);
        return 0;
 
 corrupt_out: