prepend_path() needs to reinitialize dentry/vfsmount/mnt on restarts

[~andy/linux] / fs / namei.c
diff --git a/fs/namei.c b/fs/namei.c

index 2a5a7aa9f43ff8141c2cbb48a759ee13eb3e3279..e029a4cbff7db7b23af15628ca4d8c2cac5da491 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2258,10 +2258,11 @@ out:
   * path_mountpoint - look up a path to be umounted
   * @dfd:       directory file descriptor to start walk from
   * @name:      full pathname to walk
+ * @path:      pointer to container for result
   * @flags:     lookup flags
   *
   * Look up the given name, but don't attempt to revalidate the last component.
- * Returns 0 and "path" will be valid on success; Retuns error otherwise.
+ * Returns 0 and "path" will be valid on success; Returns error otherwise.
   */
  static int
  path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
@@ -3615,8 +3616,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
         return do_rmdir(AT_FDCWD, pathname);
  }
  
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir:       parent directory
+ * @dentry:    victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode.  The caller
+ * should then break the delegation on that inode and retry.  Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
  {
+       struct inode *target = dentry->d_inode;
         int error = may_delete(dir, dentry, 0);
  
         if (error)
@@ -3625,22 +3645,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
         if (!dir->i_op->unlink)
                 return -EPERM;
  
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&target->i_mutex);
         if (d_mountpoint(dentry))
                 error = -EBUSY;
         else {
                 error = security_inode_unlink(dir, dentry);
                 if (!error) {
+                       error = try_break_deleg(target, delegated_inode);
+                       if (error)
+                               goto out;
                         error = dir->i_op->unlink(dir, dentry);
                         if (!error)
                                 dont_mount(dentry);
                 }
         }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+out:
+       mutex_unlock(&target->i_mutex);
  
         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
-               fsnotify_link_count(dentry->d_inode);
+               fsnotify_link_count(target);
                 d_delete(dentry);
         }
  
@@ -3660,6 +3684,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
         struct dentry *dentry;
         struct nameidata nd;
         struct inode *inode = NULL;
+       struct inode *delegated_inode = NULL;
         unsigned int lookup_flags = 0;
  retry:
         name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3674,7 +3699,7 @@ retry:
         error = mnt_want_write(nd.path.mnt);
         if (error)
                 goto exit1;
-
+retry_deleg:
         mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
         dentry = lookup_hash(&nd);
         error = PTR_ERR(dentry);
@@ -3689,13 +3714,19 @@ retry:
                 error = security_path_unlink(&nd.path, dentry);
                 if (error)
                         goto exit2;
-               error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+               error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
  exit2:
                 dput(dentry);
         }
         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
         if (inode)
                 iput(inode);    /* truncate the inode here */
+       inode = NULL;
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         mnt_drop_write(nd.path.mnt);
  exit1:
         path_put(&nd.path);
@@ -3789,7 +3820,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
         return sys_symlinkat(oldname, AT_FDCWD, newname);
  }
  
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+/**
+ * vfs_link - create a new link
+ * @old_dentry:        object to be linked
+ * @dir:       new parent
+ * @new_dentry:        where to create the new link
+ * @delegated_inode: returns inode needing a delegation break
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_link discovers a delegation on the to-be-linked file in need
+ * of breaking, it will return -EWOULDBLOCK and return a reference to the
+ * inode in delegated_inode.  The caller should then break the delegation
+ * and retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
  {
         struct inode *inode = old_dentry->d_inode;
         unsigned max_links = dir->i_sb->s_max_links;
@@ -3825,8 +3875,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                 error =  -ENOENT;
         else if (max_links && inode->i_nlink >= max_links)
                 error = -EMLINK;
-       else
-               error = dir->i_op->link(old_dentry, dir, new_dentry);
+       else {
+               error = try_break_deleg(inode, delegated_inode);
+               if (!error)
+                       error = dir->i_op->link(old_dentry, dir, new_dentry);
+       }
  
         if (!error && (inode->i_state & I_LINKABLE)) {
                 spin_lock(&inode->i_lock);
@@ -3853,6 +3906,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
  {
         struct dentry *new_dentry;
         struct path old_path, new_path;
+       struct inode *delegated_inode = NULL;
         int how = 0;
         int error;
  
@@ -3891,9 +3945,14 @@ retry:
         error = security_path_link(old_path.dentry, &new_path, new_dentry);
         if (error)
                 goto out_dput;
-       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
+       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
  out_dput:
         done_path_create(&new_path, new_dentry);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry;
+       }
         if (retry_estale(error, how)) {
                 how |= LOOKUP_REVAL;
                 goto retry;
@@ -3918,7 +3977,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
   *        That's where 4.4 screws up. Current fix: serialization on
   *        sb->s_vfs_rename_mutex. We might be more accurate, but that's another
   *        story.
- *     c) we have to lock _three_ objects - parents and victim (if it exists).
+ *     c) we have to lock _four_ objects - parents and victim (if it exists),
+ *        and source (if it is not a directory).
   *        And that - after we got ->i_mutex on parents (until then we don't know
   *        whether the target exists).  Solution: try to be smart with locking
   *        order for inodes.  We rely on the fact that tree topology may change
@@ -3991,9 +4051,11 @@ out:
  }
  
  static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-                           struct inode *new_dir, struct dentry *new_dentry)
+                           struct inode *new_dir, struct dentry *new_dentry,
+                           struct inode **delegated_inode)
  {
         struct inode *target = new_dentry->d_inode;
+       struct inode *source = old_dentry->d_inode;
         int error;
  
         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4001,13 +4063,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
                 return error;
  
         dget(new_dentry);
-       if (target)
-               mutex_lock(&target->i_mutex);
+       lock_two_nondirectories(source, target);
  
         error = -EBUSY;
         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
                 goto out;
  
+       error = try_break_deleg(source, delegated_inode);
+       if (error)
+               goto out;
+       if (target) {
+               error = try_break_deleg(target, delegated_inode);
+               if (error)
+                       goto out;
+       }
         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
         if (error)
                 goto out;
@@ -4017,14 +4086,35 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
         if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                 d_move(old_dentry, new_dentry);
  out:
-       if (target)
-               mutex_unlock(&target->i_mutex);
+       unlock_two_nondirectories(source, target);
         dput(new_dentry);
         return error;
  }
  
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir:   parent of source
+ * @old_dentry:        source
+ * @new_dir:   parent of destination
+ * @new_dentry:        destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename().
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode.  The caller should then
+ * break the delegation and retry.  Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
  int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-              struct inode *new_dir, struct dentry *new_dentry)
+              struct inode *new_dir, struct dentry *new_dentry,
+              struct inode **delegated_inode)
  {
         int error;
         int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
@@ -4052,7 +4142,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (is_dir)
                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
         else
-               error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+               error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
         if (!error)
                 fsnotify_move(old_dir, new_dir, old_name, is_dir,
                               new_dentry->d_inode, old_dentry);
@@ -4068,6 +4158,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
         struct dentry *old_dentry, *new_dentry;
         struct dentry *trap;
         struct nameidata oldnd, newnd;
+       struct inode *delegated_inode = NULL;
         struct filename *from;
         struct filename *to;
         unsigned int lookup_flags = 0;
@@ -4107,6 +4198,7 @@ retry:
         newnd.flags &= ~LOOKUP_PARENT;
         newnd.flags |= LOOKUP_RENAME_TARGET;
  
+retry_deleg:
         trap = lock_rename(new_dir, old_dir);
  
         old_dentry = lookup_hash(&oldnd);
@@ -4143,13 +4235,19 @@ retry:
         if (error)
                 goto exit5;
         error = vfs_rename(old_dir->d_inode, old_dentry,
-                                  new_dir->d_inode, new_dentry);
+                                  new_dir->d_inode, new_dentry,
+                                  &delegated_inode);
  exit5:
         dput(new_dentry);
  exit4:
         dput(old_dentry);
  exit3:
         unlock_rename(new_dir, old_dir);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         mnt_drop_write(oldnd.path.mnt);
  exit2:
         if (retry_estale(error, lookup_flags))