Pileus Git - ~andy/linux/commitdiff
Merge branch 'for-3.3' of git://linux-nfs.org/~bfields/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 14 Jan 2012 20:26:41 +0000 (12:26 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 14 Jan 2012 20:26:41 +0000 (12:26 -0800)
* 'for-3.3' of git://linux-nfs.org/~bfields/linux: (31 commits)
  nfsd4: nfsd4_create_clid_dir return value is unused
  NFSD: Change name of extended attribute containing junction
  svcrpc: don't revert to SVC_POOL_DEFAULT on nfsd shutdown
  svcrpc: fix double-free on shutdown of nfsd after changing pool mode
  nfsd4: be forgiving in the absence of the recovery directory
  nfsd4: fix spurious 4.1 post-reboot failures
  NFSD: forget_delegations should use list_for_each_entry_safe
  NFSD: Only reinitilize the recall_lru list under the recall lock
  nfsd4: initialize special stateid's at compile time
  NFSd: use network-namespace-aware cache registering routines
  SUNRPC: create svc_xprt in proper network namespace
  svcrpc: update outdated BKL comment
  nfsd41: allow non-reclaim open-by-fh's in 4.1
  svcrpc: avoid memory-corruption on pool shutdown
  svcrpc: destroy server sockets all at once
  svcrpc: make svc_delete_xprt static
  nfsd: Fix oops when parsing a 0 length export
  nfsd4: Use kmemdup rather than duplicating its implementation
  nfsd4: add a separate (lockowner, inode) lookup
  nfsd4: fix CONFIG_NFSD_FAULT_INJECTION compile error
  ...

26 files changed:
CREDITS
Documentation/filesystems/nfs/00-INDEX
Documentation/filesystems/nfs/fault_injection.txt [new file with mode: 0644]
MAINTAINERS
fs/nfsd/Kconfig
fs/nfsd/Makefile
fs/nfsd/export.c
fs/nfsd/fault_inject.c [new file with mode: 0644]
fs/nfsd/fault_inject.h [new file with mode: 0644]
fs/nfsd/nfs4idmap.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsd.h
fs/nfsd/state.h
fs/nfsd/vfs.c
include/linux/sunrpc/svc_xprt.h
include/linux/sunrpc/svcsock.h
net/sunrpc/cache.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
tools/nfsd/inject_fault.sh [new file with mode: 0755]

diff --git a/CREDITS b/CREDITS
index 44fce988eaac8cd22bfe5a5e753ae1bb58b3476d..370b4c7da39b2bd45ff098dc0b8062255e478ecb 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -514,6 +514,11 @@ S: Bessemerstraat 21
 S: Amsterdam
 S: The Netherlands
 
+N: NeilBrown
+E: neil@brown.name
+P: 4096R/566281B9 1BC6 29EB D390 D870 7B5F  497A 39EC 9EDD 5662 81B9
+D: NFSD Maintainer 2000-2007
+
 N: Zach Brown
 E: zab@zabbo.net
 D: maestro pci sound
index a57e12411d2a2570511df65500989110121e476e..1716874a651e1c574e7ca9719dfb4e3521b0a5e9 100644 (file)
@@ -2,6 +2,8 @@
        - this file (nfs-related documentation).
 Exporting
        - explanation of how to make filesystems exportable.
+fault_injection.txt
+       - information for using fault injection on the server
 knfsd-stats.txt
        - statistics which the NFS server makes available to user space.
 nfs.txt
diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt
new file mode 100644 (file)
index 0000000..426d166
--- /dev/null
@@ -0,0 +1,69 @@
+
+Fault Injection
+===============
+Fault injection is a method for forcing errors that may not normally occur, or
+may be difficult to reproduce.  Forcing these errors in a controlled environment
+can help the developer find and fix bugs before their code is shipped in a
+production system.  Injecting an error on the Linux NFS server will allow us to
+observe how the client reacts and if it manages to recover its state correctly.
+
+NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this
+feature.
+
+
+Using Fault Injection
+=====================
+On the client, mount the fault injection server through NFS v4.0+ and do some
+work over NFS (open files, take locks, ...).
+
+On the server, mount the debugfs filesystem to <debug_dir> and ls
+<debug_dir>/nfsd.  This will show a list of files that will be used for
+injecting faults on the NFS server.  As root, write a number n to the file
+corresponding to the action you want the server to take.  The server will then
+process the first n items it finds.  So if you want to forget 5 locks, echo '5'
+to <debug_dir>/nfsd/forget_locks.  A value of 0 will tell the server to forget
+all corresponding items.  A log message will be created containing the number
+of items forgotten (check dmesg).
+
+Go back to work on the client and check if the client recovered from the error
+correctly.
+
+
+Available Faults
+================
+forget_clients:
+     The NFS server keeps a list of clients that have placed a mount call.  If
+     this list is cleared, the server will have no knowledge of who the client
+     is, forcing the client to reauthenticate with the server.
+
+forget_openowners:
+     The NFS server keeps a list of what files are currently opened and who
+     they were opened by.  Clearing this list will force the client to reopen
+     its files.
+
+forget_locks:
+     The NFS server keeps a list of what files are currently locked in the VFS.
+     Clearing this list will force the client to reclaim its locks (files are
+     unlocked through the VFS as they are cleared from this list).
+
+forget_delegations:
+     A delegation is used to assure the client that a file, or part of a file,
+     has not changed since the delegation was awarded.  Clearing this list will
+     force the client to reacquire its delegation before accessing the file
+     again.
+
+recall_delegations:
+     Delegations can be recalled by the server when another client attempts to
+     access a file.  This test will notify the client that its delegation has
+     been revoked, forcing the client to reacquire the delegation before using
+     the file again.
+
+
+tools/nfsd/inject_fault.sh script
+=================================
+This script has been created to ease the fault injection process.  This script
+will detect the mounted debugfs directory and write to the files located there
+based on the arguments passed by the user.  For example, running
+`inject_fault.sh forget_locks 1` as root will instruct the server to forget
+one lock.  Running `inject_fault.sh forget_locks` will instruct the server to
+forget all locks.
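The workflow the document above describes (write a count to a named fault file under the debugfs mount, with 0 meaning "forget all") can be sketched as a tiny shell helper. The helper name and the explicit directory argument are illustrative, not part of the patch; on a live server the directory would be <debug_dir>/nfsd.

```shell
#!/bin/sh
# Sketch of the fault-injection write described above.  The directory is
# passed in explicitly so the logic can be exercised without a real
# debugfs mount.
inject_fault() {
    dir=$1; fault=$2; n=${3:-0}    # n=0 means "forget all items"
    f="$dir/$fault"
    if [ ! -e "$f" ]; then
        echo "unknown fault file: $f" >&2
        return 1
    fi
    echo "$n" > "$f"
}
```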
index 7559c1ca56ba7afc11104e430c20417d905654f9..4d1ba2022a95e29d75cc0a9fb18a6f24558deac0 100644 (file)
@@ -3775,7 +3775,6 @@ S:        Odd Fixes
 
 KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
 M:     "J. Bruce Fields" <bfields@fieldses.org>
-M:     Neil Brown <neilb@suse.de>
 L:     linux-nfs@vger.kernel.org
 W:     http://nfs.sourceforge.net/
 S:     Supported
index 10e6366608f284d71eea25717392c275f18274ed..8df1ea4a6ff988b5aeda534c5310c23b048ebde8 100644 (file)
@@ -80,3 +80,13 @@ config NFSD_V4
          available from http://linux-nfs.org/.
 
          If unsure, say N.
+
+config NFSD_FAULT_INJECTION
+       bool "NFS server manual fault injection"
+       depends on NFSD_V4 && DEBUG_KERNEL
+       help
+         This option enables support for manually injecting faults
+         into the NFS server.  This is intended to be used for
+         testing error recovery on the NFS client.
+
+         If unsure, say N.
index 9b118ee2019371e151b69089061b67c3722b52c6..af32ef06b4fec76f0528b3deaee25aaf28837be8 100644 (file)
@@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD)      += nfsd.o
 
 nfsd-y                         := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
                           export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
index 62f3b9074e8490897af08811ec184cacc9ad554b..cf8a6bd062fa9dc4eccdf3ecc464d83b2a8a6366 100644 (file)
@@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
        struct svc_expkey key;
        struct svc_expkey *ek = NULL;
 
-       if (mesg[mlen-1] != '\n')
+       if (mlen < 1 || mesg[mlen-1] != '\n')
                return -EINVAL;
        mesg[mlen-1] = 0;
 
@@ -1226,12 +1226,12 @@ nfsd_export_init(void)
        int rv;
        dprintk("nfsd: initializing export module.\n");
 
-       rv = cache_register(&svc_export_cache);
+       rv = cache_register_net(&svc_export_cache, &init_net);
        if (rv)
                return rv;
-       rv = cache_register(&svc_expkey_cache);
+       rv = cache_register_net(&svc_expkey_cache, &init_net);
        if (rv)
-               cache_unregister(&svc_export_cache);
+               cache_unregister_net(&svc_export_cache, &init_net);
        return rv;
 
 }
@@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void)
 
        dprintk("nfsd: shutting down export module.\n");
 
-       cache_unregister(&svc_expkey_cache);
-       cache_unregister(&svc_export_cache);
+       cache_unregister_net(&svc_expkey_cache, &init_net);
+       cache_unregister_net(&svc_export_cache, &init_net);
        svcauth_unix_purge();
 
        dprintk("nfsd: export shutdown complete.\n");
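The nfsd_export_init() conversion above keeps the usual two-step registration shape: register the first cache, register the second, and roll the first back if the second fails. A userspace sketch of that error-handling pattern follows; the cache type, stub functions, and the fail_second switch are stand-ins invented for illustration, not kernel APIs.

```c
#include <assert.h>

/* Stand-in for a registrable cache; only tracks registration state. */
struct cache { const char *name; int registered; };

static int cache_register_stub(struct cache *c)    { c->registered = 1; return 0; }
static void cache_unregister_stub(struct cache *c) { c->registered = 0; }

static struct cache export_cache = { "svc_export", 0 };
static struct cache expkey_cache = { "svc_expkey", 0 };

/* Test hook: force the second registration to fail. */
static int fail_second;

static int register_second(struct cache *c)
{
    if (fail_second)
        return -1;
    return cache_register_stub(c);
}

/* Mirrors the shape of nfsd_export_init() above. */
int export_init(void)
{
    int rv = cache_register_stub(&export_cache);
    if (rv)
        return rv;
    rv = register_second(&expkey_cache);
    if (rv)
        cache_unregister_stub(&export_cache);  /* roll back the first */
    return rv;
}
```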
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
new file mode 100644 (file)
index 0000000..ce7f075
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Uses debugfs to create fault injection points for client testing
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "state.h"
+#include "fault_inject.h"
+
+struct nfsd_fault_inject_op {
+       char *file;
+       void (*func)(u64);
+};
+
+static struct nfsd_fault_inject_op inject_ops[] = {
+       {
+               .file   = "forget_clients",
+               .func   = nfsd_forget_clients,
+       },
+       {
+               .file   = "forget_locks",
+               .func   = nfsd_forget_locks,
+       },
+       {
+               .file   = "forget_openowners",
+               .func   = nfsd_forget_openowners,
+       },
+       {
+               .file   = "forget_delegations",
+               .func   = nfsd_forget_delegations,
+       },
+       {
+               .file   = "recall_delegations",
+               .func   = nfsd_recall_delegations,
+       },
+};
+
+static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
+static struct dentry *debug_dir;
+
+static int nfsd_inject_set(void *op_ptr, u64 val)
+{
+       struct nfsd_fault_inject_op *op = op_ptr;
+
+       if (val == 0)
+               printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
+       else
+               printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
+
+       op->func(val);
+       return 0;
+}
+
+static int nfsd_inject_get(void *data, u64 *val)
+{
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n");
+
+void nfsd_fault_inject_cleanup(void)
+{
+       debugfs_remove_recursive(debug_dir);
+}
+
+int nfsd_fault_inject_init(void)
+{
+       unsigned int i;
+       struct nfsd_fault_inject_op *op;
+       mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+
+       debug_dir = debugfs_create_dir("nfsd", NULL);
+       if (!debug_dir)
+               goto fail;
+
+       for (i = 0; i < NUM_INJECT_OPS; i++) {
+               op = &inject_ops[i];
+               if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
+                       goto fail;
+       }
+       return 0;
+
+fail:
+       nfsd_fault_inject_cleanup();
+       return -ENOMEM;
+}
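The inject_ops table above pairs each debugfs file name with a handler, and nfsd_inject_set() dispatches a written value through the op's function pointer. A userspace sketch of that table-plus-dispatch pattern follows; the types, handler bodies, and the lookup by name (debugfs hands the kernel the op pointer directly, so the strcmp walk here is only for illustration) are all stand-ins.

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Stand-in for the kernel's nfsd_fault_inject_op table above. */
struct inject_op {
    const char *file;
    void (*func)(uint64_t);
};

static uint64_t last_forgotten;  /* records what a handler was asked to do */

static void forget_locks_stub(uint64_t n)   { last_forgotten = n; }
static void forget_clients_stub(uint64_t n) { last_forgotten = n + 100; }

static struct inject_op ops[] = {
    { "forget_locks",   forget_locks_stub },
    { "forget_clients", forget_clients_stub },
};

#define NUM_OPS (sizeof(ops) / sizeof(ops[0]))

/* Mirrors nfsd_inject_set(): find the named op and invoke its handler. */
static int inject_set(const char *file, uint64_t val)
{
    unsigned int i;

    for (i = 0; i < NUM_OPS; i++) {
        if (strcmp(ops[i].file, file) == 0) {
            ops[i].func(val);
            return 0;
        }
    }
    return -1;  /* no such fault-injection file */
}
```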
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h
new file mode 100644 (file)
index 0000000..90bd057
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Function definitions for fault injection
+ */
+
+#ifndef LINUX_NFSD_FAULT_INJECT_H
+#define LINUX_NFSD_FAULT_INJECT_H
+
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_cleanup(void);
+void nfsd_forget_clients(u64);
+void nfsd_forget_locks(u64);
+void nfsd_forget_openowners(u64);
+void nfsd_forget_delegations(u64);
+void nfsd_recall_delegations(u64);
+#else /* CONFIG_NFSD_FAULT_INJECTION */
+static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_cleanup(void) {}
+static inline void nfsd_forget_clients(u64 num) {}
+static inline void nfsd_forget_locks(u64 num) {}
+static inline void nfsd_forget_openowners(u64 num) {}
+static inline void nfsd_forget_delegations(u64 num) {}
+static inline void nfsd_recall_delegations(u64 num) {}
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
+#endif /* LINUX_NFSD_FAULT_INJECT_H */
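The header above follows the standard kernel convention for optional features: real declarations when CONFIG_NFSD_FAULT_INJECTION is set, empty static inline stubs otherwise, so callers such as nfsctl.c compile either way without their own #ifdefs. A minimal illustration of the same convention, with a made-up FEATURE_X option:

```c
#include <assert.h>
#include <stdint.h>

/* Same convention as fault_inject.h above: real prototypes when the
 * feature is configured in, no-op inline stubs otherwise.  FEATURE_X
 * is a made-up config option for illustration only. */
#ifdef FEATURE_X
int feature_init(void);
void feature_poke(uint64_t n);
#else
static inline int feature_init(void) { return 0; }       /* pretend success */
static inline void feature_poke(uint64_t n) { (void)n; } /* do nothing */
#endif
```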
index 55780a22fdbdcc02b0dcfb05bfadb3dc2417347e..94096273cd6cb37b17aa727b55d62a8c8a75a13d 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <net/net_namespace.h>
 #include "idmap.h"
 #include "nfsd.h"
 
@@ -466,20 +467,20 @@ nfsd_idmap_init(void)
 {
        int rv;
 
-       rv = cache_register(&idtoname_cache);
+       rv = cache_register_net(&idtoname_cache, &init_net);
        if (rv)
                return rv;
-       rv = cache_register(&nametoid_cache);
+       rv = cache_register_net(&nametoid_cache, &init_net);
        if (rv)
-               cache_unregister(&idtoname_cache);
+               cache_unregister_net(&idtoname_cache, &init_net);
        return rv;
 }
 
 void
 nfsd_idmap_shutdown(void)
 {
-       cache_unregister(&idtoname_cache);
-       cache_unregister(&nametoid_cache);
+       cache_unregister_net(&idtoname_cache, &init_net);
+       cache_unregister_net(&nametoid_cache, &init_net);
 }
 
 static int
index c5e28ed8bca07a7fa4ab4e624cd05648d130cae2..896da74ec5634fd92739e0284f5d0768c8f12437 100644 (file)
@@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 {
        __be32 status;
 
-       /* Only reclaims from previously confirmed clients are valid */
-       if ((status = nfs4_check_open_reclaim(&open->op_clientid)))
-               return status;
-
        /* We don't know the target directory, and therefore can not
        * set the change info
        */
@@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        break;
                case NFS4_OPEN_CLAIM_PREVIOUS:
                        open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+                       status = nfs4_check_open_reclaim(&open->op_clientid);
+                       if (status)
+                               goto out;
                case NFS4_OPEN_CLAIM_FH:
                case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
                        status = do_open_fhandle(rqstp, &cstate->current_fh,
index 80a0be9ed008a6db484d81303379d0a599c5dad7..0b3e875d1abd5e4cd962d5ed5df29f22c7ca546d 100644 (file)
@@ -117,8 +117,7 @@ out_no_tfm:
        return status;
 }
 
-int
-nfsd4_create_clid_dir(struct nfs4_client *clp)
+void nfsd4_create_clid_dir(struct nfs4_client *clp)
 {
        const struct cred *original_cred;
        char *dname = clp->cl_recdir;
@@ -127,13 +126,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 
        dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
 
-       if (!rec_file || clp->cl_firststate)
-               return 0;
-
+       if (clp->cl_firststate)
+               return;
        clp->cl_firststate = 1;
+       if (!rec_file)
+               return;
        status = nfs4_save_creds(&original_cred);
        if (status < 0)
-               return status;
+               return;
 
        dir = rec_file->f_path.dentry;
        /* lock the parent */
@@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                status = PTR_ERR(dentry);
                goto out_unlock;
        }
-       status = -EEXIST;
        if (dentry->d_inode)
+               /*
+                * In the 4.1 case, where we're called from
+                * reclaim_complete(), records from the previous reboot
+                * may still be left, so this is OK.
+                *
+                * In the 4.0 case, we should never get here; but we may
+                * as well be forgiving and just succeed silently.
+                */
                goto out_put;
        status = mnt_want_write_file(rec_file);
        if (status)
@@ -164,7 +171,6 @@ out_unlock:
                                " and is writeable", status,
                                user_recovery_dirname);
        nfs4_reset_creds(original_cred);
-       return status;
 }
 
 typedef int (recdir_func)(struct dentry *, struct dentry *);
index 9ca16dc09e04878bd89bf4e471e5364f719d3aef..e8c98f0096706c04e70456c35af2f8123241af7a 100644 (file)
 time_t nfsd4_lease = 90;     /* default lease time */
 time_t nfsd4_grace = 90;
 static time_t boot_time;
-static stateid_t zerostateid;             /* bits all 0 */
-static stateid_t onestateid;              /* bits all 1 */
+
+#define all_ones {{~0,~0},~0}
+static const stateid_t one_stateid = {
+       .si_generation = ~0,
+       .si_opaque = all_ones,
+};
+static const stateid_t zero_stateid = {
+       /* all fields zero */
+};
+
 static u64 current_sessionid = 1;
 
-#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
-#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
 
 /* forward declarations */
 static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
@@ -133,21 +141,21 @@ unsigned int max_delegations;
  * Open owner state (share locks)
  */
 
-/* hash tables for open owners */
-#define OPEN_OWNER_HASH_BITS              8
-#define OPEN_OWNER_HASH_SIZE             (1 << OPEN_OWNER_HASH_BITS)
-#define OPEN_OWNER_HASH_MASK             (OPEN_OWNER_HASH_SIZE - 1)
+/* hash tables for lock and open owners */
+#define OWNER_HASH_BITS              8
+#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)
 
-static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
+static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
 {
        unsigned int ret;
 
        ret = opaque_hashval(ownername->data, ownername->len);
        ret += clientid;
-       return ret & OPEN_OWNER_HASH_MASK;
+       return ret & OWNER_HASH_MASK;
 }
 
-static struct list_head        open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE];
+static struct list_head        ownerstr_hashtbl[OWNER_HASH_SIZE];
 
 /* hash table for nfs4_file */
 #define FILE_HASH_BITS                   8
@@ -514,6 +522,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)
 
        list_del(&lo->lo_owner.so_strhash);
        list_del(&lo->lo_perstateid);
+       list_del(&lo->lo_owner_ino_hash);
        while (!list_empty(&lo->lo_owner.so_stateids)) {
                stp = list_first_entry(&lo->lo_owner.so_stateids,
                                struct nfs4_ol_stateid, st_perstateowner);
@@ -985,12 +994,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
        clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
        if (clp == NULL)
                return NULL;
-       clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
+       clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
        if (clp->cl_name.data == NULL) {
                kfree(clp);
                return NULL;
        }
-       memcpy(clp->cl_name.data, name.data, name.len);
        clp->cl_name.len = name.len;
        return clp;
 }
@@ -1058,7 +1066,6 @@ expire_client(struct nfs4_client *clp)
        spin_unlock(&recall_lock);
        while (!list_empty(&reaplist)) {
                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
        while (!list_empty(&clp->cl_openowners)) {
@@ -2301,7 +2308,7 @@ nfsd4_free_slabs(void)
        nfsd4_free_slab(&deleg_slab);
 }
 
-static int
+int
 nfsd4_init_slabs(void)
 {
        openowner_slab = kmem_cache_create("nfsd4_openowners",
@@ -2373,7 +2380,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
 
 static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
 {
-       list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]);
+       list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
        list_add(&oo->oo_perclient, &clp->cl_openowners);
 }
 
@@ -2436,7 +2443,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
        struct nfs4_stateowner *so;
        struct nfs4_openowner *oo;
 
-       list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) {
+       list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+               if (!so->so_is_open_owner)
+                       continue;
                if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
                        oo = openowner(so);
                        renew_client(oo->oo_owner.so_client);
@@ -2580,7 +2589,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
        if (open->op_file == NULL)
                return nfserr_jukebox;
 
-       strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner);
+       strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
        oo = find_openstateowner_str(strhashval, open);
        open->op_openowner = oo;
        if (!oo) {
@@ -3123,7 +3132,6 @@ nfs4_laundromat(void)
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
        test_val = nfsd4_lease;
@@ -3718,13 +3726,11 @@ out:
 }
 
 
-/* 
- * Lock owner state (byte-range locks)
- */
 #define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
-#define LOCK_HASH_BITS              8
-#define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
-#define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+
+#define LOCKOWNER_INO_HASH_BITS 8
+#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
+#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
 
 static inline u64
 end_offset(u64 start, u64 len)
@@ -3746,16 +3752,14 @@ last_byte_offset(u64 start, u64 len)
        return end > start ? end - 1: NFS4_MAX_UINT64;
 }
 
-static inline unsigned int
-lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
-               struct xdr_netobj *ownername)
+static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
 {
        return (file_hashval(inode) + cl_id
                        + opaque_hashval(ownername->data, ownername->len))
-               & LOCK_HASH_MASK;
+               & LOCKOWNER_INO_HASH_MASK;
 }
 
-static struct list_head        lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
 
 /*
  * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -3809,23 +3813,39 @@ nevermind:
                deny->ld_type = NFS4_WRITE_LT;
 }
 
+static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner)
+{
+       struct nfs4_ol_stateid *lst;
+
+       if (!same_owner_str(&lo->lo_owner, owner, clid))
+               return false;
+       lst = list_first_entry(&lo->lo_owner.so_stateids,
+                              struct nfs4_ol_stateid, st_perstateowner);
+       return lst->st_file->fi_inode == inode;
+}
+
 static struct nfs4_lockowner *
 find_lockowner_str(struct inode *inode, clientid_t *clid,
                struct xdr_netobj *owner)
 {
-       unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
-       struct nfs4_stateowner *op;
+       unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
+       struct nfs4_lockowner *lo;
 
-       list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
-               if (same_owner_str(op, owner, clid))
-                       return lockowner(op);
+       list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
+               if (same_lockowner_ino(lo, inode, clid, owner))
+                       return lo;
        }
        return NULL;
 }
 
 static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
 {
-       list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]);
+       struct inode *inode = open_stp->st_file->fi_inode;
+       unsigned int inohash = lockowner_ino_hashval(inode,
+                       clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
+
+       list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+       list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
        list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
 }
 
@@ -3834,7 +3854,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
  * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 
  * occurred. 
  *
- * strhashval = lock_ownerstr_hashval 
+ * strhashval = ownerstr_hashval
  */
 
 static struct nfs4_lockowner *
@@ -3892,6 +3912,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
        __set_bit(access, &lock_stp->st_access_bmap);
 }
 
+__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new)
+{
+       struct nfs4_file *fi = ost->st_file;
+       struct nfs4_openowner *oo = openowner(ost->st_stateowner);
+       struct nfs4_client *cl = oo->oo_owner.so_client;
+       struct nfs4_lockowner *lo;
+       unsigned int strhashval;
+
+       lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
+       if (lo) {
+               if (!cstate->minorversion)
+                       return nfserr_bad_seqid;
+               /* XXX: a lockowner always has exactly one stateid: */
+               *lst = list_first_entry(&lo->lo_owner.so_stateids,
+                               struct nfs4_ol_stateid, st_perstateowner);
+               return nfs_ok;
+       }
+       strhashval = ownerstr_hashval(cl->cl_clientid.cl_id,
+                       &lock->v.new.owner);
+       lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
+       if (lo == NULL)
+               return nfserr_jukebox;
+       *lst = alloc_init_lock_stateid(lo, fi, ost);
+       if (*lst == NULL) {
+               release_lockowner(lo);
+               return nfserr_jukebox;
+       }
+       *new = true;
+       return nfs_ok;
+}
+
 /*
  *  LOCK operation 
  */
@@ -3907,7 +3958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct file_lock file_lock;
        struct file_lock conflock;
        __be32 status = 0;
-       unsigned int strhashval;
+       bool new_state = false;
        int lkflg;
        int err;
 
@@ -3933,10 +3984,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                 * lock stateid.
                 */
                struct nfs4_ol_stateid *open_stp = NULL;
-               
+
+               if (nfsd4_has_session(cstate))
+                       /* See rfc 5661 18.10.3: given clientid is ignored: */
+                       memcpy(&lock->v.new.clientid,
+                               &cstate->session->se_client->cl_clientid,
+                               sizeof(clientid_t));
+
                status = nfserr_stale_clientid;
-               if (!nfsd4_has_session(cstate) &&
-                   STALE_CLIENTID(&lock->lk_new_clientid))
+               if (STALE_CLIENTID(&lock->lk_new_clientid))
                        goto out;
 
                /* validate and update open stateid and open seqid */
@@ -3948,25 +4004,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        goto out;
                open_sop = openowner(open_stp->st_stateowner);
                status = nfserr_bad_stateid;
-               if (!nfsd4_has_session(cstate) &&
-                       !same_clid(&open_sop->oo_owner.so_client->cl_clientid,
+               if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
                                                &lock->v.new.clientid))
                        goto out;
-               /* create lockowner and lock stateid */
-               fp = open_stp->st_file;
-               strhashval = lock_ownerstr_hashval(fp->fi_inode,
-                               open_sop->oo_owner.so_client->cl_clientid.cl_id,
-                               &lock->v.new.owner);
-               /* XXX: Do we need to check for duplicate stateowners on
-                * the same file, or should they just be allowed (and
-                * create new stateids)? */
-               status = nfserr_jukebox;
-               lock_sop = alloc_init_lock_stateowner(strhashval,
-                               open_sop->oo_owner.so_client, open_stp, lock);
-               if (lock_sop == NULL)
-                       goto out;
-               lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
-               if (lock_stp == NULL)
+               status = lookup_or_create_lock_state(cstate, open_stp, lock,
+                                                       &lock_stp, &new_state);
+               if (status)
                        goto out;
        } else {
                /* lock (lock owner + lock stateid) already exists */
@@ -3976,10 +4019,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                       NFS4_LOCK_STID, &lock_stp);
                if (status)
                        goto out;
-               lock_sop = lockowner(lock_stp->st_stateowner);
-               fp = lock_stp->st_file;
        }
-       /* lock_sop and lock_stp have been created or found */
+       lock_sop = lockowner(lock_stp->st_stateowner);
+       fp = lock_stp->st_file;
 
        lkflg = setlkflg(lock->lk_type);
        status = nfs4_check_openmode(lock_stp, lkflg);
@@ -4054,7 +4096,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                break;
        }
 out:
-       if (status && lock->lk_is_new && lock_sop)
+       if (status && new_state)
                release_lockowner(lock_sop);
        if (!cstate->replay_owner)
                nfs4_unlock_state();
@@ -4251,7 +4293,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
        struct nfs4_ol_stateid *stp;
        struct xdr_netobj *owner = &rlockowner->rl_owner;
        struct list_head matches;
-       int i;
+       unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
        __be32 status;
 
        dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -4266,22 +4308,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
        nfs4_lock_state();
 
        status = nfserr_locks_held;
-       /* XXX: we're doing a linear search through all the lockowners.
-        * Yipes!  For now we'll just hope clients aren't really using
-        * release_lockowner much, but eventually we have to fix these
-        * data structures. */
        INIT_LIST_HEAD(&matches);
-       for (i = 0; i < LOCK_HASH_SIZE; i++) {
-               list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) {
-                       if (!same_owner_str(sop, owner, clid))
-                               continue;
-                       list_for_each_entry(stp, &sop->so_stateids,
-                                       st_perstateowner) {
-                               lo = lockowner(sop);
-                               if (check_for_locks(stp->st_file, lo))
-                                       goto out;
-                               list_add(&lo->lo_list, &matches);
-                       }
+
+       list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
+               if (sop->so_is_open_owner)
+                       continue;
+               if (!same_owner_str(sop, owner, clid))
+                       continue;
+               list_for_each_entry(stp, &sop->so_stateids,
+                               st_perstateowner) {
+                       lo = lockowner(sop);
+                       if (check_for_locks(stp->st_file, lo))
+                               goto out;
+                       list_add(&lo->lo_list, &matches);
                }
        }
        /* Clients probably won't expect us to return with some (but not all)
@@ -4394,16 +4433,127 @@ nfs4_check_open_reclaim(clientid_t *clid)
        return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+
+void nfsd_forget_clients(u64 num)
+{
+       struct nfs4_client *clp, *next;
+       int count = 0;
+
+       nfs4_lock_state();
+       list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
+               nfsd4_remove_clid_dir(clp);
+               expire_client(clp);
+               if (++count == num)
+                       break;
+       }
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d clients", count);
+}
+
+static void release_lockowner_sop(struct nfs4_stateowner *sop)
+{
+       release_lockowner(lockowner(sop));
+}
+
+static void release_openowner_sop(struct nfs4_stateowner *sop)
+{
+       release_openowner(openowner(sop));
+}
+
+static int nfsd_release_n_owners(u64 num, bool is_open_owner,
+                               void (*release_sop)(struct nfs4_stateowner *))
+{
+       int i, count = 0;
+       struct nfs4_stateowner *sop, *next;
+
+       for (i = 0; i < OWNER_HASH_SIZE; i++) {
+               list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
+                       if (sop->so_is_open_owner != is_open_owner)
+                               continue;
+                       release_sop(sop);
+                       if (++count == num)
+                               return count;
+               }
+       }
+       return count;
+}
+
+void nfsd_forget_locks(u64 num)
+{
+       int count;
+
+       nfs4_lock_state();
+       count = nfsd_release_n_owners(num, false, release_lockowner_sop);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d locks", count);
+}
+
+void nfsd_forget_openowners(u64 num)
+{
+       int count;
+
+       nfs4_lock_state();
+       count = nfsd_release_n_owners(num, true, release_openowner_sop);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d open owners", count);
+}
+
+int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *))
+{
+       int i, count = 0;
+       struct nfs4_file *fp, *fnext;
+       struct nfs4_delegation *dp, *dnext;
+
+       for (i = 0; i < FILE_HASH_SIZE; i++) {
+               list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
+                       list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
+                               deleg_func(dp);
+                               if (++count == num)
+                                       return count;
+                       }
+               }
+       }
+
+       return count;
+}
+
+void nfsd_forget_delegations(u64 num)
+{
+       unsigned int count;
+
+       nfs4_lock_state();
+       count = nfsd_process_n_delegations(num, unhash_delegation);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d delegations", count);
+}
+
+void nfsd_recall_delegations(u64 num)
+{
+       unsigned int count;
+
+       nfs4_lock_state();
+       spin_lock(&recall_lock);
+       count = nfsd_process_n_delegations(num, nfsd_break_one_deleg);
+       spin_unlock(&recall_lock);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Recalled %d delegations", count);
+}
+
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
 /* initialization to perform at module load time: */
 
-int
+void
 nfs4_state_init(void)
 {
-       int i, status;
+       int i;
 
-       status = nfsd4_init_slabs();
-       if (status)
-               return status;
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&conf_id_hashtbl[i]);
                INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -4416,18 +4566,15 @@ nfs4_state_init(void)
        for (i = 0; i < FILE_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&file_hashtbl[i]);
        }
-       for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) {
-               INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]);
-       }
-       for (i = 0; i < LOCK_HASH_SIZE; i++) {
-               INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
+       for (i = 0; i < OWNER_HASH_SIZE; i++) {
+               INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
        }
-       memset(&onestateid, ~0, sizeof(stateid_t));
+       for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
+               INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
        INIT_LIST_HEAD(&close_lru);
        INIT_LIST_HEAD(&client_lru);
        INIT_LIST_HEAD(&del_recall_lru);
        reclaim_str_hashtbl_size = 0;
-       return 0;
 }
 
 static void
@@ -4526,7 +4673,6 @@ __nfs4_state_shutdown(void)
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
 
index b6fa792d6b858b5950c483092e7c05ca8c583446..0ec5a1b9700e5e8d59196cfa387f608aa5c7f0ce 100644 (file)
@@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp,
 static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
 {
        if (p == argp->tmp) {
-               p = kmalloc(nbytes, GFP_KERNEL);
+               p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);
                if (!p)
                        return NULL;
-               memcpy(p, argp->tmp, nbytes);
        } else {
                BUG_ON(p != argp->tmpp);
                argp->tmpp = NULL;
index bb4a11d58a5aac3e6f1e8121ab94e2121c9a3290..748eda93ce590d1ad1e4f7892f29e25f8ad8856a 100644 (file)
@@ -18,6 +18,7 @@
 #include "idmap.h"
 #include "nfsd.h"
 #include "cache.h"
+#include "fault_inject.h"
 
 /*
  *     We have a single directory with several nodes in it.
@@ -1128,9 +1129,13 @@ static int __init init_nfsd(void)
        int retval;
        printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
 
-       retval = nfs4_state_init(); /* nfs4 locking state */
+       retval = nfsd4_init_slabs();
        if (retval)
                return retval;
+       nfs4_state_init();
+       retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+       if (retval)
+               goto out_free_slabs;
        nfsd_stat_init();       /* Statistics */
        retval = nfsd_reply_cache_init();
        if (retval)
@@ -1161,6 +1166,8 @@ out_free_cache:
        nfsd_reply_cache_shutdown();
 out_free_stat:
        nfsd_stat_shutdown();
+       nfsd_fault_inject_cleanup();
+out_free_slabs:
        nfsd4_free_slabs();
        return retval;
 }
@@ -1175,6 +1182,7 @@ static void __exit exit_nfsd(void)
        nfsd_lockd_shutdown();
        nfsd_idmap_shutdown();
        nfsd4_free_slabs();
+       nfsd_fault_inject_cleanup();
        unregister_filesystem(&nfsd_fs_type);
 }
 
index 58134a23fdfbaebfbb0482e510e2d0e73bc7e44e..1d1e8589b4ce6ec02ad08dfbdd8848a051e67c2f 100644 (file)
@@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
  */
 #ifdef CONFIG_NFSD_V4
 extern unsigned int max_delegations;
-int nfs4_state_init(void);
+void nfs4_state_init(void);
+int nfsd4_init_slabs(void);
 void nfsd4_free_slabs(void);
 int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
 void nfs4_reset_lease(time_t leasetime);
 int nfs4_reset_recoverydir(char *recdir);
 #else
-static inline int nfs4_state_init(void) { return 0; }
+static inline void nfs4_state_init(void) { }
+static inline int nfsd4_init_slabs(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
 static inline int nfs4_state_start(void) { return 0; }
 static inline void nfs4_state_shutdown(void) { }
@@ -338,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
 }
 
 /* These will return ERR_INVAL if specified in GETATTR or READDIR. */
-#define NFSD_WRITEONLY_ATTRS_WORD1                                                         \
-(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEONLY_ATTRS_WORD1 \
+       (FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
 
 /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
-#define NFSD_WRITEABLE_ATTRS_WORD0                                                          \
-(FATTR4_WORD0_SIZE              | FATTR4_WORD0_ACL                                         )
-#define NFSD_WRITEABLE_ATTRS_WORD1                                                          \
-(FATTR4_WORD1_MODE              | FATTR4_WORD1_OWNER         | FATTR4_WORD1_OWNER_GROUP     \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEABLE_ATTRS_WORD0 \
+       (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
+#define NFSD_WRITEABLE_ATTRS_WORD1 \
+       (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
      | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
 #define NFSD_WRITEABLE_ATTRS_WORD2 0
 
 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
index a3cf38476a1b74f611cc35fff2dfc7f40e0d5e77..ffb5df1db94ff86558aff1340a6b07af2af29b79 100644 (file)
@@ -366,6 +366,7 @@ struct nfs4_openowner {
 
 struct nfs4_lockowner {
        struct nfs4_stateowner  lo_owner; /* must be first element */
+       struct list_head        lo_owner_ino_hash; /* hash by owner,file */
        struct list_head        lo_perstateid; /* for lockowners only */
        struct list_head        lo_list; /* for temporary uses */
 };
@@ -482,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void);
 extern int nfs4_client_to_reclaim(const char *name);
 extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
 extern void nfsd4_recdir_purge_old(void);
-extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_create_clid_dir(struct nfs4_client *clp);
 extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
 extern void release_session_client(struct nfsd4_session *);
 extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
index d25a723b68ada457a3ae58973fd98e427c609b53..edf6d3ed87778822d85c32929cab580c93ed3c7a 100644 (file)
@@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
        return error;
 }
 
-#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction."
-#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type"
+/*
+ * NFS junction information is stored in an extended attribute.
+ */
+#define NFSD_JUNCTION_XATTR_NAME       XATTR_TRUSTED_PREFIX "junction.nfs"
+
+/**
+ * nfsd4_is_junction - Test if an object could be an NFS junction
+ *
+ * @dentry: object to test
+ *
+ * Returns 1 if "dentry" appears to contain NFS junction information.
+ * Otherwise 0 is returned.
+ */
 int nfsd4_is_junction(struct dentry *dentry)
 {
        struct inode *inode = dentry->d_inode;
@@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry)
                return 0;
        if (!(inode->i_mode & S_ISVTX))
                return 0;
-       if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0)
+       if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
                return 0;
        return 1;
 }
index 8620f79658d42e593e313124b189d22c1cde7a42..dfa900948af79a6fbd277bbbb5b56c0c7842cec4 100644 (file)
@@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
 
 int    svc_reg_xprt_class(struct svc_xprt_class *);
 void   svc_unreg_xprt_class(struct svc_xprt_class *);
-void   svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
+void   svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
                      struct svc_serv *);
 int    svc_create_xprt(struct svc_serv *, const char *, struct net *,
                        const int, const unsigned short, int);
@@ -118,7 +118,6 @@ void        svc_xprt_received(struct svc_xprt *);
 void   svc_xprt_put(struct svc_xprt *xprt);
 void   svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 void   svc_close_xprt(struct svc_xprt *xprt);
-void   svc_delete_xprt(struct svc_xprt *xprt);
 int    svc_port_is_privileged(struct sockaddr *sin);
 int    svc_print_xprts(char *buf, int maxlen);
 struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
index 85c50b40759de9df4528077a64b6346a472f9dfb..c84e9741cb2a25471838c2c31503b8d550c8bfbf 100644 (file)
@@ -34,7 +34,7 @@ struct svc_sock {
 /*
  * Function prototypes.
  */
-void           svc_close_all(struct list_head *);
+void           svc_close_all(struct svc_serv *);
 int            svc_recv(struct svc_rqst *, long);
 int            svc_send(struct svc_rqst *);
 void           svc_drop(struct svc_rqst *);
index 03b56bc3b659f914041ffd18352f3b43e10480a1..465df9ae1046b7fc12fe99fd0759017be7a7dc2a 100644 (file)
@@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net)
                sunrpc_destroy_cache_detail(cd);
        return ret;
 }
+EXPORT_SYMBOL_GPL(cache_register_net);
 
 int cache_register(struct cache_detail *cd)
 {
@@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
        remove_cache_proc_entries(cd, net);
        sunrpc_destroy_cache_detail(cd);
 }
+EXPORT_SYMBOL_GPL(cache_unregister_net);
 
 void cache_unregister(struct cache_detail *cd)
 {
index 9d01d46b05f36785e0612786a2365be80a08dc0d..e4aabc02368b94e0d7b0109ab7906bfbba329b23 100644 (file)
@@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
 
 fail_free:
        kfree(m->to_pool);
+       m->to_pool = NULL;
 fail:
        return -ENOMEM;
 }
@@ -285,9 +286,10 @@ svc_pool_map_put(void)
        mutex_lock(&svc_pool_map_mutex);
 
        if (!--m->count) {
-               m->mode = SVC_POOL_DEFAULT;
                kfree(m->to_pool);
+               m->to_pool = NULL;
                kfree(m->pool_to);
+               m->pool_to = NULL;
                m->npools = 0;
        }
 
@@ -527,17 +529,20 @@ svc_destroy(struct svc_serv *serv)
                printk("svc_destroy: no threads for serv=%p!\n", serv);
 
        del_timer_sync(&serv->sv_temptimer);
-
-       svc_close_all(&serv->sv_tempsocks);
+       /*
+        * The set of xprts (contained in the sv_tempsocks and
+        * sv_permsocks lists) is now constant, since it is modified
+        * only by accepting new sockets (done by service threads in
+        * svc_recv) or aging old ones (done by sv_temptimer), or
+        * configuration changes (excluded by whatever locking the
+        * caller is using--nfsd_mutex in the case of nfsd).  So it's
+        * safe to traverse those lists and shut everything down:
+        */
+       svc_close_all(serv);
 
        if (serv->sv_shutdown)
                serv->sv_shutdown(serv);
 
-       svc_close_all(&serv->sv_permsocks);
-
-       BUG_ON(!list_empty(&serv->sv_permsocks));
-       BUG_ON(!list_empty(&serv->sv_tempsocks));
-
        cache_clean_deferred(serv);
 
        if (svc_serv_is_pooled(serv))
@@ -683,8 +688,8 @@ found_pool:
  * Create or destroy enough new threads to make the number
  * of threads the given number.  If `pool' is non-NULL, applies
  * only to threads in that pool, otherwise round-robins between
- * all pools.  Must be called with a svc_get() reference and
- * the BKL or another lock to protect access to svc_serv fields.
+ * all pools.  Caller must ensure that mutual exclusion between this and
+ * server startup or shutdown.
  *
  * Destroying threads relies on the service threads filling in
  * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
index 38649cfa4e81350aed397d3bdeb316556aaf57e6..74cb0d8e9ca1f58aae66b85bee5c313cf3473f18 100644 (file)
@@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
+static void svc_delete_xprt(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -147,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
  * Called by transport drivers to initialize the transport independent
  * portion of the transport instance.
  */
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
-                  struct svc_serv *serv)
+void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
+                  struct svc_xprt *xprt, struct svc_serv *serv)
 {
        memset(xprt, 0, sizeof(*xprt));
        xprt->xpt_class = xcl;
@@ -163,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
        spin_lock_init(&xprt->xpt_lock);
        set_bit(XPT_BUSY, &xprt->xpt_flags);
        rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
-       xprt->xpt_net = get_net(&init_net);
+       xprt->xpt_net = get_net(net);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
@@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
 /*
  * Remove a dead transport
  */
-void svc_delete_xprt(struct svc_xprt *xprt)
+static void svc_delete_xprt(struct svc_xprt *xprt)
 {
        struct svc_serv *serv = xprt->xpt_server;
        struct svc_deferred_req *dr;
@@ -893,14 +894,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
        spin_lock_bh(&serv->sv_lock);
        if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
                list_del_init(&xprt->xpt_list);
-       /*
-        * The only time we're called while xpt_ready is still on a list
-        * is while the list itself is about to be destroyed (in
-        * svc_destroy).  BUT svc_xprt_enqueue could still be attempting
-        * to add new entries to the sp_sockets list, so we can't leave
-        * a freed xprt on it.
-        */
-       list_del_init(&xprt->xpt_ready);
+       BUG_ON(!list_empty(&xprt->xpt_ready));
        if (test_bit(XPT_TEMP, &xprt->xpt_flags))
                serv->sv_tmpcnt--;
        spin_unlock_bh(&serv->sv_lock);
@@ -928,22 +922,48 @@ void svc_close_xprt(struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-void svc_close_all(struct list_head *xprt_list)
+static void svc_close_list(struct list_head *xprt_list)
+{
+       struct svc_xprt *xprt;
+
+       list_for_each_entry(xprt, xprt_list, xpt_list) {
+               set_bit(XPT_CLOSE, &xprt->xpt_flags);
+               set_bit(XPT_BUSY, &xprt->xpt_flags);
+       }
+}
+
+void svc_close_all(struct svc_serv *serv)
 {
+       struct svc_pool *pool;
        struct svc_xprt *xprt;
        struct svc_xprt *tmp;
+       int i;
+
+       svc_close_list(&serv->sv_tempsocks);
+       svc_close_list(&serv->sv_permsocks);
 
+       for (i = 0; i < serv->sv_nrpools; i++) {
+               pool = &serv->sv_pools[i];
+
+               spin_lock_bh(&pool->sp_lock);
+               while (!list_empty(&pool->sp_sockets)) {
+                       xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready);
+                       list_del_init(&xprt->xpt_ready);
+               }
+               spin_unlock_bh(&pool->sp_lock);
+       }
        /*
-        * The server is shutting down, and no more threads are running.
-        * svc_xprt_enqueue() might still be running, but at worst it
-        * will re-add the xprt to sp_sockets, which will soon get
-        * freed.  So we don't bother with any more locking, and don't
-        * leave the close to the (nonexistent) server threads:
+        * At this point the sp_sockets lists will stay empty, since
+        * svc_enqueue will not add new entries without taking the
+        * sp_lock and checking XPT_BUSY.
         */
-       list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
-               set_bit(XPT_CLOSE, &xprt->xpt_flags);
+       list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list)
                svc_delete_xprt(xprt);
-       }
+       list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list)
+               svc_delete_xprt(xprt);
+
+       BUG_ON(!list_empty(&serv->sv_permsocks));
+       BUG_ON(!list_empty(&serv->sv_tempsocks));
 }
 
 /*
index 4653286fcc9e685e1d1999c532325ce48870d4f8..464570906f80c24190260bef957a53128202a486 100644 (file)
@@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
        int err, level, optname, one = 1;
 
-       svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
+       svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
+                     &svsk->sk_xprt, serv);
        clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
        svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
        svsk->sk_sk->sk_write_space = svc_write_space;
@@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
        struct sock     *sk = svsk->sk_sk;
 
-       svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
+       svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
+                     &svsk->sk_xprt, serv);
        set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
        if (sk->sk_state == TCP_LISTEN) {
                dprintk("setting up TCP socket for listening\n");
@@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
                return ERR_PTR(-ENOMEM);
 
        xprt = &svsk->sk_xprt;
-       svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+       svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
 
        serv->sv_bc_xprt = xprt;
 
index ba1296d88de0295258281db5d76ac1f2c1f0a34a..894cb42db91d6c9e2cb71a9ae0aa076db4573d96 100644 (file)
@@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 
        if (!cma_xprt)
                return NULL;
-       svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv);
+       svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
        INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
        INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh
new file mode 100755 (executable)
index 0000000..06a399a
--- /dev/null
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+#
+# Script for easier NFSD fault injection
+
+# Check that debugfs has been mounted
+DEBUGFS=`cat /proc/mounts | grep debugfs`
+if [ "$DEBUGFS" == "" ]; then
+       echo "debugfs does not appear to be mounted!"
+       echo "Please mount debugfs and try again"
+       exit 1
+fi
+
+# Check that the fault injection directory exists
+DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd
+if [ ! -d "$DEBUGDIR" ]; then
+       echo "$DEBUGDIR does not exist"
+       echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION"
+       exit 1
+fi
+
+function help()
+{
+       echo "Usage $0 injection_type [count]"
+       echo ""
+       echo "Injection types are:"
+       ls $DEBUGDIR
+       exit 1
+}
+
+if [ $# == 0 ]; then
+       help
+elif [ ! -f $DEBUGDIR/$1 ]; then
+       help
+elif [ $# != 2 ]; then
+       COUNT=0
+else
+       COUNT=$2
+fi
+
+BEFORE=`mktemp`
+AFTER=`mktemp`
+dmesg > $BEFORE
+echo $COUNT > $DEBUGDIR/$1
+dmesg > $AFTER
+# Capture lines that only exist in the $AFTER file
+diff $BEFORE $AFTER | grep ">"
+rm -f $BEFORE $AFTER