From f8e6defe7f4456d8700e5a3796a1e9fb54a88543 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 28 Oct 2011 08:40:00 +1100 Subject: [PATCH 01/31] Update NFSD MAINTAINER Neil hasn't really been at all active as a maintainer for some years now. So move his maintainership to CREDITS Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- CREDITS | 5 +++++ MAINTAINERS | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 07e32a87d956..557da475bc9c 100644 --- a/CREDITS +++ b/CREDITS @@ -514,6 +514,11 @@ S: Bessemerstraat 21 S: Amsterdam S: The Netherlands +N: NeilBrown +E: neil@brown.name +P: 4096R/566281B9 1BC6 29EB D390 D870 7B5F 497A 39EC 9EDD 5662 81B9 +D: NFSD Maintainer 2000-2007 + N: Zach Brown E: zab@zabbo.net D: maestro pci sound diff --git a/MAINTAINERS b/MAINTAINERS index 4808256446f2..6ab923d15853 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3770,7 +3770,6 @@ S: Odd Fixes KERNEL NFSD, SUNRPC, AND LOCKD SERVERS M: "J. Bruce Fields" -M: Neil Brown L: linux-nfs@vger.kernel.org W: http://nfs.sourceforge.net/ S: Supported From b93d87c19821ba7d3ee11557403d782e541071ad Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 16:37:57 -0500 Subject: [PATCH 02/31] nfsd4: fix lockowner matching Lockowners are looked up by file as well as by owner, but we were forgetting to do a comparison on the file. This could cause an incorrect result from lockt. (Note looking up the inode from the lockowner is pretty awkward here. The data structures need fixing.) Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 47e94e33a975..5abced7a7408 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3809,16 +3809,29 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; struct nfs4_stateowner *op; list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(op, owner, clid)) - return lockowner(op); + lo = lockowner(op); + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; } return NULL; } From 684e563858018d27acb8f00e30c026215bbd0ffb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 4 Nov 2011 17:08:10 -0400 Subject: [PATCH 03/31] nfsd4: cleanup lock clientid handling in sessions case I'd rather the "ignore clientid in sessions case" rule be enforced in just one place. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5abced7a7408..9354ddebbee2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3946,10 +3946,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * lock stateid. 
*/ struct nfs4_ol_stateid *open_stp = NULL; - + + if (nfsd4_has_session(cstate)) + /* See rfc 5661 18.10.3: given clientid is ignored: */ + memcpy(&lock->v.new.clientid, + &cstate->session->se_client->cl_clientid, + sizeof(clientid_t)); + status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && - STALE_CLIENTID(&lock->lk_new_clientid)) + if (STALE_CLIENTID(&lock->lk_new_clientid)) goto out; /* validate and update open stateid and open seqid */ @@ -3961,8 +3966,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; - if (!nfsd4_has_session(cstate) && - !same_clid(&open_sop->oo_owner.so_client->cl_clientid, + if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->v.new.clientid)) goto out; /* create lockowner and lock stateid */ From 64a284d07c7d84299a90826950079a8ef11e8204 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Oct 2011 06:57:46 -0400 Subject: [PATCH 04/31] nfsd4: maintain one seqid stream per (lockowner, file) Instead of creating a new lockowner and stateid for every open_to_lockowner call, reuse the existing lockowner if it exists. Reported-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 58 +++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9354ddebbee2..d09d5242c088 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3905,6 +3905,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) __set_bit(access, &lock_stp->st_access_bmap); } +__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +{ + struct nfs4_file *fi = ost->st_file; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *cl = oo->oo_owner.so_client; + struct nfs4_lockowner *lo; + unsigned int strhashval; + + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + if (lo) { + if (!cstate->minorversion) + return nfserr_bad_seqid; + /* XXX: a lockowner always has exactly one stateid: */ + *lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return nfs_ok; + } + strhashval = lock_ownerstr_hashval(fi->fi_inode, cl->cl_clientid.cl_id, + &lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + *lst = alloc_init_lock_stateid(lo, fi, ost); + if (*lst == NULL) { + release_lockowner(lo); + return nfserr_jukebox; + } + *new = true; + return nfs_ok; +} + /* * LOCK operation */ @@ -3920,7 +3951,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock file_lock; struct file_lock conflock; __be32 status = 0; - unsigned int strhashval; + bool new_state = false; int lkflg; int err; @@ -3969,21 +4000,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->v.new.clientid)) goto out; - /* create lockowner and lock stateid */ - fp = open_stp->st_file; - strhashval = lock_ownerstr_hashval(fp->fi_inode, - open_sop->oo_owner.so_client->cl_clientid.cl_id, - &lock->v.new.owner); - /* XXX: Do we need to check for duplicate stateowners on - * the same file, or should they just be allowed (and - * create new stateids)? 
*/ - status = nfserr_jukebox; - lock_sop = alloc_init_lock_stateowner(strhashval, - open_sop->oo_owner.so_client, open_stp, lock); - if (lock_sop == NULL) - goto out; - lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); - if (lock_stp == NULL) + status = lookup_or_create_lock_state(cstate, open_stp, lock, + &lock_stp, &new_state); + if (status) goto out; } else { /* lock (lock owner + lock stateid) already exists */ @@ -3993,10 +4012,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, NFS4_LOCK_STID, &lock_stp); if (status) goto out; - lock_sop = lockowner(lock_stp->st_stateowner); - fp = lock_stp->st_file; } - /* lock_sop and lock_stp have been created or found */ + lock_sop = lockowner(lock_stp->st_stateowner); + fp = lock_stp->st_file; lkflg = setlkflg(lock->lk_type); status = nfs4_check_openmode(lock_stp, lkflg); @@ -4071,7 +4089,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; } out: - if (status && lock->lk_is_new && lock_sop) + if (status && new_state) release_lockowner(lock_sop); if (!cstate->replay_owner) nfs4_unlock_state(); From 65178db42a02c7984f711614546e97e9952d8e01 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 13:35:21 -0400 Subject: [PATCH 05/31] NFSD: Added fault injection Fault injection on the NFS server makes it easier to test the client's state manager and recovery threads. Simulating errors on the server is easier than finding the right conditions that cause them naturally. This patch uses debugfs to add a simple framework for fault injection to the server. This framework is a config option, and can be enabled through CONFIG_NFSD_FAULT_INJECTION. Assuming you have debugfs mounted to /sys/debug, a set of files will be created in /sys/debug/nfsd/. Writing to any of these files will cause the corresponding action and write a log entry to dmesg. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/Kconfig | 10 ++++ fs/nfsd/Makefile | 1 + fs/nfsd/fault_inject.c | 91 ++++++++++++++++++++++++++++++++ fs/nfsd/fault_inject.h | 28 ++++++++++ fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfsctl.c | 7 +++ 6 files changed, 252 insertions(+) create mode 100644 fs/nfsd/fault_inject.c create mode 100644 fs/nfsd/fault_inject.h diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 10e6366608f2..8df1ea4a6ff9 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -80,3 +80,13 @@ config NFSD_V4 available from http://linux-nfs.org/. If unsure, say N. + +config NFSD_FAULT_INJECTION + bool "NFS server manual fault injection" + depends on NFSD_V4 && DEBUG_KERNEL + help + This option enables support for manually injecting faults + into the NFS server. This is intended to be used for + testing error recovery on the NFS client. + + If unsure, say N. 
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9b118ee20193..af32ef06b4fe 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 000000000000..ce7f0758d84c --- /dev/null +++ b/fs/nfsd/fault_inject.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2011 Bryan Schumaker + * + * Uses debugfs to create fault injection points for client testing + */ + +#include +#include +#include +#include + +#include "state.h" +#include "fault_inject.h" + +struct nfsd_fault_inject_op { + char *file; + void (*func)(u64); +}; + +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .func = nfsd_forget_clients, + }, + { + .file = "forget_locks", + .func = nfsd_forget_locks, + }, + { + .file = "forget_openowners", + .func = nfsd_forget_openowners, + }, + { + .file = "forget_delegations", + .func = nfsd_forget_delegations, + }, + { + .file = "recall_delegations", + .func = nfsd_recall_delegations, + }, +}; + +static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); +static struct dentry *debug_dir; + +static int nfsd_inject_set(void *op_ptr, u64 val) +{ + struct nfsd_fault_inject_op *op = op_ptr; + + if (val == 0) + printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); + else + printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + + op->func(val); + return 0; +} + +static int nfsd_inject_get(void *data, u64 *val) +{ + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); + +void nfsd_fault_inject_cleanup(void) +{ + debugfs_remove_recursive(debug_dir); +} + +int nfsd_fault_inject_init(void) +{ + unsigned int i; + struct nfsd_fault_inject_op *op; + mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + + debug_dir = debugfs_create_dir("nfsd", NULL); + if (!debug_dir) + goto fail; + + for (i = 0; i < NUM_INJECT_OPS; i++) { + op = &inject_ops[i]; + if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) + goto fail; + } + return 0; + +fail: + nfsd_fault_inject_cleanup(); + return -ENOMEM; +} diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h new file mode 100644 index 000000000000..90bd0570956c --- /dev/null +++ b/fs/nfsd/fault_inject.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2011 Bryan Schumaker + * + * Function definitions for fault injection + */ + +#ifndef LINUX_NFSD_FAULT_INJECT_H +#define LINUX_NFSD_FAULT_INJECT_H + +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +void nfsd_forget_clients(u64); +void nfsd_forget_locks(u64); +void nfsd_forget_openowners(u64); +void nfsd_forget_delegations(u64); +void nfsd_recall_delegations(u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +static inline void nfsd_forget_clients(u64 num) {} +static inline void nfsd_forget_locks(u64 num) {} +static inline void nfsd_forget_openowners(u64 num) {} +static inline void nfsd_forget_delegations(u64 num) {} +static inline void nfsd_recall_delegations(u64 num) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + 
+#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d09d5242c088..a511eff05698 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4429,6 +4429,121 @@ nfs4_check_open_reclaim(clientid_t *clid) return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; } +#ifdef CONFIG_NFSD_FAULT_INJECTION + +void nfsd_forget_clients(u64 num) +{ + struct nfs4_client *clp, *next; + int count = 0; + + nfs4_lock_state(); + list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { + nfsd4_remove_clid_dir(clp); + expire_client(clp); + if (++count == num) + break; + } + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d clients", count); +} + +static void release_lockowner_sop(struct nfs4_stateowner *sop) +{ + release_lockowner(lockowner(sop)); +} + +static void release_openowner_sop(struct nfs4_stateowner *sop) +{ + release_openowner(openowner(sop)); +} + +static int nfsd_release_n_owners(u64 num, + struct list_head hashtbl[], + unsigned int hashtbl_size, + void (*release_sop)(struct nfs4_stateowner *)) +{ + int i, count = 0; + struct nfs4_stateowner *sop, *next; + + for (i = 0; i < hashtbl_size; i++) { + list_for_each_entry_safe(sop, next, &hashtbl[i], so_strhash) { + release_sop(sop); + if (++count == num) + return count; + } + } + return count; +} + +void nfsd_forget_locks(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, lock_ownerstr_hashtbl, + LOCK_HASH_SIZE, release_lockowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d locks", count); +} + +void nfsd_forget_openowners(u64 num) +{ + int count; + + nfs4_lock_state(); + count = nfsd_release_n_owners(num, open_ownerstr_hashtbl, + OPEN_OWNER_HASH_SIZE, release_openowner_sop); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d open owners", count); +} + +int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +{ + int i, count = 0; + struct nfs4_file *fp; + struct nfs4_delegation *dp, *next; + + for (i = 0; i < FILE_HASH_SIZE; i++) { + list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, next, &fp->fi_delegations, dl_perfile) { + deleg_func(dp); + if (++count == num) + return count; + } + } + } + return count; +} + +void nfsd_forget_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + count = nfsd_process_n_delegations(num, unhash_delegation); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Forgot %d delegations", count); +} + +void nfsd_recall_delegations(u64 num) +{ + unsigned int count; + + nfs4_lock_state(); + spin_lock(&recall_lock); + count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); + spin_unlock(&recall_lock); + nfs4_unlock_state(); + + printk(KERN_INFO "NFSD: Recalled %d delegations", count); +} + +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + /* initialization to perform at module load time: */ int diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c45a2ea4a090..b2e8093ebc21 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -18,6 +18,7 @@ #include "idmap.h" #include "nfsd.h" #include "cache.h" +#include "fault_inject.h" /* * We have a single directory with several nodes in it. 
@@ -1131,6 +1132,9 @@ static int __init init_nfsd(void) retval = nfs4_state_init(); /* nfs4 locking state */ if (retval) return retval; + retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ + if (retval) + goto out_free_slabs; nfsd_stat_init(); /* Statistics */ retval = nfsd_reply_cache_init(); if (retval) @@ -1161,6 +1165,8 @@ out_free_cache: nfsd_reply_cache_shutdown(); out_free_stat: nfsd_stat_shutdown(); + nfsd_fault_inject_cleanup(); +out_free_slabs: nfsd4_free_slabs(); return retval; } @@ -1175,6 +1181,7 @@ static void __exit exit_nfsd(void) nfsd_lockd_shutdown(); nfsd_idmap_shutdown(); nfsd4_free_slabs(); + nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); } From 800b927b38c7cdfbd7df39d7a8a4b065637ab7b6 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 13:35:22 -0400 Subject: [PATCH 06/31] NFSD: Added fault injection script This script provides a convenient way to use the NFSD fault injection framework. Fault injection writes to dmesg using the KERN_INFO flag, so this script will compare the before and after output of `dmesg` to show the user what happened Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- tools/nfsd/inject_fault.sh | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100755 tools/nfsd/inject_fault.sh diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh new file mode 100755 index 000000000000..06a399ac8b2f --- /dev/null +++ b/tools/nfsd/inject_fault.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# +# Copyright (c) 2011 Bryan Schumaker +# +# Script for easier NFSD fault injection + +# Check that debugfs has been mounted +DEBUGFS=`cat /proc/mounts | grep debugfs` +if [ "$DEBUGFS" == "" ]; then + echo "debugfs does not appear to be mounted!" + echo "Please mount debugfs and try again" + exit 1 +fi + +# Check that the fault injection directory exists +DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd +if [ ! -d "$DEBUGDIR" ]; then + echo "$DEBUGDIR does not exist" + echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION" + exit 1 +fi + +function help() +{ + echo "Usage $0 injection_type [count]" + echo "" + echo "Injection types are:" + ls $DEBUGDIR + exit 1 +} + +if [ $# == 0 ]; then + help +elif [ ! -f $DEBUGDIR/$1 ]; then + help +elif [ $# != 2 ]; then + COUNT=0 +else + COUNT=$2 +fi + +BEFORE=`mktemp` +AFTER=`mktemp` +dmesg > $BEFORE +echo $COUNT > $DEBUGDIR/$1 +dmesg > $AFTER +# Capture lines that only exist in the $AFTER file +diff $BEFORE $AFTER | grep ">" +rm -f $BEFORE $AFTER From 114a0a08d46cfb0eefb1a882f7866b7f57bfc5ba Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 13:35:23 -0400 Subject: [PATCH 07/31] NFSD: Added fault injection documentation Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- Documentation/filesystems/nfs/00-INDEX | 2 + .../filesystems/nfs/fault_injection.txt | 69 +++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 Documentation/filesystems/nfs/fault_injection.txt diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX index a57e12411d2a..1716874a651e 100644 --- a/Documentation/filesystems/nfs/00-INDEX +++ b/Documentation/filesystems/nfs/00-INDEX @@ -2,6 +2,8 @@ - this file (nfs-related documentation). Exporting - explanation of how to make filesystems exportable. +fault_injection.txt + - information for using fault injection on the server knfsd-stats.txt - statistics which the NFS server makes available to user space. 
nfs.txt diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt new file mode 100644 index 000000000000..426d166089a3 --- /dev/null +++ b/Documentation/filesystems/nfs/fault_injection.txt @@ -0,0 +1,69 @@ + +Fault Injection +=============== +Fault injection is a method for forcing errors that may not normally occur, or +may be difficult to reproduce. Forcing these errors in a controlled environment +can help the developer find and fix bugs before their code is shipped in a +production system. Injecting an error on the Linux NFS server will allow us to +observe how the client reacts and if it manages to recover its state correctly. + +NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this +feature. + + +Using Fault Injection +===================== +On the client, mount the fault injection server through NFS v4.0+ and do some +work over NFS (open files, take locks, ...). + +On the server, mount the debugfs filesystem to and ls +/nfsd. This will show a list of files that will be used for +injecting faults on the NFS server. As root, write a number n to the file +corresponding to the action you want the server to take. The server will then +process the first n items it finds. So if you want to forget 5 locks, echo '5' +to /nfsd/forget_locks. A value of 0 will tell the server to forget +all corresponding items. A log message will be created containing the number +of items forgotten (check dmesg). + +Go back to work on the client and check if the client recovered from the error +correctly. + + +Available Faults +================ +forget_clients: + The NFS server keeps a list of clients that have placed a mount call. If + this list is cleared, the server will have no knowledge of who the client + is, forcing the client to reauthenticate with the server. + +forget_openowners: + The NFS server keeps a list of what files are currently opened and who + they were opened by. Clearing this list will force the client to reopen + its files. + +forget_locks: + The NFS server keeps a list of what files are currently locked in the VFS. + Clearing this list will force the client to reclaim its locks (files are + unlocked through the VFS as they are cleared from this list). + +forget_delegations: + A delegation is used to assure the client that a file, or part of a file, + has not changed since the delegation was awarded. Clearing this list will + force the client to reaquire its delegation before accessing the file + again. + +recall_delegations: + Delegations can be recalled by the server when another client attempts to + access a file. This test will notify the client that its delegation has + been revoked, forcing the client to reaquire the delegation before using + the file again. + + +tools/nfs/inject_faults.sh script +================================= +This script has been created to ease the fault injection process. This script +will detect the mounted debugfs directory and write to the files located there +based on the arguments passed by the user. For example, running +`inject_faults.sh forget_locks 1` as root will instruct the server to forget +one lock. Running `inject_faults forget_locks` will instruct the server to +forgetall locks. 
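A quick illustration of the manual workflow the documentation above describes, written as a sketch that assumes debugfs is mounted at /sys/debug (the location used in the commit message for "NFSD: Added fault injection"); adjust the path to wherever debugfs is actually mounted on your system:

    # list the available injection points (requires CONFIG_NFSD_FAULT_INJECTION)
    ls /sys/debug/nfsd/
    # as root, forget the first 5 locks the server finds
    echo 5 > /sys/debug/nfsd/forget_locks
    # a count of 0 means "all": forget every client
    echo 0 > /sys/debug/nfsd/forget_clients
    # each write logs the number of items processed
    dmesg | tail

The inject_fault.sh script added earlier in this series automates exactly these steps: it locates the debugfs mount, writes the count to the chosen file, and diffs dmesg to show what the server did.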
From 72083396074035ffa5cf81b6bb3e55f1d615badf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 15:24:59 -0400 Subject: [PATCH 08/31] NFSD: Call nfsd4_init_slabs() from init_nfsd() init_nfsd() was calling free_slabs() during cleanup code, but the call to init_slabs() was hidden in nfsd4_state_init(). This could be confusing to people unfamiliar with the code. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 +++------- fs/nfsd/nfsctl.c | 3 ++- fs/nfsd/nfsd.h | 6 ++++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a511eff05698..1d6812db5099 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2301,7 +2301,7 @@ nfsd4_free_slabs(void) nfsd4_free_slab(&deleg_slab); } -static int +int nfsd4_init_slabs(void) { openowner_slab = kmem_cache_create("nfsd4_openowners", @@ -4546,14 +4546,11 @@ void nfsd_recall_delegations(u64 num) /* initialization to perform at module load time: */ -int +void nfs4_state_init(void) { - int i, status; + int i; - status = nfsd4_init_slabs(); - if (status) - return status; for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&conf_id_hashtbl[i]); INIT_LIST_HEAD(&conf_str_hashtbl[i]); @@ -4577,7 +4574,6 @@ nfs4_state_init(void) INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); reclaim_str_hashtbl_size = 0; - return 0; } static void diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b2e8093ebc21..8daa935f329f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1129,9 +1129,10 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = nfs4_state_init(); /* nfs4 locking state */ + retval = nfsd4_init_slabs(); if (retval) return retval; + nfs4_state_init(); retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) goto out_free_slabs; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 58134a23fdfb..4e2ee29f2cf9 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq) */ #ifdef CONFIG_NFSD_V4 extern unsigned int max_delegations; -int nfs4_state_init(void); +void nfs4_state_init(void); +int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); void nfs4_state_shutdown(void); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); #else -static inline int nfs4_state_init(void) { return 0; } +static inline void nfs4_state_init(void) { } +static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } static inline void nfs4_state_shutdown(void) { } From c7e8472cf8ae877b232eb506284a5b15cf7efb2c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 1 Nov 2011 14:18:28 -0400 Subject: [PATCH 09/31] NFSD: Remove unnecessary whitespace The close parenthesis was hard to find with it spaced so far over. Signed-off-by: Bryan Schumaker [bfields@redhat.com: get all these lines under 80 chars while we're here] Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsd.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 4e2ee29f2cf9..1d1e8589b4ce 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -340,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) } /* These will return ERR_INVAL if specified in GETATTR or READDIR. 
*/ -#define NFSD_WRITEONLY_ATTRS_WORD1 \ -(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEONLY_ATTRS_WORD1 \ + (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ -#define NFSD_WRITEABLE_ATTRS_WORD0 \ -(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) -#define NFSD_WRITEABLE_ATTRS_WORD1 \ -(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#define NFSD_WRITEABLE_ATTRS_WORD0 \ + (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #define NFSD_WRITEABLE_ATTRS_WORD2 0 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ From 06f1f864d4ae5804e83785308d41f14a08e4b980 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 16:58:18 -0500 Subject: [PATCH 10/31] nfsd4: hash lockowners to simplify RELEASE_LOCKOWNER Hash lockowners on just the owner string rather than on (owner, inode). This makes the owner-string lookup needed for RELEASE_LOCKOWNER simpler (currently it's doing at a linear search through the entire hash table!). That may come at the expense of making (owner, inode) lookups more expensive if a client reuses the same lockowner across multiple files. We might add a separate lookup for that. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1d6812db5099..eec990022ad3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3746,15 +3746,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } -static inline unsigned int -lock_ownerstr_hashval(struct inode *inode, u32 cl_id, - struct xdr_netobj *ownername) -{ - return (file_hashval(inode) + cl_id - + opaque_hashval(ownername->data, ownername->len)) - & LOCK_HASH_MASK; -} - static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; /* @@ -3824,7 +3815,7 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); struct nfs4_lockowner *lo; struct nfs4_stateowner *op; @@ -3847,7 +3838,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. 
* - * strhashval = lock_ownerstr_hashval + * strhashval = open_ownerstr_hashval */ static struct nfs4_lockowner * @@ -3922,7 +3913,7 @@ __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct n struct nfs4_ol_stateid, st_perstateowner); return nfs_ok; } - strhashval = lock_ownerstr_hashval(fi->fi_inode, cl->cl_clientid.cl_id, + strhashval = open_ownerstr_hashval(cl->cl_clientid.cl_id, &lock->v.new.owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) @@ -4286,7 +4277,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - int i; + unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4301,22 +4292,17 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfserr_locks_held; - /* XXX: we're doing a linear search through all the lockowners. - * Yipes! For now we'll just hope clients aren't really using - * release_lockowner much, but eventually we have to fix these - * data structures. */ INIT_LIST_HEAD(&matches); - for (i = 0; i < LOCK_HASH_SIZE; i++) { - list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - lo = lockowner(sop); - if (check_for_locks(stp->st_file, lo)) - goto out; - list_add(&lo->lo_list, &matches); - } + + list_for_each_entry(sop, &lock_ownerstr_hashtbl[hashval], so_strhash) { + if (!same_owner_str(sop, owner, clid)) + continue; + list_for_each_entry(stp, &sop->so_stateids, + st_perstateowner) { + lo = lockowner(sop); + if (check_for_locks(stp->st_file, lo)) + goto out; + list_add(&lo->lo_list, &matches); } } /* Clients probably won't expect us to return with some (but not all) From 16bfdaafa2c66d8cc81422a97a105a849ca65b3e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 17:23:30 -0500 Subject: [PATCH 11/31] nfsd4: share open and lock owner hash tables Now that they're used in the same way, it's a little simpler to put open and lock owners in the same hash table, and I can't see a reason not to. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index eec990022ad3..dcbe7f37759d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -133,21 +133,21 @@ unsigned int max_delegations; * Open owner state (share locks) */ -/* hash tables for open owners */ -#define OPEN_OWNER_HASH_BITS 8 -#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) -#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) +/* hash tables for lock and open owners */ +#define OWNER_HASH_BITS 8 +#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) +#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); ret += clientid; - return ret & OPEN_OWNER_HASH_MASK; + return ret & OWNER_HASH_MASK; } -static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; +static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 @@ -2373,7 +2373,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } @@ -2436,7 +2436,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) struct nfs4_stateowner *so; struct nfs4_openowner *oo; - list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + if (!so->so_is_open_owner) + continue; if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { oo = openowner(so); renew_client(oo->oo_owner.so_client); @@ -2580,7 +2582,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); + strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); oo = find_openstateowner_str(strhashval, open); open->op_openowner = oo; if (!oo) { @@ -3718,13 +3720,7 @@ out: } -/* - * Lock owner state (byte-range locks) - */ #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCK_HASH_BITS 8 -#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) -#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) static inline u64 end_offset(u64 start, u64 len) @@ -3746,8 +3742,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? 
end - 1: NFS4_MAX_UINT64; } -static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3815,11 +3809,13 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); struct nfs4_lockowner *lo; struct nfs4_stateowner *op; - list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(op, &ownerstr_hashtbl[hashval], so_strhash) { + if (op->so_is_open_owner) + continue; lo = lockowner(op); if (same_lockowner_ino(lo, inode, clid, owner)) return lo; @@ -3829,7 +3825,7 @@ find_lockowner_str(struct inode *inode, clientid_t *clid, static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) { - list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -3838,7 +3834,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. * - * strhashval = open_ownerstr_hashval + * strhashval = ownerstr_hashval */ static struct nfs4_lockowner * @@ -3913,7 +3909,7 @@ __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct n struct nfs4_ol_stateid, st_perstateowner); return nfs_ok; } - strhashval = open_ownerstr_hashval(cl->cl_clientid.cl_id, + strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, &lock->v.new.owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) @@ -4277,7 +4273,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - unsigned int hashval = open_ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", @@ -4294,7 +4290,9 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, status = nfserr_locks_held; INIT_LIST_HEAD(&matches); - list_for_each_entry(sop, &lock_ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + if (sop->so_is_open_owner) + continue; if (!same_owner_str(sop, owner, clid)) continue; list_for_each_entry(stp, &sop->so_stateids, @@ -4444,16 +4442,16 @@ static void release_openowner_sop(struct nfs4_stateowner *sop) release_openowner(openowner(sop)); } -static int nfsd_release_n_owners(u64 num, - struct list_head hashtbl[], - unsigned int hashtbl_size, +static int nfsd_release_n_owners(u64 num, bool is_open_owner void (*release_sop)(struct nfs4_stateowner *)) { int i, count = 0; struct nfs4_stateowner *sop, *next; - for (i = 0; i < hashtbl_size; i++) { - list_for_each_entry_safe(sop, next, &hashtbl[i], so_strhash) { + for (i = 0; i < OWNER_HASH_SIZE; i++) { + list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { + if (sop->so_is_open_owner != is_open_owner) + continue; release_sop(sop); if (++count == num) return count; @@ -4467,8 +4465,7 @@ void nfsd_forget_locks(u64 num) int count; nfs4_lock_state(); - count = nfsd_release_n_owners(num, 
lock_ownerstr_hashtbl, - LOCK_HASH_SIZE, release_lockowner_sop); + count = nfsd_release_n_owners(num, false, release_lockowner_sop); nfs4_unlock_state(); printk(KERN_INFO "NFSD: Forgot %d locks", count); @@ -4479,8 +4476,7 @@ void nfsd_forget_openowners(u64 num) int count; nfs4_lock_state(); - count = nfsd_release_n_owners(num, open_ownerstr_hashtbl, - OPEN_OWNER_HASH_SIZE, release_openowner_sop); + count = nfsd_release_n_owners(num, true, release_openowner_sop); nfs4_unlock_state(); printk(KERN_INFO "NFSD: Forgot %d open owners", count); @@ -4549,11 +4545,8 @@ nfs4_state_init(void) for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } - for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); - } - for (i = 0; i < LOCK_HASH_SIZE; i++) { - INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); + for (i = 0; i < OWNER_HASH_SIZE; i++) { + INIT_LIST_HEAD(&ownerstr_hashtbl[i]); } memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); From 353de31b86850309b205a3d4cc1294052471b14d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 15 Nov 2011 19:25:03 -0500 Subject: [PATCH 12/31] nfsd4: fix CONFIG_NFSD_FAULT_INJECTION compile error Reported-by: Stephen Rothwell Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index dcbe7f37759d..7e4edbde7373 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4442,7 +4442,7 @@ static void release_openowner_sop(struct nfs4_stateowner *sop) release_openowner(openowner(sop)); } -static int nfsd_release_n_owners(u64 num, bool is_open_owner +static int nfsd_release_n_owners(u64 num, bool is_open_owner, void (*release_sop)(struct nfs4_stateowner *)) { int i, count = 0; From 009673b439cf74d70a486fca0177e274febd81a7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Nov 2011 17:40:10 -0500 Subject: [PATCH 13/31] nfsd4: add a separate (lockowner, inode) lookup Address the possible performance regression mentioned in "nfsd4: hash lockowners to simplify RELEASE_LOCKOWNER" by providing a separate (lockowner, inode) hash. Really, I doubt this matters much, but I think it's likely we'll change these data structures here and I'd rather that the need for (owner, inode) lookups be well-documented. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 29 +++++++++++++++++++++++------ fs/nfsd/state.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7e4edbde7373..a84978c13bb8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -514,6 +514,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) list_del(&lo->lo_owner.so_strhash); list_del(&lo->lo_perstateid); + list_del(&lo->lo_owner_ino_hash); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); @@ -3722,6 +3723,10 @@ out: #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) +#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) + static inline u64 end_offset(u64 start, u64 len) { @@ -3742,6 +3747,15 @@ last_byte_offset(u64 start, u64 len) return end > start ? 
end - 1: NFS4_MAX_UINT64; } +static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) +{ + return (file_hashval(inode) + cl_id + + opaque_hashval(ownername->data, ownername->len)) + & LOCKOWNER_INO_HASH_MASK; +} + +static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; + /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3809,14 +3823,10 @@ static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { - unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - struct nfs4_stateowner *op; - list_for_each_entry(op, &ownerstr_hashtbl[hashval], so_strhash) { - if (op->so_is_open_owner) - continue; - lo = lockowner(op); + list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { if (same_lockowner_ino(lo, inode, clid, owner)) return lo; } @@ -3825,7 +3835,12 @@ find_lockowner_str(struct inode *inode, clientid_t *clid, static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) { + struct inode *inode = open_stp->st_file->fi_inode; + unsigned int inohash = lockowner_ino_hashval(inode, + clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -4548,6 +4563,8 @@ nfs4_state_init(void) for (i = 0; i < OWNER_HASH_SIZE; i++) { INIT_LIST_HEAD(&ownerstr_hashtbl[i]); } + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index a3cf38476a1b..89c2cd84d796 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -366,6 +366,7 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ + struct list_head lo_owner_ino_hash; /* hash by owner,file */ struct list_head lo_perstateid; /* for lockowners only */ struct list_head lo_list; /* for temporary uses */ }; From 67114fe6103183190fb0a24f58e5695a80beb2ec Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 17 Nov 2011 23:43:40 +0100 Subject: [PATCH 14/31] nfsd4: Use kmemdup rather than duplicating its implementation The semantic patch that makes this change is available in scripts/coccinelle/api/memdup.cocci. Signed-off-by: Thomas Meyer Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- fs/nfsd/nfs4xdr.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a84978c13bb8..6ab677913f9c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -986,12 +986,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); + clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); if (clp->cl_name.data == NULL) { kfree(clp); return NULL; } - memcpy(clp->cl_name.data, name.data, name.len); clp->cl_name.len = name.len; return clp; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b6fa792d6b85..0ec5a1b9700e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp, static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { if (p == argp->tmp) { - p = kmalloc(nbytes, GFP_KERNEL); + p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); if (!p) return NULL; - memcpy(p, argp->tmp, nbytes); } else { BUG_ON(p != argp->tmpp); argp->tmpp = NULL; From b2ea70afade7080360ac55c4e64ff7a5fafdb67b Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 18 Nov 2011 12:14:49 +0200 Subject: [PATCH 15/31] nfsd: Fix oops when parsing a 0 length export expkey_parse() oopses when handling a 0 length export. This is easily triggerable from usermode by writing 0 bytes into '/proc/[proc id]/net/rpc/nfsd.fh/channel'. Below is the log: [ 1402.286893] BUG: unable to handle kernel paging request at ffff880077c49fff [ 1402.287632] IP: [] expkey_parse+0x28/0x2e1 [ 1402.287632] PGD 2206063 PUD 1fdfd067 PMD 1ffbc067 PTE 8000000077c49160 [ 1402.287632] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 1402.287632] CPU 1 [ 1402.287632] Pid: 20198, comm: trinity Not tainted 3.2.0-rc2-sasha-00058-gc65cd37 #6 [ 1402.287632] RIP: 0010:[] [] expkey_parse+0x28/0x2e1 [ 1402.287632] RSP: 0018:ffff880077f0fd68 EFLAGS: 00010292 [ 1402.287632] RAX: ffff880077c49fff RBX: 00000000ffffffea RCX: 0000000001043400 [ 1402.287632] RDX: 0000000000000000 RSI: ffff880077c4a000 RDI: ffffffff82283de0 [ 1402.287632] RBP: ffff880077f0fe18 R08: 0000000000000001 R09: ffff880000000000 [ 1402.287632] R10: 0000000000000000 R11: 0000000000000001 R12: ffff880077c4a000 [ 1402.287632] R13: ffffffff82283de0 R14: 0000000001043400 R15: ffffffff82283de0 [ 1402.287632] FS: 00007f25fec3f700(0000) GS:ffff88007d400000(0000) knlGS:0000000000000000 [ 1402.287632] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1402.287632] CR2: ffff880077c49fff CR3: 0000000077e1d000 CR4: 00000000000406e0 [ 1402.287632] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1402.287632] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 1402.287632] Process trinity (pid: 20198, threadinfo ffff880077f0e000, task ffff880077db17b0) [ 1402.287632] Stack: [ 1402.287632] ffff880077db17b0 ffff880077c4a000 ffff880077f0fdb8 ffffffff810b411e [ 1402.287632] ffff880000000000 ffff880077db17b0 ffff880077c4a000 ffffffff82283de0 [ 1402.287632] 0000000001043400 ffffffff82283de0 ffff880077f0fde8 ffffffff81111f63 [ 1402.287632] Call Trace: [ 1402.287632] [] ? lock_release+0x1af/0x1bc [ 1402.287632] [] ? might_fault+0x97/0x9e [ 1402.287632] [] ? might_fault+0x4e/0x9e [ 1402.287632] [] cache_do_downcall+0x3e/0x4f [ 1402.287632] [] cache_write.clone.16+0xbb/0x130 [ 1402.287632] [] ? 
cache_write_pipefs+0x1a/0x1a [ 1402.287632] [] cache_write_procfs+0x19/0x1b [ 1402.287632] [] proc_reg_write+0x8e/0xad [ 1402.287632] [] vfs_write+0xaa/0xfd [ 1402.287632] [] ? fget_light+0x35/0x9e [ 1402.287632] [] sys_write+0x48/0x6f [ 1402.287632] [] system_call_fastpath+0x16/0x1b [ 1402.287632] Code: c0 c9 c3 55 48 63 d2 48 89 e5 48 8d 44 32 ff 41 57 41 56 41 55 41 54 53 bb ea ff ff ff 48 81 ec 88 00 00 00 48 89 b5 58 ff ff ff [ 1402.287632] 38 0a 0f 85 89 02 00 00 c6 00 00 48 8b 3d 44 4a e5 01 48 85 [ 1402.287632] RIP [] expkey_parse+0x28/0x2e1 [ 1402.287632] RSP [ 1402.287632] CR2: ffff880077c49fff [ 1402.287632] ---[ end trace 368ef53ff773a5e3 ]--- Cc: "J. Bruce Fields" Cc: Neil Brown Cc: linux-nfs@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 62f3b9074e84..5f312abf1247 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_expkey key; struct svc_expkey *ek = NULL; - if (mesg[mlen-1] != '\n') + if (mlen < 1 || mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; From 7710ec36b6f516e026f9e29e50e67d2547c2a79b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 25 Nov 2011 18:44:05 -0500 Subject: [PATCH 16/31] svcrpc: make svc_delete_xprt static Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 - net/sunrpc/svc_xprt.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 8620f79658d4..5488e593160a 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -118,7 +118,6 @@ void svc_xprt_received(struct svc_xprt *); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); void svc_close_xprt(struct svc_xprt *xprt); -void svc_delete_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 447cd0eb415c..8046c6d1f9c2 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); +static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt) /* * Remove a dead transport */ -void svc_delete_xprt(struct svc_xprt *xprt) +static void svc_delete_xprt(struct svc_xprt *xprt) { struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; From 2fefb8a09e7ed251ae8996e0c69066e74c5aa560 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 29 Nov 2011 11:35:35 -0500 Subject: [PATCH 17/31] svcrpc: destroy server sockets all at once There's no reason I can see that we need to call sv_shutdown between closing the two lists of sockets. Cc: stable@kernel.org Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svcsock.h | 2 +- net/sunrpc/svc.c | 7 +------ net/sunrpc/svc_xprt.c | 11 ++++++++++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 85c50b40759d..c84e9741cb2a 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -34,7 +34,7 @@ struct svc_sock { /* * Function prototypes. */ -void svc_close_all(struct list_head *); +void svc_close_all(struct svc_serv *); int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6e038884ae0c..60babf0a9847 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -528,16 +528,11 @@ svc_destroy(struct svc_serv *serv) del_timer_sync(&serv->sv_temptimer); - svc_close_all(&serv->sv_tempsocks); + svc_close_all(serv); if (serv->sv_shutdown) serv->sv_shutdown(serv); - svc_close_all(&serv->sv_permsocks); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); - cache_clean_deferred(serv); if (svc_serv_is_pooled(serv)) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8046c6d1f9c2..099ddf99d2a1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -929,7 +929,7 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -void svc_close_all(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list) { struct svc_xprt *xprt; struct svc_xprt *tmp; @@ -947,6 +947,15 @@ void svc_close_all(struct list_head *xprt_list) } } +void svc_close_all(struct svc_serv *serv) +{ + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); + +} + /* * Handle defer and revisit of requests */ From b4f36f88b3ee7cf26bf0be84e6c7fc15f84dcb71 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 29 Nov 2011 17:00:26 -0500 Subject: [PATCH 18/31] svcrpc: avoid memory-corruption on pool shutdown Socket callbacks use svc_xprt_enqueue() to add an xprt to a pool->sp_sockets list. In normal operation a server thread will later come along and take the xprt off that list. On shutdown, after all the threads have exited, we instead manually walk the sv_tempsocks and sv_permsocks lists to find all the xprt's and delete them. So the sp_sockets lists don't really matter any more. As a result, we've mostly just ignored them and hoped they would go away. Which has gotten us into trouble; witness for example ebc63e531cc6 "svcrpc: fix list-corrupting race on nfsd shutdown", the result of Ben Greear noticing that a still-running svc_xprt_enqueue() could re-add an xprt to an sp_sockets list just before it was deleted. The fix was to remove it from the list at the end of svc_delete_xprt(). But that only made corruption less likely--I can see nothing that prevents a svc_xprt_enqueue() from adding another xprt to the list at the same moment that we're removing this xprt from the list. In fact, despite the earlier xpo_detach(), I don't even see what guarantees that svc_xprt_enqueue() couldn't still be running on this xprt. So, instead, note that svc_xprt_enqueue() essentially does: lock sp_lock if XPT_BUSY unset add to sp_sockets unlock sp_lock So, if we do: set XPT_BUSY on every xprt. Empty every sp_sockets list, under the sp_socks locks. 
Then we're left knowing that the sp_sockets lists are all empty and will stay that way, since any svc_xprt_enqueue() will check XPT_BUSY under the sp_lock and see it set. And *then* we can continue deleting the xprt's. (Thanks to Jeff Layton for being correctly suspicious of this code....) Cc: Ben Greear Cc: Jeff Layton Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 10 ++++++++- net/sunrpc/svc_xprt.c | 48 ++++++++++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 60babf0a9847..1a6c16ed7fa6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -527,7 +527,15 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ svc_close_all(serv); if (serv->sv_shutdown) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 099ddf99d2a1..0d80c064e634 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -894,14 +894,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); - /* - * The only time we're called while xpt_ready is still on a list - * is while the list itself is about to be destroyed (in - * svc_destroy). BUT svc_xprt_enqueue could still be attempting - * to add new entries to the sp_sockets list, so we can't leave - * a freed xprt on it. - */ - list_del_init(&xprt->xpt_ready); + BUG_ON(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -932,28 +925,45 @@ EXPORT_SYMBOL_GPL(svc_close_xprt); static void svc_close_list(struct list_head *xprt_list) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - /* - * The server is shutting down, and no more threads are running. - * svc_xprt_enqueue() might still be running, but at worst it - * will re-add the xprt to sp_sockets, which will soon get - * freed. So we don't bother with any more locking, and don't - * leave the close to the (nonexistent) server threads: - */ - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { + list_for_each_entry(xprt, xprt_list, xpt_list) { set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_delete_xprt(xprt); + set_bit(XPT_BUSY, &xprt->xpt_flags); } } void svc_close_all(struct svc_serv *serv) { + struct svc_pool *pool; + struct svc_xprt *xprt; + struct svc_xprt *tmp; + int i; + svc_close_list(&serv->sv_tempsocks); svc_close_list(&serv->sv_permsocks); + + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[i]; + + spin_lock_bh(&pool->sp_lock); + while (!list_empty(&pool->sp_sockets)) { + xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + } + spin_unlock_bh(&pool->sp_lock); + } + /* + * At this point the sp_sockets lists will stay empty, since + * svc_enqueue will not add new entries without taking the + * sp_lock and checking XPT_BUSY. 
+ */ + list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) + svc_delete_xprt(xprt); + list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) + svc_delete_xprt(xprt); + BUG_ON(!list_empty(&serv->sv_permsocks)); BUG_ON(!list_empty(&serv->sv_tempsocks)); - } /* From 0cf99b91c669510b785b459c211772091a94efd5 Mon Sep 17 00:00:00 2001 From: Mi Jinlong Date: Wed, 23 Nov 2011 10:48:40 +0800 Subject: [PATCH 19/31] nfsd41: allow non-reclaim open-by-fh's in 4.1 With NFSv4.0 it was safe to assume that open-by-filehandles were always reclaims. With NFSv4.1 there are non-reclaim open-by-filehandle operations, so we should ensure we're only insisting on reclaims in the OPEN_CLAIM_PREVIOUS case. Signed-off-by: Mi Jinlong Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index fa383361bc61..9415bc415ce8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ { __be32 status; - /* Only reclaims from previously confirmed clients are valid */ - if ((status = nfs4_check_open_reclaim(&open->op_clientid))) - return status; - /* We don't know the target directory, and therefore can not * set the change info */ @@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + status = nfs4_check_open_reclaim(&open->op_clientid); + if (status) + goto out; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, &cstate->current_fh, From 94cf3179ccfc69d727dd884fd0831d82ada6bb06 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 1 Dec 2011 17:51:21 -0500 Subject: [PATCH 20/31] svcrpc: update outdated BKL comment Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1a6c16ed7fa6..e9632bb66535 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -686,8 +686,8 @@ found_pool: * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between - * all pools. Must be called with a svc_get() reference and - * the BKL or another lock to protect access to svc_serv fields. + * all pools. Caller must ensure that mutual exclusion between this and + * server startup or shutdown. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv From bd4620ddf6d6eb3d9e7d073ad601fa4299d46ba9 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 6 Dec 2011 14:19:10 +0300 Subject: [PATCH 21/31] SUNRPC: create svc_xprt in proper network namespace This patch makes svc_xprt inherit network namespace link from its socket. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 2 +- net/sunrpc/svc_xprt.c | 6 +++--- net/sunrpc/svcsock.c | 8 +++++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 5488e593160a..dfa900948af7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); -void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, +void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0d80c064e634..0633c7e2fe63 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -148,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put); * Called by transport drivers to initialize the transport independent * portion of the transport instance. */ -void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, - struct svc_serv *serv) +void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, + struct svc_xprt *xprt, struct svc_serv *serv) { memset(xprt, 0, sizeof(*xprt)); xprt->xpt_class = xcl; @@ -164,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); - xprt->xpt_net = get_net(&init_net); + xprt->xpt_net = get_net(net); } EXPORT_SYMBOL_GPL(svc_xprt_init); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 71bed1c1c77a..277909e651ed 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { int err, level, optname, one = 1; - svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, + &svsk->sk_xprt, serv); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_write_space = svc_write_space; @@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) { struct sock *sk = svsk->sk_sk; - svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, + &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); @@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, return ERR_PTR(-ENOMEM); xprt = &svsk->sk_xprt; - svc_xprt_init(&svc_tcp_bc_class, xprt, serv); + svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ba1296d88de0..894cb42db91d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, if (!cma_xprt) return NULL; - svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); + svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); 
INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); From f5c8593b94190aabdcf207a544f082c7816c4fe6 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 7 Dec 2011 12:57:56 +0300 Subject: [PATCH 22/31] NFSd: use network-namespace-aware cache registering routines v2: cache_register_net() and cache_unregister_net() GPL exports added This is a cleanup patch. Hope, some day generic cache_register() and cache_unregister() will be removed. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 10 +++++----- fs/nfsd/nfs4idmap.c | 11 ++++++----- net/sunrpc/cache.c | 2 ++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5f312abf1247..cf8a6bd062fa 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1226,12 +1226,12 @@ nfsd_export_init(void) int rv; dprintk("nfsd: initializing export module.\n"); - rv = cache_register(&svc_export_cache); + rv = cache_register_net(&svc_export_cache, &init_net); if (rv) return rv; - rv = cache_register(&svc_expkey_cache); + rv = cache_register_net(&svc_expkey_cache, &init_net); if (rv) - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_export_cache, &init_net); return rv; } @@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void) dprintk("nfsd: shutting down export module.\n"); - cache_unregister(&svc_expkey_cache); - cache_unregister(&svc_export_cache); + cache_unregister_net(&svc_expkey_cache, &init_net); + cache_unregister_net(&svc_export_cache, &init_net); svcauth_unix_purge(); dprintk("nfsd: export shutdown complete.\n"); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 55780a22fdbd..94096273cd6c 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -466,20 +467,20 @@ nfsd_idmap_init(void) { int rv; - rv = cache_register(&idtoname_cache); + rv = cache_register_net(&idtoname_cache, &init_net); if (rv) return rv; - rv = cache_register(&nametoid_cache); + rv = cache_register_net(&nametoid_cache, &init_net); if (rv) - cache_unregister(&idtoname_cache); + cache_unregister_net(&idtoname_cache, &init_net); return rv; } void nfsd_idmap_shutdown(void) { - cache_unregister(&idtoname_cache); - cache_unregister(&nametoid_cache); + cache_unregister_net(&idtoname_cache, &init_net); + cache_unregister_net(&nametoid_cache, &init_net); } static int diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 72ad836e4fe0..b8daa577734f 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net) sunrpc_destroy_cache_detail(cd); return ret; } +EXPORT_SYMBOL_GPL(cache_register_net); int cache_register(struct cache_detail *cd) { @@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) remove_cache_proc_entries(cd, net); sunrpc_destroy_cache_detail(cd); } +EXPORT_SYMBOL_GPL(cache_unregister_net); void cache_unregister(struct cache_detail *cd) { From f32f3c2d3f09a586349ca9180885dc8741290fd9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 12 Dec 2011 15:00:35 -0500 Subject: [PATCH 23/31] nfsd4: initialize special stateid's at compile time Stateid's with "other" ("opaque") field all zeros or all ones are reserved. We define all_ones separately on the off chance there will be more such some day, though currently all the other special stateid's have zero other field. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6ab677913f9c..213da7b7e7d3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -49,12 +49,20 @@ time_t nfsd4_lease = 90; /* default lease time */ time_t nfsd4_grace = 90; static time_t boot_time; -static stateid_t zerostateid; /* bits all 0 */ -static stateid_t onestateid; /* bits all 1 */ + +#define all_ones {{~0,~0},~0} +static const stateid_t one_stateid = { + .si_generation = ~0, + .si_opaque = all_ones, +}; +static const stateid_t zero_stateid = { + /* all fields zero */ +}; + static u64 current_sessionid = 1; -#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) -#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) /* forward declarations */ static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); @@ -4564,7 +4572,6 @@ nfs4_state_init(void) } for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); - memset(&onestateid, ~0, sizeof(stateid_t)); INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); From 39c4cc0fcc38cff29d5b9884372b17c894c9c080 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 13 Dec 2011 16:35:58 -0500 Subject: [PATCH 24/31] NFSD: Only reinitilize the recall_lru list under the recall lock unhash_delegation() will grab the recall lock before calling list_del_init() in each of these places. This patch removes the redundant calls. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 213da7b7e7d3..19ca9b54200b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1066,7 +1066,6 @@ expire_client(struct nfs4_client *clp) spin_unlock(&recall_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { @@ -3133,7 +3132,6 @@ nfs4_laundromat(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } test_val = nfsd4_lease; @@ -4674,7 +4672,6 @@ __nfs4_state_shutdown(void) spin_unlock(&recall_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); unhash_delegation(dp); } From 2d3475c0ad625f7a43844a6cd0130d136416e604 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 14 Dec 2011 14:39:56 -0500 Subject: [PATCH 25/31] NFSD: forget_delegations should use list_for_each_entry_safe Otherwise the for loop could try to use a file recently removed from the file_hashtbl list and oops. Signed-off-by: Bryan Schumaker Tested-by: Casey Bodley Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 19ca9b54200b..7355fe405d69 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4505,18 +4505,19 @@ void nfsd_forget_openowners(u64 num) int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) { int i, count = 0; - struct nfs4_file *fp; - struct nfs4_delegation *dp, *next; + struct nfs4_file *fp, *fnext; + struct nfs4_delegation *dp, *dnext; for (i = 0; i < FILE_HASH_SIZE; i++) { - list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { - list_for_each_entry_safe(dp, next, &fp->fi_delegations, dl_perfile) { + list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { + list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { deleg_func(dp); if (++count == num) return count; } } } + return count; } From aec39680b02ed55fcbb2bc87ad96eba16a651546 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jan 2012 17:30:05 -0500 Subject: [PATCH 26/31] nfsd4: fix spurious 4.1 post-reboot failures In the NFSv4.1 case, this could cause a spurious "NFSD: failed to write recovery record (err -17); please check that /var/lib/nfs/v4recovery exists and is writable. Signed-off-by: J. Bruce Fields Reported-by: Steve Dickson --- fs/nfsd/nfs4recover.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index ed083b9a731b..28712e20bb11 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) status = PTR_ERR(dentry); goto out_unlock; } - status = -EEXIST; if (dentry->d_inode) + /* + * In the 4.1 case, where we're called from + * reclaim_complete(), records from the previous reboot + * may still be left, so this is OK. + * + * In the 4.0 case, we should never get here; but we may + * as well be forgiving and just succeed silently. + */ goto out_put; status = mnt_want_write(rec_file->f_path.mnt); if (status) From b8548894bde94ccee836e210274ff401225e9733 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 2 Jan 2012 17:49:12 -0500 Subject: [PATCH 27/31] nfsd4: be forgiving in the absence of the recovery directory If the recovery directory doesn't exist, then behavior after a reboot will be suboptimal. But it's unnecessarily harsh to then prevent the nfsv4 server from working at all. Instead just print a warning (already done in nfsd4_init_recdir()) and soldier on. Tested-by: Lior Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 28712e20bb11..eb01fffdc2f1 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -127,10 +127,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (!rec_file || clp->cl_firststate) + if (clp->cl_firststate) return 0; - clp->cl_firststate = 1; + if (!rec_file) + return -ENOENT; status = nfs4_save_creds(&original_cred); if (status < 0) return status; From 61c8504c428edcebf23b97775a129c5b393a302b Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Thu, 22 Dec 2011 18:22:49 -0700 Subject: [PATCH 28/31] svcrpc: fix double-free on shutdown of nfsd after changing pool mode The pool_to and to_pool fields of the global svc_pool_map are freed on shutdown, but are initialized in nfsd startup only in the SVC_POOL_PERCPU and SVC_POOL_PERNODE cases. They *are* initialized to zero on kernel startup. So as long as you use only SVC_POOL_GLOBAL (the default), this will never be a problem. You're also OK if you only ever use SVC_POOL_PERCPU or SVC_POOL_PERNODE. However, the following sequence events leads to a double-free: 1. set SVC_POOL_PERCPU or SVC_POOL_PERNODE 2. start nfsd: both fields are initialized. 3. shutdown nfsd: both fields are freed. 4. set SVC_POOL_GLOBAL 5. start nfsd: the fields are left untouched. 6. shutdown nfsd: now we try to free them again. Step 4 is actually unnecessary, since (for some bizarre reason), nfsd automatically resets the pool mode to SVC_POOL_GLOBAL on shutdown. Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e9632bb66535..1dd5fd014559 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) fail_free: kfree(m->to_pool); + m->to_pool = NULL; fail: return -ENOMEM; } @@ -287,7 +288,9 @@ svc_pool_map_put(void) if (!--m->count) { m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); + m->to_pool = NULL; kfree(m->pool_to); + m->pool_to = NULL; m->npools = 0; } From 9689dcce0b456793c46bdeea7a79adfab1bc9c5d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 23 Dec 2011 13:52:19 -0700 Subject: [PATCH 29/31] svcrpc: don't revert to SVC_POOL_DEFAULT on nfsd shutdown This was unexpected behavior (at least for me)--why would you want configuration settings automatically lost on nfsd restart? In practice this won't affect distributions, which likely set everything on every startup. But I'd expect the behavior to be less confusing to someone manually restarting nfsd for testing. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1dd5fd014559..97017989fa1d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -286,7 +286,6 @@ svc_pool_map_put(void) mutex_lock(&svc_pool_map_mutex); if (!--m->count) { - m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); m->to_pool = NULL; kfree(m->pool_to); From 9b4146e85536ebd6b989f43906986e34486efdba Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Jan 2012 16:26:43 -0500 Subject: [PATCH 30/31] NFSD: Change name of extended attribute containing junction As of fedfs-utils-0.8.0, user space stores all NFS junction information in a single extended attribute: "trusted.junction.nfs". Both FedFS and NFS basic junctions are stored in this one attribute, and the intention is that all future forms of NFS junction metadata will be stored in this attribute. Other protocols may use a different extended attribute. Thus NFSD needs to look only for that one extended attribute. The "trusted.junction.type" xattr is deprecated. fedfs-utils-0.8.0 will continue to attach a "trusted.junction.type" xattr to junctions, but future fedfs-utils releases may no longer do that. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/vfs.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7a2e442623c8..896891306215 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac return error; } -#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." -#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" +/* + * NFS junction information is stored in an extended attribute. + */ +#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs" + +/** + * nfsd4_is_junction - Test if an object could be an NFS junction + * + * @dentry: object to test + * + * Returns 1 if "dentry" appears to contain NFS junction information. + * Otherwise 0 is returned. + */ int nfsd4_is_junction(struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; if (!(inode->i_mode & S_ISVTX)) return 0; - if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) + if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) return 0; return 1; } From 7a6ef8c72314f254c107c6a9ed7cb201961ee05a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 5 Jan 2012 15:38:41 -0500 Subject: [PATCH 31/31] nfsd4: nfsd4_create_clid_dir return value is unused Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 10 ++++------ fs/nfsd/state.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index eb01fffdc2f1..a52f267f122a 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -117,8 +117,7 @@ out_no_tfm: return status; } -int -nfsd4_create_clid_dir(struct nfs4_client *clp) +void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char *dname = clp->cl_recdir; @@ -128,13 +127,13 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); if (clp->cl_firststate) - return 0; + return; clp->cl_firststate = 1; if (!rec_file) - return -ENOENT; + return; status = nfs4_save_creds(&original_cred); if (status < 0) - return status; + return; dir = rec_file->f_path.dentry; /* lock the parent */ @@ -172,7 +171,6 @@ out_unlock: " and is writeable", status, user_recovery_dirname); nfs4_reset_creds(original_cred); - return status; } typedef int (recdir_func)(struct dentry *, struct dentry *); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 89c2cd84d796..ffb5df1db94f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -483,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void); extern int nfs4_client_to_reclaim(const char *name); extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); extern void nfsd4_recdir_purge_old(void); -extern int nfsd4_create_clid_dir(struct nfs4_client *clp); +extern void nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); extern void release_session_client(struct nfsd4_session *); extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);