[Midnightbsd-cvs] src [10026] trunk/sys/fs: sync nullfs and nfs with freebsd

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sun May 27 18:18:25 EDT 2018


Revision: 10026
          http://svnweb.midnightbsd.org/src/?rev=10026
Author:   laffer1
Date:     2018-05-27 18:18:25 -0400 (Sun, 27 May 2018)
Log Message:
-----------
sync nullfs and nfs with freebsd

Modified Paths:
--------------
    trunk/sys/fs/nfs/nfs.h
    trunk/sys/fs/nfs/nfs_commonacl.c
    trunk/sys/fs/nfs/nfs_commonkrpc.c
    trunk/sys/fs/nfs/nfs_commonport.c
    trunk/sys/fs/nfs/nfs_commonsubs.c
    trunk/sys/fs/nfs/nfs_var.h
    trunk/sys/fs/nfs/nfscl.h
    trunk/sys/fs/nfs/nfsclstate.h
    trunk/sys/fs/nfs/nfsdport.h
    trunk/sys/fs/nfs/nfskpiport.h
    trunk/sys/fs/nfs/nfsm_subs.h
    trunk/sys/fs/nfs/nfsport.h
    trunk/sys/fs/nfs/nfsproto.h
    trunk/sys/fs/nfs/nfsrvcache.h
    trunk/sys/fs/nfs/nfsrvstate.h
    trunk/sys/fs/nfs/nfsv4_errstr.h
    trunk/sys/fs/nfs/rpcv2.h
    trunk/sys/fs/nfs/xdr_subs.h
    trunk/sys/fs/nfsclient/nfs.h
    trunk/sys/fs/nfsclient/nfs_clbio.c
    trunk/sys/fs/nfsclient/nfs_clcomsubs.c
    trunk/sys/fs/nfsclient/nfs_clkdtrace.c
    trunk/sys/fs/nfsclient/nfs_clkrpc.c
    trunk/sys/fs/nfsclient/nfs_clnfsiod.c
    trunk/sys/fs/nfsclient/nfs_clnode.c
    trunk/sys/fs/nfsclient/nfs_clport.c
    trunk/sys/fs/nfsclient/nfs_clrpcops.c
    trunk/sys/fs/nfsclient/nfs_clstate.c
    trunk/sys/fs/nfsclient/nfs_clsubs.c
    trunk/sys/fs/nfsclient/nfs_clvfsops.c
    trunk/sys/fs/nfsclient/nfs_clvnops.c
    trunk/sys/fs/nfsclient/nfs_kdtrace.h
    trunk/sys/fs/nfsclient/nfsmount.h
    trunk/sys/fs/nfsclient/nfsnode.h
    trunk/sys/fs/nfsclient/nlminfo.h
    trunk/sys/fs/nfsserver/nfs_fha_new.c
    trunk/sys/fs/nfsserver/nfs_fha_new.h
    trunk/sys/fs/nfsserver/nfs_nfsdcache.c
    trunk/sys/fs/nfsserver/nfs_nfsdkrpc.c
    trunk/sys/fs/nfsserver/nfs_nfsdport.c
    trunk/sys/fs/nfsserver/nfs_nfsdserv.c
    trunk/sys/fs/nfsserver/nfs_nfsdsocket.c
    trunk/sys/fs/nfsserver/nfs_nfsdstate.c
    trunk/sys/fs/nfsserver/nfs_nfsdsubs.c
    trunk/sys/fs/nullfs/null.h
    trunk/sys/fs/nullfs/null_subr.c
    trunk/sys/fs/nullfs/null_vfsops.c
    trunk/sys/fs/nullfs/null_vnops.c

Modified: trunk/sys/fs/nfs/nfs.h
===================================================================
--- trunk/sys/fs/nfs/nfs.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfs.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfs.h 317404 2017-04-25 11:36:25Z rmacklem $
  */
 
 #ifndef _NFS_NFS_H_
@@ -50,6 +51,8 @@
 #define	NFS_MAXREXMIT	100		/* Stop counting after this many */
 #define	NFSV4_CALLBACKTIMEO (2 * NFS_HZ) /* Timeout in ticks */
 #define	NFSV4_CALLBACKRETRY 5		/* Number of retries before failure */
+#define	NFSV4_SLOTS	64		/* Number of slots, fore channel */
+#define	NFSV4_CBSLOTS	8		/* Number of slots, back channel */
 #define	NFSV4_CBRETRYCNT 4		/* # of CBRecall retries upon err */
 #define	NFSV4_UPCALLTIMEO (15 * NFS_HZ)	/* Timeout in ticks for upcalls */
 					/* to gssd or nfsuserd */
@@ -90,16 +93,16 @@
 #ifndef NFSLOCKHASHSIZE
 #define	NFSLOCKHASHSIZE		20	/* Size of server nfslock hash table */
 #endif
+#ifndef NFSSESSIONHASHSIZE
+#define	NFSSESSIONHASHSIZE	20	/* Size of server session hash table */
+#endif
 #define	NFSSTATEHASHSIZE	10	/* Size of server stateid hash table */
-#ifndef NFSUSERHASHSIZE
-#define	NFSUSERHASHSIZE		30	/* Size of user id hash table */
-#endif
-#ifndef NFSGROUPHASHSIZE
-#define	NFSGROUPHASHSIZE	5	/* Size of group id hash table */
-#endif
 #ifndef	NFSCLDELEGHIGHWATER
 #define	NFSCLDELEGHIGHWATER	10000	/* limit for client delegations */
 #endif
+#ifndef	NFSCLLAYOUTHIGHWATER
+#define	NFSCLLAYOUTHIGHWATER	10000	/* limit for client pNFS layouts */
+#endif
 #ifndef NFSNOOPEN			/* Inactive open owner (sec) */
 #define	NFSNOOPEN		120
 #endif
@@ -130,11 +133,11 @@
 
 /*
  * This macro defines the high water mark for issuing V4 delegations.
- * (It is currently set at a conservative 20% of NFSRV_V4STATELIMIT. This
+ * (It is currently set at a conservative 20% of nfsrv_v4statelimit. This
  *  may want to increase when clients can make more effective use of
  *  delegations.)
  */
-#define	NFSRV_V4DELEGLIMIT(c) (((c) * 5) > NFSRV_V4STATELIMIT)
+#define	NFSRV_V4DELEGLIMIT(c) (((c) * 5) > nfsrv_v4statelimit)
 
 #define	NFS_READDIRBLKSIZ	DIRBLKSIZ	/* Minimal nm_readdirsize */
 
@@ -151,7 +154,7 @@
 	(t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0)
 #define	NFS_SRVMAXDATA(n) 						\
 		(((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? 		\
-		 NFS_MAXDATA : NFS_V2MAXDATA)
+		 NFS_SRVMAXIO : NFS_V2MAXDATA)
 #define	NFS64BITSSET	0xffffffffffffffffull
 #define	NFS64BITSMINUS1	0xfffffffffffffffeull
 
@@ -196,8 +199,20 @@
 	int		nid_usertimeout;/* User name timeout (minutes) */
 	u_char		*nid_name;	/* Name */
 	int		nid_namelen;	/* and its length */
+	gid_t		*nid_grps;	/* and the list */
+	int		nid_ngroup;	/* Size of groups list */
 };
 
+struct nfsd_oidargs {
+	int		nid_flag;	/* Flags (see below) */
+	uid_t		nid_uid;	/* user/group id */
+	gid_t		nid_gid;
+	int		nid_usermax;	/* Upper bound on user name cache */
+	int		nid_usertimeout;/* User name timeout (minutes) */
+	u_char		*nid_name;	/* Name */
+	int		nid_namelen;	/* and its length */
+};
+
 struct nfsd_clid {
 	int		nclid_idlen;	/* Length of client id */
 	u_char		nclid_id[NFSV4_OPAQUELIMIT]; /* and name */
@@ -272,6 +287,8 @@
 #define	LCL_GSSINTEGRITY	0x00002000
 #define	LCL_GSSPRIVACY		0x00004000
 #define	LCL_ADMINREVOKED	0x00008000
+#define	LCL_RECLAIMCOMPLETE	0x00010000
+#define	LCL_NFSV41		0x00020000
 
 #define	LCL_GSS		LCL_KERBV	/* Or of all mechs */
 
@@ -314,6 +331,11 @@
 #define	NFSLCK_SETATTR		0x02000000
 #define	NFSLCK_DELEGPURGE	0x04000000
 #define	NFSLCK_DELEGRETURN	0x08000000
+#define	NFSLCK_WANTWDELEG	0x10000000
+#define	NFSLCK_WANTRDELEG	0x20000000
+#define	NFSLCK_WANTNODELEG	0x40000000
+#define	NFSLCK_WANTBITS							\
+    (NFSLCK_WANTWDELEG | NFSLCK_WANTRDELEG | NFSLCK_WANTNODELEG)
 
 /* And bits for nid_flag */
 #define	NFSID_INITIALIZE	0x0001
@@ -331,11 +353,6 @@
  */
 #define	NFS_NFSSTATS	1		/* struct: struct nfsstats */
 
-#define	FS_NFS_NAMES { 							\
-		       { 0, 0 }, 					\
-		       { "nfsstats", CTLTYPE_STRUCT }, 			\
-}
-
 /*
  * Here is the definition of the attribute bits array and macros that
  * manipulate it.
@@ -342,68 +359,120 @@
  * THE MACROS MUST BE MANUALLY MODIFIED IF NFSATTRBIT_MAXWORDS CHANGES!!
  * It is (NFSATTRBIT_MAX + 31) / 32.
  */
-#define	NFSATTRBIT_MAXWORDS	2
+#define	NFSATTRBIT_MAXWORDS	3
 
 typedef struct {
 	u_int32_t bits[NFSATTRBIT_MAXWORDS];
 } nfsattrbit_t;
 
-#define	NFSZERO_ATTRBIT(b) do { (b)->bits[0] = 0; (b)->bits[1] = 0; } while (0)
-#define	NFSSET_ATTRBIT(t, f) do { (t)->bits[0] = (f)->bits[0]; 		\
-				  (t)->bits[1] = (f)->bits[1]; } while (0)
+#define	NFSZERO_ATTRBIT(b) do {						\
+	(b)->bits[0] = 0;						\
+	(b)->bits[1] = 0;						\
+	(b)->bits[2] = 0;						\
+} while (0)
+
+#define	NFSSET_ATTRBIT(t, f) do {					\
+	(t)->bits[0] = (f)->bits[0];			 		\
+	(t)->bits[1] = (f)->bits[1];					\
+	(t)->bits[2] = (f)->bits[2];					\
+} while (0)
+
 #define	NFSSETSUPP_ATTRBIT(b) do { 					\
 	(b)->bits[0] = NFSATTRBIT_SUPP0; 				\
-	(b)->bits[1] = (NFSATTRBIT_SUPP1 | NFSATTRBIT_SUPPSETONLY); } while (0)
+	(b)->bits[1] = (NFSATTRBIT_SUPP1 | NFSATTRBIT_SUPPSETONLY);	\
+	(b)->bits[2] = NFSATTRBIT_SUPP2;				\
+} while (0)
+
 #define	NFSISSET_ATTRBIT(b, p)	((b)->bits[(p) / 32] & (1 << ((p) % 32)))
 #define	NFSSETBIT_ATTRBIT(b, p)	((b)->bits[(p) / 32] |= (1 << ((p) % 32)))
 #define	NFSCLRBIT_ATTRBIT(b, p)	((b)->bits[(p) / 32] &= ~(1 << ((p) % 32)))
+
 #define	NFSCLRALL_ATTRBIT(b, a)	do { 					\
-		(b)->bits[0] &= ~((a)->bits[0]); 			\
-		(b)->bits[1] &= ~((a)->bits[1]); 			\
-		} while (0)
+	(b)->bits[0] &= ~((a)->bits[0]);	 			\
+	(b)->bits[1] &= ~((a)->bits[1]);	 			\
+	(b)->bits[2] &= ~((a)->bits[2]);				\
+} while (0)
+
 #define	NFSCLRNOT_ATTRBIT(b, a)	do { 					\
-		(b)->bits[0] &= ((a)->bits[0]); 			\
-		(b)->bits[1] &= ((a)->bits[1]); 			\
-		} while (0)
+	(b)->bits[0] &= ((a)->bits[0]);		 			\
+	(b)->bits[1] &= ((a)->bits[1]);		 			\
+	(b)->bits[2] &= ((a)->bits[2]);		 			\
+} while (0)
+
 #define	NFSCLRNOTFILLABLE_ATTRBIT(b) do { 				\
-		(b)->bits[0] &= NFSATTRBIT_SUPP0; 			\
-		(b)->bits[1] &= NFSATTRBIT_SUPP1; } while (0)
+	(b)->bits[0] &= NFSATTRBIT_SUPP0;	 			\
+	(b)->bits[1] &= NFSATTRBIT_SUPP1;				\
+	(b)->bits[2] &= NFSATTRBIT_SUPP2;				\
+} while (0)
+
 #define	NFSCLRNOTSETABLE_ATTRBIT(b) do { 				\
-		(b)->bits[0] &= NFSATTRBIT_SETABLE0; 			\
-		(b)->bits[1] &= NFSATTRBIT_SETABLE1; } while (0)
-#define	NFSNONZERO_ATTRBIT(b)	((b)->bits[0] || (b)->bits[1])
-#define	NFSEQUAL_ATTRBIT(b, p)						\
-	((b)->bits[0] == (p)->bits[0] && (b)->bits[1] == (p)->bits[1])
+	(b)->bits[0] &= NFSATTRBIT_SETABLE0;	 			\
+	(b)->bits[1] &= NFSATTRBIT_SETABLE1;				\
+	(b)->bits[2] &= NFSATTRBIT_SETABLE2;				\
+} while (0)
+
+#define	NFSNONZERO_ATTRBIT(b)	((b)->bits[0] || (b)->bits[1] || (b)->bits[2])
+#define	NFSEQUAL_ATTRBIT(b, p)	((b)->bits[0] == (p)->bits[0] &&	\
+	(b)->bits[1] == (p)->bits[1] && (b)->bits[2] == (p)->bits[2])
+
 #define	NFSGETATTR_ATTRBIT(b) do { 					\
-		(b)->bits[0] = NFSATTRBIT_GETATTR0; 			\
-		(b)->bits[1] = NFSATTRBIT_GETATTR1; } while (0)
+	(b)->bits[0] = NFSATTRBIT_GETATTR0;	 			\
+	(b)->bits[1] = NFSATTRBIT_GETATTR1;				\
+	(b)->bits[2] = NFSATTRBIT_GETATTR2;				\
+} while (0)
+
 #define	NFSWCCATTR_ATTRBIT(b) do { 					\
-		(b)->bits[0] = NFSATTRBIT_WCCATTR0; 			\
-		(b)->bits[1] = NFSATTRBIT_WCCATTR1; } while (0)
+	(b)->bits[0] = NFSATTRBIT_WCCATTR0;	 			\
+	(b)->bits[1] = NFSATTRBIT_WCCATTR1;				\
+	(b)->bits[2] = NFSATTRBIT_WCCATTR2;				\
+} while (0)
+
 #define	NFSWRITEGETATTR_ATTRBIT(b) do { 				\
-		(b)->bits[0] = NFSATTRBIT_WRITEGETATTR0;		\
-		(b)->bits[1] = NFSATTRBIT_WRITEGETATTR1; } while (0)
+	(b)->bits[0] = NFSATTRBIT_WRITEGETATTR0;			\
+	(b)->bits[1] = NFSATTRBIT_WRITEGETATTR1;			\
+	(b)->bits[2] = NFSATTRBIT_WRITEGETATTR2;			\
+} while (0)
+
 #define	NFSCBGETATTR_ATTRBIT(b, c) do { 				\
-	(c)->bits[0] = ((b)->bits[0] & NFSATTRBIT_CBGETATTR0); 		\
-	(c)->bits[1] = ((b)->bits[1] & NFSATTRBIT_CBGETATTR1); } while (0)
+	(c)->bits[0] = ((b)->bits[0] & NFSATTRBIT_CBGETATTR0);		\
+	(c)->bits[1] = ((b)->bits[1] & NFSATTRBIT_CBGETATTR1);		\
+	(c)->bits[2] = ((b)->bits[2] & NFSATTRBIT_CBGETATTR2);		\
+} while (0)
+
 #define	NFSPATHCONF_GETATTRBIT(b) do { 					\
-		(b)->bits[0] = NFSGETATTRBIT_PATHCONF0; 		\
-		(b)->bits[1] = NFSGETATTRBIT_PATHCONF1; } while (0)
+	(b)->bits[0] = NFSGETATTRBIT_PATHCONF0;		 		\
+	(b)->bits[1] = NFSGETATTRBIT_PATHCONF1;				\
+	(b)->bits[2] = NFSGETATTRBIT_PATHCONF2;				\
+} while (0)
+
 #define	NFSSTATFS_GETATTRBIT(b)	do { 					\
-		(b)->bits[0] = NFSGETATTRBIT_STATFS0; 			\
-		(b)->bits[1] = NFSGETATTRBIT_STATFS1; } while (0)
+	(b)->bits[0] = NFSGETATTRBIT_STATFS0;	 			\
+	(b)->bits[1] = NFSGETATTRBIT_STATFS1;				\
+	(b)->bits[2] = NFSGETATTRBIT_STATFS2;				\
+} while (0)
+
 #define	NFSISSETSTATFS_ATTRBIT(b) 					\
 		(((b)->bits[0] & NFSATTRBIT_STATFS0) || 		\
-		 ((b)->bits[1] & NFSATTRBIT_STATFS1))
+		 ((b)->bits[1] & NFSATTRBIT_STATFS1) ||			\
+		 ((b)->bits[2] & NFSATTRBIT_STATFS2))
+
 #define	NFSCLRSTATFS_ATTRBIT(b)	do { 					\
-		(b)->bits[0] &= ~NFSATTRBIT_STATFS0; 			\
-		(b)->bits[1] &= ~NFSATTRBIT_STATFS1; } while (0)
+	(b)->bits[0] &= ~NFSATTRBIT_STATFS0;	 			\
+	(b)->bits[1] &= ~NFSATTRBIT_STATFS1;				\
+	(b)->bits[2] &= ~NFSATTRBIT_STATFS2;				\
+} while (0)
+
 #define	NFSREADDIRPLUS_ATTRBIT(b) do { 					\
-		(b)->bits[0] = NFSATTRBIT_READDIRPLUS0; 		\
-		(b)->bits[1] = NFSATTRBIT_READDIRPLUS1; } while (0)
+	(b)->bits[0] = NFSATTRBIT_READDIRPLUS0;		 		\
+	(b)->bits[1] = NFSATTRBIT_READDIRPLUS1;				\
+	(b)->bits[2] = NFSATTRBIT_READDIRPLUS2;				\
+} while (0)
+
 #define	NFSREFERRAL_ATTRBIT(b) do { 					\
-		(b)->bits[0] = NFSATTRBIT_REFERRAL0;	 		\
-		(b)->bits[1] = NFSATTRBIT_REFERRAL1; } while (0)
+	(b)->bits[0] = NFSATTRBIT_REFERRAL0;		 		\
+	(b)->bits[1] = NFSATTRBIT_REFERRAL1;				\
+	(b)->bits[2] = NFSATTRBIT_REFERRAL2;				\
+} while (0)
 
 /*
  * Store uid, gid creds that were used when the stateid was acquired.
@@ -462,6 +531,7 @@
 	u_int32_t	nr_prog;
 	u_int32_t	nr_vers;
 	struct __rpc_client *nr_client;
+	AUTH		*nr_auth;
 };
 
 /*
@@ -528,6 +598,11 @@
 	nfsquad_t		nd_clientid;	/* Implied clientid */
 	int			nd_gssnamelen;	/* principal name length */
 	char			*nd_gssname;	/* principal name */
+	uint32_t		*nd_slotseq;	/* ptr to slot seq# in req */
+	uint8_t			nd_sessionid[NFSX_V4SESSIONID];	/* Session id */
+	uint32_t		nd_slotid;	/* Slotid for this RPC */
+	SVCXPRT			*nd_xprt;	/* Server RPC handle */
+	uint32_t		*nd_sequence;	/* Sequence Op. ptr */
 };
 
 #define	nd_princlen	nd_gssnamelen
@@ -559,6 +634,11 @@
 #define	ND_EXGSSPRIVACY		0x00400000
 #define	ND_INCRSEQID		0x00800000
 #define	ND_NFSCL		0x01000000
+#define	ND_NFSV41		0x02000000
+#define	ND_HASSEQUENCE		0x04000000
+#define	ND_CACHETHIS		0x08000000
+#define	ND_LASTOP		0x10000000
+#define	ND_LOOPBADSESS		0x20000000
 
 /*
  * ND_GSS should be the "or" of all GSS type authentications.
@@ -571,6 +651,8 @@
 	int	savereply;
 	int	modifyfs;
 	int	lktype;
+	int	needsseq;
+	int	loopbadsess;
 };
 
 /*
@@ -644,6 +726,15 @@
 #define	NFSACCCHK_VPNOTLOCKED		0
 #define	NFSACCCHK_VPISLOCKED		1
 
+/*
+ * Slot for the NFSv4.1 Sequence Op.
+ */
+struct nfsslot {
+	int		nfssl_inprog;
+	uint32_t	nfssl_seq;
+	struct mbuf	*nfssl_reply;
+};
+
 #endif	/* _KERNEL */
 
 #endif	/* _NFS_NFS_H */

Modified: trunk/sys/fs/nfs/nfs_commonacl.c
===================================================================
--- trunk/sys/fs/nfs/nfs_commonacl.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfs_commonacl.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -26,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfs/nfs_commonacl.c 240720 2012-09-20 02:49:25Z rmacklem $");
 
 #ifndef APPLEKEXT
 #include <fs/nfs/nfsport.h>

Modified: trunk/sys/fs/nfs/nfs_commonkrpc.c
===================================================================
--- trunk/sys/fs/nfs/nfs_commonkrpc.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfs_commonkrpc.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1991, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfs/nfs_commonkrpc.c 325407 2017-11-04 21:30:27Z rmacklem $");
 
 /*
  * Socket operations for use by nfs
@@ -59,6 +60,7 @@
 #include <sys/vnode.h>
 
 #include <rpc/rpc.h>
+#include <rpc/krpc.h>
 
 #include <kgssapi/krb5/kcrypto.h>
 
@@ -76,29 +78,33 @@
 /*
  * Registered probes by RPC type.
  */
-uint32_t	nfscl_nfs2_start_probes[NFS_NPROCS + 1];
-uint32_t	nfscl_nfs2_done_probes[NFS_NPROCS + 1];
+uint32_t	nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
+uint32_t	nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];
 
-uint32_t	nfscl_nfs3_start_probes[NFS_NPROCS + 1];
-uint32_t	nfscl_nfs3_done_probes[NFS_NPROCS + 1];
+uint32_t	nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
+uint32_t	nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];
 
-uint32_t	nfscl_nfs4_start_probes[NFS_NPROCS + 1];
-uint32_t	nfscl_nfs4_done_probes[NFS_NPROCS + 1];
+uint32_t	nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
+uint32_t	nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
 #endif
 
 NFSSTATESPINLOCK;
 NFSREQSPINLOCK;
+NFSDLOCKMUTEX;
+NFSCLSTATEMUTEX;
 extern struct nfsstats newnfsstats;
 extern struct nfsreqhead nfsd_reqq;
 extern int nfscl_ticks;
 extern void (*ncl_call_invalcaches)(struct vnode *);
+extern int nfs_numnfscbd;
+extern int nfscl_debuglevel;
 
+SVCPOOL		*nfscbd_pool;
 static int	nfsrv_gsscallbackson = 0;
 static int	nfs_bufpackets = 4;
 static int	nfs_reconnects;
 static int	nfs3_jukebox_delay = 10;
 static int	nfs_skip_wcc_data_onerr = 1;
-static int	nfs_keytab_enctype = ETYPE_DES_CBC_CRC;
 
 SYSCTL_DECL(_vfs_nfs);
 
@@ -110,8 +116,6 @@
     "Number of seconds to delay a retry after receiving EJUKEBOX");
 SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
     "Disable weak cache consistency checking when server returns an error");
-SYSCTL_INT(_vfs_nfs, OID_AUTO, keytab_enctype, CTLFLAG_RW, &nfs_keytab_enctype, 0,
-    "Encryption type for the keytab entry used by nfs");
 
 static void	nfs_down(struct nfsmount *, struct thread *, const char *,
     int, int);
@@ -159,7 +163,7 @@
     struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
 {
 	int rcvreserve, sndreserve;
-	int pktscale;
+	int pktscale, pktscalesav;
 	struct sockaddr *saddr;
 	struct ucred *origcred;
 	CLIENT *client;
@@ -167,6 +171,7 @@
 	struct socket *so;
 	int one = 1, retries, error = 0;
 	struct thread *td = curthread;
+	SVCXPRT *xprt;
 	struct timeval timo;
 
 	/*
@@ -207,6 +212,7 @@
 		pktscale = 2;
 	if (pktscale > 64)
 		pktscale = 64;
+	pktscalesav = pktscale;
 	/*
 	 * soreserve() can fail if sb_max is too small, so shrink pktscale
 	 * and try again if there is an error.
@@ -225,8 +231,12 @@
 		goto out;
 	}
 	do {
-	    if (error != 0 && pktscale > 2)
+	    if (error != 0 && pktscale > 2) {
+		if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+		    pktscale == pktscalesav)
+		    printf("Consider increasing kern.ipc.maxsockbuf\n");
 		pktscale--;
+	    }
 	    if (nrp->nr_sotype == SOCK_DGRAM) {
 		if (nmp != NULL) {
 			sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
@@ -240,9 +250,9 @@
 		if (nrp->nr_sotype != SOCK_STREAM)
 			panic("nfscon sotype");
 		if (nmp != NULL) {
-			sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+			sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
 			    sizeof (u_int32_t)) * pktscale;
-			rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
+			rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
 			    sizeof (u_int32_t)) * pktscale;
 		} else {
 			sndreserve = rcvreserve = 1024 * pktscale;
@@ -249,6 +259,10 @@
 		}
 	    }
 	    error = soreserve(so, sndreserve, rcvreserve);
+	    if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
+		pktscale <= 2)
+		printf("Must increase kern.ipc.maxsockbuf or reduce"
+		    " rsize, wsize\n");
 	} while (error != 0 && pktscale > 2);
 	soclose(so);
 	if (error) {
@@ -277,6 +291,25 @@
 				retries = nmp->nm_retry;
 		} else
 			retries = INT_MAX;
+		/* cred == NULL for DS connects. */
+		if (NFSHASNFSV4N(nmp) && cred != NULL) {
+			/*
+			 * Make sure the nfscbd_pool doesn't get destroyed
+			 * while doing this.
+			 */
+			NFSD_LOCK();
+			if (nfs_numnfscbd > 0) {
+				nfs_numnfscbd++;
+				NFSD_UNLOCK();
+				xprt = svc_vc_create_backchannel(nfscbd_pool);
+				CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt);
+				NFSD_LOCK();
+				nfs_numnfscbd--;
+				if (nfs_numnfscbd == 0)
+					wakeup(&nfs_numnfscbd);
+			}
+			NFSD_UNLOCK();
+		}
 	} else {
 		/*
 		 * Three cases:
@@ -316,6 +349,7 @@
 
 	mtx_lock(&nrp->nr_mtx);
 	if (nrp->nr_client != NULL) {
+		mtx_unlock(&nrp->nr_mtx);
 		/*
 		 * Someone else already connected.
 		 */
@@ -322,18 +356,18 @@
 		CLNT_RELEASE(client);
 	} else {
 		nrp->nr_client = client;
+		/*
+		 * Protocols that do not require connections may be optionally
+		 * left unconnected for servers that reply from a port other
+		 * than NFS_PORT.
+		 */
+		if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
+			mtx_unlock(&nrp->nr_mtx);
+			CLNT_CONTROL(client, CLSET_CONNECT, &one);
+		} else
+			mtx_unlock(&nrp->nr_mtx);
 	}
 
-	/*
-	 * Protocols that do not require connections may be optionally left
-	 * unconnected for servers that reply from a port other than NFS_PORT.
-	 */
-	if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
-		mtx_unlock(&nrp->nr_mtx);
-		CLNT_CONTROL(client, CLSET_CONNECT, &one);
-	} else {
-		mtx_unlock(&nrp->nr_mtx);
-	}
 
 	/* Restore current thread's credentials. */
 	td->td_ucred = origcred;
@@ -370,9 +404,6 @@
 {
 	rpc_gss_service_t svc;
 	AUTH *auth;
-#ifdef notyet
-	rpc_gss_options_req_t req_options;
-#endif
 
 	switch (secflavour) {
 	case RPCSEC_GSS_KRB5:
@@ -388,28 +419,16 @@
 			svc = rpc_gss_svc_integrity;
 		else
 			svc = rpc_gss_svc_privacy;
-#ifdef notyet
-		req_options.req_flags = GSS_C_MUTUAL_FLAG;
-		req_options.time_req = 0;
-		req_options.my_cred = GSS_C_NO_CREDENTIAL;
-		req_options.input_channel_bindings = NULL;
-		req_options.enc_type = nfs_keytab_enctype;
 
-		auth = rpc_gss_secfind_call(nrp->nr_client, cred,
-		    clnt_principal, srv_principal, mech_oid, svc,
-		    &req_options);
-#else
-		/*
-		 * Until changes to the rpcsec_gss code are committed,
-		 * there is no support for host based initiator
-		 * principals. As such, that case cannot yet be handled.
-		 */
 		if (clnt_principal == NULL)
 			auth = rpc_gss_secfind_call(nrp->nr_client, cred,
 			    srv_principal, mech_oid, svc);
-		else
-			auth = NULL;
-#endif
+		else {
+			auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
+			    clnt_principal, srv_principal, "kerberosv5",
+			    svc, NULL, NULL, NULL);
+			return (auth);
+		}
 		if (auth != NULL)
 			return (auth);
 		/* fallthrough */
@@ -467,12 +486,13 @@
 newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
     struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
     struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
-    u_char *retsum, int toplevel, u_int64_t *xidp)
+    u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
 {
-	u_int32_t *tl;
+	uint32_t retseq, retval, slotseq, *tl;
 	time_t waituntil;
-	int i, j, set_sigset = 0, timeo;
+	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
 	int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS;
+	int freeslot, maxslot, reterr, slotpos, timeo;
 	u_int16_t procnum;
 	u_int trylater_delay = 1;
 	struct nfs_feedback_arg nf;
@@ -481,10 +501,13 @@
 	struct rpc_callextra ext;
 	enum clnt_stat stat;
 	struct nfsreq *rep = NULL;
-	char *srv_principal = NULL;
+	char *srv_principal = NULL, *clnt_principal = NULL;
 	sigset_t oldset;
 	struct ucred *authcred;
+	struct nfsclsession *sep;
+	uint8_t sessionid[NFSX_V4SESSIONID];
 
+	sep = dssep;
 	if (xidp != NULL)
 		*xidp = 0;
 	/* Reject requests while attempting a forced unmount. */
@@ -544,6 +567,7 @@
 			 */
 			if (nmp->nm_krbnamelen > 0) {
 				usegssname = 1;
+				clnt_principal = nmp->nm_krbname;
 			} else if (nmp->nm_uid != (uid_t)-1) {
 				KASSERT(nmp->nm_sockreq.nr_cred != NULL,
 				    ("newnfs_request: NULL nr_cred"));
@@ -598,10 +622,19 @@
 
 	if (nd->nd_procnum == NFSPROC_NULL)
 		auth = authnone_create();
-	else if (usegssname)
-		auth = nfs_getauth(nrp, secflavour, nmp->nm_krbname,
-		    srv_principal, NULL, authcred);
-	else
+	else if (usegssname) {
+		/*
+		 * For this case, the authenticator is held in the
+		 * nfssockreq structure, so don't release the reference count
+		 * held on it. --> Don't AUTH_DESTROY() it in this function.
+		 */
+		if (nrp->nr_auth == NULL)
+			nrp->nr_auth = nfs_getauth(nrp, secflavour,
+			    clnt_principal, srv_principal, NULL, authcred);
+		else
+			rpc_gss_refresh_auth_call(nrp->nr_auth);
+		auth = nrp->nr_auth;
+	} else
 		auth = nfs_getauth(nrp, secflavour, NULL,
 		    srv_principal, NULL, authcred);
 	crfree(authcred);
@@ -668,7 +701,9 @@
 #endif
 	}
 	trycnt = 0;
+	freeslot = -1;		/* Set to slot that needs to be free'd */
 tryagain:
+	slot = -1;		/* Slot that needs a sequence# increment. */
 	/*
 	 * This timeout specifies when a new socket should be created,
 	 * along with new xid values. For UDP, this should be done
@@ -720,8 +755,12 @@
 	}
 
 	nd->nd_mrep = NULL;
-	stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq,
-	    &nd->nd_mrep, timo);
+	if (clp != NULL && sep != NULL)
+		stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
+		    nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
+	else
+		stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
+		    nd->nd_mreq, &nd->nd_mrep, timo);
 
 	if (rep != NULL) {
 		/*
@@ -755,7 +794,8 @@
 	}
 	if (error) {
 		m_freem(nd->nd_mreq);
-		AUTH_DESTROY(auth);
+		if (usegssname == 0)
+			AUTH_DESTROY(auth);
 		if (rep != NULL)
 			FREE((caddr_t)rep, M_NFSDREQ);
 		if (set_sigset)
@@ -775,13 +815,155 @@
 	nd->nd_md = nd->nd_mrep;
 	nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t);
 	nd->nd_repstat = 0;
-	if (nd->nd_procnum != NFSPROC_NULL) {
+	if (nd->nd_procnum != NFSPROC_NULL &&
+	    nd->nd_procnum != NFSV4PROC_CBNULL) {
+		/* If sep == NULL, set it to the default in nmp. */
+		if (sep == NULL && nmp != NULL)
+			sep = nfsmnt_mdssession(nmp);
 		/*
 		 * and now the actual NFS xdr.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
+		if (nd->nd_repstat >= 10000)
+			NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
+			    (int)nd->nd_repstat);
+
+		/*
+		 * Get rid of the tag, return count and SEQUENCE result for
+		 * NFSv4.
+		 */
+		if ((nd->nd_flag & ND_NFSV4) != 0) {
+			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
+			i = fxdr_unsigned(int, *tl);
+			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
+			if (error)
+				goto nfsmout;
+			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
+			opcnt = fxdr_unsigned(int, *tl++);
+			i = fxdr_unsigned(int, *tl++);
+			j = fxdr_unsigned(int, *tl);
+			if (j >= 10000)
+				NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
+			/*
+			 * If the first op is Sequence, free up the slot.
+			 */
+			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
+			    (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0))
+				NFSCL_DEBUG(1, "failed seq=%d\n", j);
+			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
+			    (clp != NULL && i == NFSV4OP_CBSEQUENCE && j == 0)
+			    ) {
+				if (i == NFSV4OP_SEQUENCE)
+					NFSM_DISSECT(tl, uint32_t *,
+					    NFSX_V4SESSIONID +
+					    5 * NFSX_UNSIGNED);
+				else
+					NFSM_DISSECT(tl, uint32_t *,
+					    NFSX_V4SESSIONID +
+					    4 * NFSX_UNSIGNED);
+				mtx_lock(&sep->nfsess_mtx);
+				if (bcmp(tl, sep->nfsess_sessionid,
+				    NFSX_V4SESSIONID) == 0) {
+					tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+					retseq = fxdr_unsigned(uint32_t, *tl++);
+					slot = fxdr_unsigned(int, *tl++);
+					freeslot = slot;
+					if (retseq != sep->nfsess_slotseq[slot])
+						printf("retseq diff 0x%x\n",
+						    retseq);
+					retval = fxdr_unsigned(uint32_t, *++tl);
+					if ((retval + 1) < sep->nfsess_foreslots
+					    )
+						sep->nfsess_foreslots = (retval
+						    + 1);
+					else if ((retval + 1) >
+					    sep->nfsess_foreslots)
+						sep->nfsess_foreslots = (retval
+						    < 64) ? (retval + 1) : 64;
+				}
+				mtx_unlock(&sep->nfsess_mtx);
+
+				/* Grab the op and status for the next one. */
+				if (opcnt > 1) {
+					NFSM_DISSECT(tl, uint32_t *,
+					    2 * NFSX_UNSIGNED);
+					i = fxdr_unsigned(int, *tl++);
+					j = fxdr_unsigned(int, *tl);
+				}
+			}
+		}
 		if (nd->nd_repstat != 0) {
+			if (nd->nd_repstat == NFSERR_BADSESSION &&
+			    nmp != NULL && dssep == NULL) {
+				/*
+				 * If this is a client side MDS RPC, mark
+				 * the MDS session defunct and initiate
+				 * recovery, as required.
+				 * The nfsess_defunct field is protected by
+				 * the NFSLOCKMNT()/nm_mtx lock and not the
+				 * nfsess_mtx lock to simplify its handling,
+				 * for the MDS session. This lock is also
+				 * sufficient for nfsess_sessionid, since it
+				 * never changes in the structure.
+				 */
+				NFSCL_DEBUG(1, "Got badsession\n");
+				NFSLOCKCLSTATE();
+				NFSLOCKMNT(nmp);
+				sep = NFSMNT_MDSSESSION(nmp);
+				if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
+				    NFSX_V4SESSIONID) == 0) {
+					/* Initiate recovery. */
+					sep->nfsess_defunct = 1;
+					NFSCL_DEBUG(1, "Marked defunct\n");
+					if (nmp->nm_clp != NULL) {
+						nmp->nm_clp->nfsc_flags |=
+						    NFSCLFLAGS_RECOVER;
+						wakeup(nmp->nm_clp);
+					}
+				}
+				NFSUNLOCKCLSTATE();
+				/*
+				 * Sleep for up to 1sec waiting for a new
+				 * session.
+				 */
+				mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
+				    "nfsbadsess", hz);
+				/*
+				 * Get the session again, in case a new one
+				 * has been created during the sleep.
+				 */
+				sep = NFSMNT_MDSSESSION(nmp);
+				NFSUNLOCKMNT(nmp);
+				if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
+					reterr = nfsv4_sequencelookup(nmp, sep,
+					    &slotpos, &maxslot, &slotseq,
+					    sessionid);
+					if (reterr == 0) {
+						/* Fill in new session info. */
+						NFSCL_DEBUG(1,
+						  "Filling in new sequence\n");
+						tl = nd->nd_sequence;
+						bcopy(sessionid, tl,
+						    NFSX_V4SESSIONID);
+						tl += NFSX_V4SESSIONID /
+						    NFSX_UNSIGNED;
+						*tl++ = txdr_unsigned(slotseq);
+						*tl++ = txdr_unsigned(slotpos);
+						*tl = txdr_unsigned(maxslot);
+					}
+					if (reterr == NFSERR_BADSESSION ||
+					    reterr == 0) {
+						NFSCL_DEBUG(1,
+						    "Badsession looping\n");
+						m_freem(nd->nd_mrep);
+						nd->nd_mrep = NULL;
+						goto tryagain;
+					}
+					nd->nd_repstat = reterr;
+					NFSCL_DEBUG(1, "Got err=%d\n", reterr);
+				}
+			}
 			if (((nd->nd_repstat == NFSERR_DELAY ||
 			      nd->nd_repstat == NFSERR_GRACE) &&
 			     (nd->nd_flag & ND_NFSV4) &&
@@ -788,7 +970,9 @@
 			     nd->nd_procnum != NFSPROC_DELEGRETURN &&
 			     nd->nd_procnum != NFSPROC_SETATTR &&
 			     nd->nd_procnum != NFSPROC_READ &&
+			     nd->nd_procnum != NFSPROC_READDS &&
 			     nd->nd_procnum != NFSPROC_WRITE &&
+			     nd->nd_procnum != NFSPROC_WRITEDS &&
 			     nd->nd_procnum != NFSPROC_OPEN &&
 			     nd->nd_procnum != NFSPROC_CREATE &&
 			     nd->nd_procnum != NFSPROC_OPENCONFIRM &&
@@ -805,6 +989,13 @@
 				while (NFSD_MONOSEC < waituntil)
 					(void) nfs_catnap(PZERO, 0, "nfstry");
 				trylater_delay *= 2;
+				if (slot != -1) {
+					mtx_lock(&sep->nfsess_mtx);
+					sep->nfsess_slotseq[slot]++;
+					*nd->nd_slotseq = txdr_unsigned(
+					    sep->nfsess_slotseq[slot]);
+					mtx_unlock(&sep->nfsess_mtx);
+				}
 				m_freem(nd->nd_mrep);
 				nd->nd_mrep = NULL;
 				goto tryagain;
@@ -821,34 +1012,22 @@
 					(*ncl_call_invalcaches)(vp);
 			}
 		}
-
-		/*
-		 * Get rid of the tag, return count, and PUTFH result for V4.
-		 */
-		if (nd->nd_flag & ND_NFSV4) {
-			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-			i = fxdr_unsigned(int, *tl);
-			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
-			if (error)
-				goto nfsmout;
-			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
-			i = fxdr_unsigned(int, *++tl);
-
+		if ((nd->nd_flag & ND_NFSV4) != 0) {
+			/* Free the slot, as required. */
+			if (freeslot != -1)
+				nfsv4_freeslot(sep, freeslot);
 			/*
-			 * If the first op's status is non-zero, mark that
-			 * there is no more data to process.
+			 * If this op is Putfh, throw its results away.
 			 */
-			if (*++tl)
-				nd->nd_flag |= ND_NOMOREDATA;
-
-			/*
-			 * If the first op is Putfh, throw its results away
-			 * and toss the op# and status for the first op.
-			 */
-			if (nmp != NULL && i == NFSV4OP_PUTFH && *tl == 0) {
+			if (j >= 10000)
+				NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
+			if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
 				NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED);
 				i = fxdr_unsigned(int, *tl++);
 				j = fxdr_unsigned(int, *tl);
+				if (j >= 10000)
+					NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
+					    j);
 				/*
 				 * All Compounds that do an Op that must
 				 * be in sequence consist of NFSV4OP_PUTFH
@@ -871,13 +1050,15 @@
 				      j != NFSERR_RESOURCE &&
 				      j != NFSERR_NOFILEHANDLE)))		 
 					nd->nd_flag |= ND_INCRSEQID;
-				/*
-				 * If the first op's status is non-zero, mark
-				 * that there is no more data to process.
-				 */
-				if (j)
-					nd->nd_flag |= ND_NOMOREDATA;
 			}
+			/*
+			 * If this op's status is non-zero, mark
+			 * that there is no more data to process.
+			 * The exception is Setattr, which always has xdr
+			 * when it has failed.
+			 */
+			if (j != 0 && i != NFSV4OP_SETATTR)
+				nd->nd_flag |= ND_NOMOREDATA;
 
 			/*
 			 * If R_DONTRECOVER is set, replace the stale error
@@ -884,6 +1065,7 @@
 			 * reply, so that recovery isn't initiated.
 			 */
 			if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
+			     nd->nd_repstat == NFSERR_BADSESSION ||
 			     nd->nd_repstat == NFSERR_STALESTATEID) &&
 			    rep != NULL && (rep->r_flags & R_DONTRECOVER))
 				nd->nd_repstat = NFSERR_STALEDONTRECOVER;
@@ -912,7 +1094,8 @@
 #endif
 
 	m_freem(nd->nd_mreq);
-	AUTH_DESTROY(auth);
+	if (usegssname == 0)
+		AUTH_DESTROY(auth);
 	if (rep != NULL)
 		FREE((caddr_t)rep, M_NFSDREQ);
 	if (set_sigset)
@@ -921,7 +1104,8 @@
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	mbuf_freem(nd->nd_mreq);
-	AUTH_DESTROY(auth);
+	if (usegssname == 0)
+		AUTH_DESTROY(auth);
 	if (rep != NULL)
 		FREE((caddr_t)rep, M_NFSDREQ);
 	if (set_sigset)
@@ -937,9 +1121,29 @@
 int
 newnfs_nmcancelreqs(struct nfsmount *nmp)
 {
+	struct nfsclds *dsp;
+	struct __rpc_client *cl;
 
 	if (nmp->nm_sockreq.nr_client != NULL)
 		CLNT_CLOSE(nmp->nm_sockreq.nr_client);
+lookformore:
+	NFSLOCKMNT(nmp);
+	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
+		NFSLOCKDS(dsp);
+		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
+		    (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 &&
+		    dsp->nfsclds_sockp != NULL &&
+		    dsp->nfsclds_sockp->nr_client != NULL) {
+			dsp->nfsclds_flags |= NFSCLDS_CLOSED;
+			cl = dsp->nfsclds_sockp->nr_client;
+			NFSUNLOCKDS(dsp);
+			NFSUNLOCKMNT(nmp);
+			CLNT_CLOSE(cl);
+			goto lookformore;
+		}
+		NFSUNLOCKDS(dsp);
+	}
+	NFSUNLOCKMNT(nmp);
 	return (0);
 }
 

Modified: trunk/sys/fs/nfs/nfs_commonport.c
===================================================================
--- trunk/sys/fs/nfs/nfs_commonport.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfs_commonport.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfs/nfs_commonport.c 317404 2017-04-25 11:36:25Z rmacklem $");
 
 /*
  * Functions that need to be different for different versions of BSD
@@ -63,6 +64,7 @@
 int nfscl_debuglevel = 0;
 char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
 struct callout newnfsd_callout;
+int nfsrv_lughashsize = 100;
 void (*nfsd_call_servertimer)(void) = NULL;
 void (*ncl_call_invalcaches)(struct vnode *) = NULL;
 
@@ -79,6 +81,9 @@
     "NFSv4 callback addr for server to use");
 SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW, &nfscl_debuglevel,
     0, "Debug level for new nfs client");
+TUNABLE_INT("vfs.nfs.userhashsize", &nfsrv_lughashsize);
+SYSCTL_INT(_vfs_nfs, OID_AUTO, userhashsize, CTLFLAG_RDTUN, &nfsrv_lughashsize,
+    0, "Size of hash tables for uid/name mapping");
 
 /*
  * Defines for malloc
@@ -106,6 +111,13 @@
     "New NFS directory offset data");
 MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback",
     "New NFS local lock rollback");
+MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout");
+MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout");
+MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info");
+MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req");
+MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session");
+MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall");
+MALLOC_DEFINE(M_NEWNFSDSESSION, "NFSD session", "NFSD Session for a client");
 
 /*
  * Definition of mutex locks.
@@ -118,6 +130,7 @@
 struct mtx nfs_nameid_mutex;
 struct mtx nfs_req_mutex;
 struct mtx nfs_slock_mutex;
+struct mtx nfs_clstate_mutex;
 
 /* local functions */
 static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *);
@@ -213,7 +226,7 @@
 {
 	int error;
 
-	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, fname,
+	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname,
 	    p);
 	error = namei(ndp);
 	if (!error) {
@@ -274,11 +287,11 @@
 	if (isdgram)
 		pref = NFS_MAXDGRAMDATA;
 	else
-		pref = NFS_MAXDATA;
-	sip->fs_rtmax = NFS_MAXDATA;
+		pref = NFS_SRVMAXIO;
+	sip->fs_rtmax = NFS_SRVMAXIO;
 	sip->fs_rtpref = pref;
 	sip->fs_rtmult = NFS_FABLKSIZE;
-	sip->fs_wtmax = NFS_MAXDATA;
+	sip->fs_wtmax = NFS_SRVMAXIO;
 	sip->fs_wtpref = pref;
 	sip->fs_wtmult = NFS_FABLKSIZE;
 	sip->fs_dtpref = pref;
@@ -438,9 +451,25 @@
 {
 	int error = EINVAL;
 	struct nfsd_idargs nid;
+	struct nfsd_oidargs onid;
 
 	if (uap->flag & NFSSVC_IDNAME) {
-		error = copyin(uap->argp, (caddr_t)&nid, sizeof (nid));
+		if ((uap->flag & NFSSVC_NEWSTRUCT) != 0)
+			error = copyin(uap->argp, &nid, sizeof(nid));
+		else {
+			error = copyin(uap->argp, &onid, sizeof(onid));
+			if (error == 0) {
+				nid.nid_flag = onid.nid_flag;
+				nid.nid_uid = onid.nid_uid;
+				nid.nid_gid = onid.nid_gid;
+				nid.nid_usermax = onid.nid_usermax;
+				nid.nid_usertimeout = onid.nid_usertimeout;
+				nid.nid_name = onid.nid_name;
+				nid.nid_namelen = onid.nid_namelen;
+				nid.nid_ngroup = 0;
+				nid.nid_grps = NULL;
+			}
+		}
 		if (error)
 			goto out;
 		error = nfssvc_idname(&nid);
@@ -538,6 +567,7 @@
 	/* Initialize SMP locks used by both client and server. */
 	mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF);
 	mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF);
+	mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL, MTX_DEF);
 }
 
 /*
@@ -582,7 +612,7 @@
 		mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF);
 		mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL,
 		    MTX_DEF);
-		callout_init(&newnfsd_callout, CALLOUT_MPSAFE);
+		callout_init(&newnfsd_callout, 1);
 		newnfs_init();
 		nfsd_call_nfscommon = nfssvc_nfscommon;
 		loaded = 1;
@@ -597,10 +627,13 @@
 
 		nfsd_call_nfscommon = NULL;
 		callout_drain(&newnfsd_callout);
+		/* Clean out the name<-->id cache. */
+		nfsrv_cleanusergroup();
 		/* and get rid of the mutexes */
 		mtx_destroy(&nfs_nameid_mutex);
 		mtx_destroy(&newnfsd_mtx);
 		mtx_destroy(&nfs_state_mutex);
+		mtx_destroy(&nfs_clstate_mutex);
 		mtx_destroy(&nfs_sockl_mutex);
 		mtx_destroy(&nfs_slock_mutex);
 		mtx_destroy(&nfs_req_mutex);

Modified: trunk/sys/fs/nfs/nfs_commonsubs.c
===================================================================
--- trunk/sys/fs/nfs/nfs_commonsubs.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfs_commonsubs.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfs/nfs_commonsubs.c 321877 2017-08-01 15:51:16Z trasz $");
 
 /*
  * These functions support the macros and help fiddle mbuf chains for
@@ -44,6 +45,8 @@
 
 #include <fs/nfs/nfsport.h>
 
+#include <security/mac/mac_framework.h>
+
 /*
  * Data items converted to xdr at startup, since they are constant
  * This is kinda hokey, but may save a little time doing byte swaps
@@ -61,13 +64,20 @@
 struct nfssockreq nfsrv_nfsuserdsock;
 int nfsrv_nfsuserd = 0;
 struct nfsreqhead nfsd_reqq;
-uid_t nfsrv_defaultuid;
-gid_t nfsrv_defaultgid;
+uid_t nfsrv_defaultuid = UID_NOBODY;
+gid_t nfsrv_defaultgid = GID_NOGROUP;
 int nfsrv_lease = NFSRV_LEASE;
 int ncl_mbuf_mlen = MLEN;
+int nfsd_enable_stringtouid = 0;
+static int nfs_enable_uidtostring = 0;
 NFSNAMEIDMUTEX;
 NFSSOCKMUTEX;
+extern int nfsrv_lughashsize;
 
+SYSCTL_DECL(_vfs_nfs);
+SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW,
+    &nfs_enable_uidtostring, 0, "Make nfs always send numeric owner_names");
+
 /*
  * This array of structures indicates, for V4:
  * retfh - which of 3 types of calling args are used
@@ -85,47 +95,66 @@
  * non-idempotent Ops.
  * Define it here, since it is used by both the client and server.
  */
-struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS] = {
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* undef */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* undef */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* undef */
-	{ 0, 1, 0, 0, LK_SHARED },		/* Access */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* Close */
-	{ 0, 2, 0, 1, LK_EXCLUSIVE },		/* Commit */
-	{ 1, 2, 1, 1, LK_EXCLUSIVE },		/* Create */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* Delegpurge */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* Delegreturn */
-	{ 0, 1, 0, 0, LK_SHARED },		/* Getattr */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* GetFH */
-	{ 2, 1, 1, 1, LK_EXCLUSIVE },		/* Link */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* Lock */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* LockT */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* LockU */
-	{ 1, 1, 0, 0, LK_EXCLUSIVE },		/* Lookup */
-	{ 1, 1, 0, 0, LK_EXCLUSIVE },		/* Lookupp */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* NVerify */
-	{ 1, 1, 0, 1, LK_EXCLUSIVE },		/* Open */
-	{ 1, 1, 0, 0, LK_EXCLUSIVE },		/* OpenAttr */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* OpenConfirm */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* OpenDowngrade */
-	{ 1, 0, 0, 0, LK_EXCLUSIVE },		/* PutFH */
-	{ 1, 0, 0, 0, LK_EXCLUSIVE },		/* PutPubFH */
-	{ 1, 0, 0, 0, LK_EXCLUSIVE },		/* PutRootFH */
-	{ 0, 1, 0, 0, LK_SHARED },		/* Read */
-	{ 0, 1, 0, 0, LK_SHARED },		/* Readdir */
-	{ 0, 1, 0, 0, LK_SHARED },		/* ReadLink */
-	{ 0, 2, 1, 1, LK_EXCLUSIVE },		/* Remove */
-	{ 2, 1, 1, 1, LK_EXCLUSIVE },		/* Rename */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* Renew */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* RestoreFH */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* SaveFH */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* SecInfo */
-	{ 0, 2, 1, 1, LK_EXCLUSIVE },		/* Setattr */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* SetClientID */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* SetClientIDConfirm */
-	{ 0, 1, 0, 0, LK_EXCLUSIVE },		/* Verify */
-	{ 0, 2, 1, 1, LK_EXCLUSIVE },		/* Write */
-	{ 0, 0, 0, 0, LK_EXCLUSIVE },		/* ReleaseLockOwner */
+struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = {
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* undef */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* undef */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* undef */
+	{ 0, 1, 0, 0, LK_SHARED, 1, 1 },		/* Access */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Close */
+	{ 0, 2, 0, 1, LK_EXCLUSIVE, 1, 1 },		/* Commit */
+	{ 1, 2, 1, 1, LK_EXCLUSIVE, 1, 1 },		/* Create */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Delegpurge */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Delegreturn */
+	{ 0, 1, 0, 0, LK_SHARED, 1, 1 },		/* Getattr */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* GetFH */
+	{ 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 },		/* Link */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Lock */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* LockT */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* LockU */
+	{ 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Lookup */
+	{ 1, 2, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Lookupp */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* NVerify */
+	{ 1, 1, 0, 1, LK_EXCLUSIVE, 1, 0 },		/* Open */
+	{ 1, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* OpenAttr */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* OpenConfirm */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* OpenDowngrade */
+	{ 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* PutFH */
+	{ 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* PutPubFH */
+	{ 1, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* PutRootFH */
+	{ 0, 1, 0, 0, LK_SHARED, 1, 0 },		/* Read */
+	{ 0, 1, 0, 0, LK_SHARED, 1, 1 },		/* Readdir */
+	{ 0, 1, 0, 0, LK_SHARED, 1, 1 },		/* ReadLink */
+	{ 0, 2, 1, 1, LK_EXCLUSIVE, 1, 1 },		/* Remove */
+	{ 2, 1, 1, 1, LK_EXCLUSIVE, 1, 1 },		/* Rename */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Renew */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* RestoreFH */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* SaveFH */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* SecInfo */
+	{ 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 },		/* Setattr */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* SetClientID */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* SetClientIDConfirm */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Verify */
+	{ 0, 2, 1, 1, LK_EXCLUSIVE, 1, 0 },		/* Write */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* ReleaseLockOwner */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Backchannel Ctrl */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Bind Conn to Sess */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 },		/* Exchange ID */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 },		/* Create Session */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 },		/* Destroy Session */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Free StateID */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Get Dir Deleg */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Get Device Info */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Get Device List */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Layout Commit */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Layout Get */
+	{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Layout Return */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Secinfo No name */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Sequence */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Set SSV */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Test StateID */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 },		/* Want Delegation */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 0, 0 },		/* Destroy ClientID */
+	{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 },		/* Reclaim Complete */
 };
 #endif	/* !APPLEKEXT */
 
@@ -134,11 +163,14 @@
 static int nfsrv_dnsnamelen;
 static u_char *nfsrv_dnsname = NULL;
 static int nfsrv_usermax = 999999999;
-static struct nfsuserhashhead nfsuserhash[NFSUSERHASHSIZE];
-static struct nfsuserhashhead nfsusernamehash[NFSUSERHASHSIZE];
-static struct nfsuserhashhead nfsgrouphash[NFSGROUPHASHSIZE];
-static struct nfsuserhashhead nfsgroupnamehash[NFSGROUPHASHSIZE];
-static struct nfsuserlruhead nfsuserlruhead;
+struct nfsrv_lughash {
+	struct mtx		mtx;
+	struct nfsuserhashhead	lughead;
+};
+static struct nfsrv_lughash	*nfsuserhash;
+static struct nfsrv_lughash	*nfsusernamehash;
+static struct nfsrv_lughash	*nfsgrouphash;
+static struct nfsrv_lughash	*nfsgroupnamehash;
 
 /*
  * This static array indicates whether or not the RPC generates a large
@@ -147,9 +179,9 @@
  * marked 0 in this array, the code will still work, just not quite as
  * efficiently.)
  */
-static int nfs_bigreply[NFS_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
+int nfs_bigreply[NFSV41_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0 };
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 };
 
 /* local functions */
 static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep);
@@ -157,7 +189,7 @@
 static int nfsrv_cmpmixedcase(u_char *cp, u_char *cp2, int len);
 static int nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name,
     NFSPROC_T *p);
-static void nfsrv_removeuser(struct nfsusrgrp *usrp);
+static void nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser);
 static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **,
     int *, int *);
 static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *);
@@ -199,7 +231,8 @@
 				}
 				mbufcp = NFSMTOD(mp, caddr_t);
 				len = mbuf_len(mp);
-				KASSERT(len > 0, ("len %d", len));
+				KASSERT(len >= 0,
+				    ("len %d, corrupted mbuf?", len));
 			}
 			xfer = (left > len) ? len : left;
 #ifdef notdef
@@ -385,7 +418,7 @@
 	while (siz > 0) {
 		if (left == 0) {
 			if (siz > ncl_mbuf_mlen)
-				NFSMCLGET(m1, M_WAIT);
+				NFSMCLGET(m1, M_WAITOK);
 			else
 				NFSMGET(m1);
 			mbuf_setlen(m1, 0);
@@ -742,13 +775,10 @@
 		error = NFSERR_BADXDR;
 		goto nfsmout;
 	}
-	if (cnt > NFSATTRBIT_MAXWORDS) {
+	if (cnt > NFSATTRBIT_MAXWORDS)
 		outcnt = NFSATTRBIT_MAXWORDS;
-		if (retnotsupp)
-			*retnotsupp = NFSERR_ATTRNOTSUPP;
-	} else {
+	else
 		outcnt = cnt;
-	}
 	NFSZERO_ATTRBIT(attrbitp);
 	if (outcnt > 0) {
 		NFSM_DISSECT(tl, u_int32_t *, outcnt * NFSX_UNSIGNED);
@@ -755,8 +785,11 @@
 		for (i = 0; i < outcnt; i++)
 			attrbitp->bits[i] = fxdr_unsigned(u_int32_t, *tl++);
 	}
-	if (cnt > outcnt)
-		error = nfsm_advance(nd, (cnt - outcnt) * NFSX_UNSIGNED, -1);
+	for (i = 0; i < (cnt - outcnt); i++) {
+		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
+		if (retnotsupp != NULL && *tl != 0)
+			*retnotsupp = NFSERR_ATTRNOTSUPP;
+	}
 	if (cntp)
 		*cntp = NFSX_UNSIGNED + (cnt * NFSX_UNSIGNED);
 nfsmout:
@@ -800,6 +833,11 @@
 	struct dqblk dqb;
 	uid_t savuid;
 #endif
+	static struct timeval last64fileid;
+	static size_t count64fileid;
+	static struct timeval last64mountfileid;
+	static size_t count64mountfileid;
+	static struct timeval warninterval = { 60, 0 };
 
 	if (compare) {
 		retnotsup = 0;
@@ -856,6 +894,14 @@
 			pc->pc_caseinsensitive = 0;
 			pc->pc_casepreserving = 1;
 		}
+		if (sfp != NULL) {
+			sfp->sf_ffiles = UINT64_MAX;
+			sfp->sf_tfiles = UINT64_MAX;
+			sfp->sf_afiles = UINT64_MAX;
+			sfp->sf_fbytes = UINT64_MAX;
+			sfp->sf_tbytes = UINT64_MAX;
+			sfp->sf_abytes = UINT64_MAX;
+		}
 	}
 
 	/*
@@ -1169,8 +1215,14 @@
 					*retcmpp = NFSERR_NOTSAME;
 				}
 			} else if (nap != NULL) {
-				if (*tl++)
-					printf("NFSv4 fileid > 32bits\n");
+				if (*tl++) {
+					count64fileid++;
+					if (ratecheck(&last64fileid, &warninterval)) {
+						printf("NFSv4 fileid > 32bits (%zu occurrences)\n",
+						    count64fileid);
+						count64fileid = 0;
+					}
+				}
 				nap->na_fileid = thyp;
 			}
 			attrsum += NFSX_HYPER;
@@ -1707,12 +1759,35 @@
 				}
 			    }
 			} else if (nap != NULL) {
-			    if (*tl++)
-				printf("NFSv4 mounted on fileid > 32bits\n");
+			    if (*tl++) {
+				count64mountfileid++;
+				if (ratecheck(&last64mountfileid, &warninterval)) {
+					printf("NFSv4 mounted on fileid > 32bits (%zu occurrences)\n",
+					    count64mountfileid);
+					count64mountfileid = 0;
+				}
+			    }
 			    nap->na_mntonfileno = thyp;
 			}
 			attrsum += NFSX_HYPER;
 			break;
+		case NFSATTRBIT_SUPPATTREXCLCREAT:
+			retnotsup = 0;
+			error = nfsrv_getattrbits(nd, &retattrbits,
+			    &cnt, &retnotsup);
+			if (error)
+			    goto nfsmout;
+			if (compare && !(*retcmpp)) {
+			   NFSSETSUPP_ATTRBIT(&checkattrbits);
+			   NFSCLRNOTSETABLE_ATTRBIT(&checkattrbits);
+			   NFSCLRBIT_ATTRBIT(&checkattrbits,
+				NFSATTRBIT_TIMEACCESSSET);
+			   if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits)
+			       || retnotsup)
+				*retcmpp = NFSERR_NOTSAME;
+			}
+			attrsum += cnt;
+			break;
 		default:
 			printf("EEK! nfsv4_loadattr unknown attr=%d\n",
 				bitpos);
@@ -1860,7 +1935,7 @@
 		if (isleptp)
 			*isleptp = 1;
 		(void) nfsmsleep(&lp->nfslock_lock, mutex,
-		    PZERO - 1, "nfsv4lck", NULL);
+		    PZERO - 1, "nfsv4gr", NULL);
 	}
 	if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0)
 		return;
@@ -1992,7 +2067,12 @@
 	 * First, set the bits that can be filled and get fsinfo.
 	 */
 	NFSSET_ATTRBIT(retbitp, attrbitp);
-	/* If p and cred are NULL, it is a client side call */
+	/*
+	 * If both p and cred are NULL, it is a client side setattr call.
+	 * If both p and cred are not NULL, it is a server side reply call.
+	 * If p is not NULL and cred is NULL, it is a client side callback
+	 * reply call.
+	 */
 	if (p == NULL && cred == NULL) {
 		NFSCLRNOTSETABLE_ATTRBIT(retbitp);
 		aclp = saclp;
@@ -2444,6 +2524,12 @@
 			txdr_hyper(uquad, tl);
 			retnum += NFSX_HYPER;
 			break;
+		case NFSATTRBIT_SUPPATTREXCLCREAT:
+			NFSSETSUPP_ATTRBIT(&attrbits);
+			NFSCLRNOTSETABLE_ATTRBIT(&attrbits);
+			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET);
+			retnum += nfsrv_putattrbit(nd, &attrbits);
+			break;
 		default:
 			printf("EEK! Bad V4 attribute bitpos=%d\n", bitpos);
 		};
@@ -2492,11 +2578,11 @@
 	u_char *cp = *cpp;
 	uid_t tmp;
 	int cnt, hasampersand, len = NFSV4_SMALLSTR, ret;
+	struct nfsrv_lughash *hp;
 
 	cnt = 0;
 tryagain:
-	NFSLOCKNAMEID();
-	if (nfsrv_dnsname) {
+	if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) {
 		/*
 		 * Always map nfsrv_defaultuid to "nobody".
 		 */
@@ -2503,7 +2589,6 @@
 		if (uid == nfsrv_defaultuid) {
 			i = nfsrv_dnsnamelen + 7;
 			if (i > len) {
-				NFSUNLOCKNAMEID();
 				if (len > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				cp = malloc(i, M_NFSSTRING, M_WAITOK);
@@ -2515,11 +2600,12 @@
 			NFSBCOPY("nobody@", cp, 7);
 			cp += 7;
 			NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen);
-			NFSUNLOCKNAMEID();
 			return;
 		}
 		hasampersand = 0;
-		LIST_FOREACH(usrp, NFSUSERHASH(uid), lug_numhash) {
+		hp = NFSUSERHASH(uid);
+		mtx_lock(&hp->mtx);
+		TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) {
 			if (usrp->lug_uid == uid) {
 				if (usrp->lug_expiry < NFSD_MONOSEC)
 					break;
@@ -2539,7 +2625,7 @@
 					i = usrp->lug_namelen +
 					    nfsrv_dnsnamelen + 1;
 				if (i > len) {
-					NFSUNLOCKNAMEID();
+					mtx_unlock(&hp->mtx);
 					if (len > NFSV4_SMALLSTR)
 						free(cp, M_NFSSTRING);
 					cp = malloc(i, M_NFSSTRING, M_WAITOK);
@@ -2554,20 +2640,19 @@
 					*cp++ = '@';
 					NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen);
 				}
-				TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru);
-				TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru);
-				NFSUNLOCKNAMEID();
+				TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash);
+				TAILQ_INSERT_TAIL(&hp->lughead, usrp,
+				    lug_numhash);
+				mtx_unlock(&hp->mtx);
 				return;
 			}
 		}
-		NFSUNLOCKNAMEID();
+		mtx_unlock(&hp->mtx);
 		cnt++;
 		ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0,
 		    NULL, p);
 		if (ret == 0 && cnt < 2)
 			goto tryagain;
-	} else {
-		NFSUNLOCKNAMEID();
 	}
 
 	/*
@@ -2591,6 +2676,52 @@
 }
 
 /*
+ * Get a credential for the uid with the server's group list.
+ * If none is found, just return the credential passed in after
+ * logging a warning message.
+ */
+struct ucred *
+nfsrv_getgrpscred(struct ucred *oldcred)
+{
+	struct nfsusrgrp *usrp;
+	struct ucred *newcred;
+	int cnt, ret;
+	uid_t uid;
+	struct nfsrv_lughash *hp;
+
+	cnt = 0;
+	uid = oldcred->cr_uid;
+tryagain:
+	if (nfsrv_dnsnamelen > 0) {
+		hp = NFSUSERHASH(uid);
+		mtx_lock(&hp->mtx);
+		TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) {
+			if (usrp->lug_uid == uid) {
+				if (usrp->lug_expiry < NFSD_MONOSEC)
+					break;
+				if (usrp->lug_cred != NULL) {
+					newcred = crhold(usrp->lug_cred);
+					crfree(oldcred);
+				} else
+					newcred = oldcred;
+				TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash);
+				TAILQ_INSERT_TAIL(&hp->lughead, usrp,
+				    lug_numhash);
+				mtx_unlock(&hp->mtx);
+				return (newcred);
+			}
+		}
+		mtx_unlock(&hp->mtx);
+		cnt++;
+		ret = nfsrv_getuser(RPCNFSUSERD_GETUID, uid, (gid_t)0,
+		    NULL, curthread);
+		if (ret == 0 && cnt < 2)
+			goto tryagain;
+	}
+	return (oldcred);
+}
+
+/*
  * Convert a string to a uid.
  * If no conversion is possible return NFSERR_BADOWNER, otherwise
  * return 0.
@@ -2608,6 +2739,7 @@
 	int cnt, ret;
 	int error = 0;
 	uid_t tuid;
+	struct nfsrv_lughash *hp, *hp2;
 
 	if (len == 0) {
 		error = NFSERR_BADOWNER;
@@ -2616,9 +2748,14 @@
 	/* If a string of digits and an AUTH_SYS mount, just convert it. */
 	str0 = str;
 	tuid = (uid_t)strtoul(str0, &endstr, 10);
-	if ((endstr - str0) == len &&
-	    (nd->nd_flag & (ND_KERBV | ND_NFSCL)) == ND_NFSCL) {
-		*uidp = tuid;
+	if ((endstr - str0) == len) {
+		/* A numeric string. */
+		if ((nd->nd_flag & ND_KERBV) == 0 &&
+		    ((nd->nd_flag & ND_NFSCL) != 0 ||
+		      nfsd_enable_stringtouid != 0))
+			*uidp = tuid;
+		else
+			error = NFSERR_BADOWNER;
 		goto out;
 	}
 	/*
@@ -2632,49 +2769,55 @@
 
 	cnt = 0;
 tryagain:
-	NFSLOCKNAMEID();
-	/*
-	 * If an '@' is found and the domain name matches, search for the name
-	 * with dns stripped off.
-	 * Mixed case alpahbetics will match for the domain name, but all
-	 * upper case will not.
-	 */
-	if (cnt == 0 && i < len && i > 0 && nfsrv_dnsname &&
-	    (len - 1 - i) == nfsrv_dnsnamelen &&
-	    !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) {
-		len -= (nfsrv_dnsnamelen + 1);
-		*(cp - 1) = '\0';
-	}
-
-	/*
-	 * Check for the special case of "nobody".
-	 */
-	if (len == 6 && !NFSBCMP(str, "nobody", 6)) {
-		*uidp = nfsrv_defaultuid;
-		NFSUNLOCKNAMEID();
-		error = 0;
-		goto out;
-	}
-
-	LIST_FOREACH(usrp, NFSUSERNAMEHASH(str, len), lug_namehash) {
-		if (usrp->lug_namelen == len &&
-		    !NFSBCMP(usrp->lug_name, str, len)) {
-			if (usrp->lug_expiry < NFSD_MONOSEC)
-				break;
-			*uidp = usrp->lug_uid;
-			TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru);
-			TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru);
-			NFSUNLOCKNAMEID();
+	if (nfsrv_dnsnamelen > 0) {
+		/*
+		 * If an '@' is found and the domain name matches, search for
+		 * the name with dns stripped off.
+		 * Mixed case alpahbetics will match for the domain name, but
+		 * all upper case will not.
+		 */
+		if (cnt == 0 && i < len && i > 0 &&
+		    (len - 1 - i) == nfsrv_dnsnamelen &&
+		    !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) {
+			len -= (nfsrv_dnsnamelen + 1);
+			*(cp - 1) = '\0';
+		}
+	
+		/*
+		 * Check for the special case of "nobody".
+		 */
+		if (len == 6 && !NFSBCMP(str, "nobody", 6)) {
+			*uidp = nfsrv_defaultuid;
 			error = 0;
 			goto out;
 		}
+	
+		hp = NFSUSERNAMEHASH(str, len);
+		mtx_lock(&hp->mtx);
+		TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) {
+			if (usrp->lug_namelen == len &&
+			    !NFSBCMP(usrp->lug_name, str, len)) {
+				if (usrp->lug_expiry < NFSD_MONOSEC)
+					break;
+				hp2 = NFSUSERHASH(usrp->lug_uid);
+				mtx_lock(&hp2->mtx);
+				TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash);
+				TAILQ_INSERT_TAIL(&hp2->lughead, usrp,
+				    lug_numhash);
+				*uidp = usrp->lug_uid;
+				mtx_unlock(&hp2->mtx);
+				mtx_unlock(&hp->mtx);
+				error = 0;
+				goto out;
+			}
+		}
+		mtx_unlock(&hp->mtx);
+		cnt++;
+		ret = nfsrv_getuser(RPCNFSUSERD_GETUSER, (uid_t)0, (gid_t)0,
+		    str, p);
+		if (ret == 0 && cnt < 2)
+			goto tryagain;
 	}
-	NFSUNLOCKNAMEID();
-	cnt++;
-	ret = nfsrv_getuser(RPCNFSUSERD_GETUSER, (uid_t)0, (gid_t)0,
-	    str, p);
-	if (ret == 0 && cnt < 2)
-		goto tryagain;
 	error = NFSERR_BADOWNER;
 
 out:
@@ -2697,11 +2840,11 @@
 	u_char *cp = *cpp;
 	gid_t tmp;
 	int cnt, hasampersand, len = NFSV4_SMALLSTR, ret;
+	struct nfsrv_lughash *hp;
 
 	cnt = 0;
 tryagain:
-	NFSLOCKNAMEID();
-	if (nfsrv_dnsname) {
+	if (nfsrv_dnsnamelen > 0 && !nfs_enable_uidtostring) {
 		/*
 		 * Always map nfsrv_defaultgid to "nogroup".
 		 */
@@ -2708,7 +2851,6 @@
 		if (gid == nfsrv_defaultgid) {
 			i = nfsrv_dnsnamelen + 8;
 			if (i > len) {
-				NFSUNLOCKNAMEID();
 				if (len > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				cp = malloc(i, M_NFSSTRING, M_WAITOK);
@@ -2720,11 +2862,12 @@
 			NFSBCOPY("nogroup@", cp, 8);
 			cp += 8;
 			NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen);
-			NFSUNLOCKNAMEID();
 			return;
 		}
 		hasampersand = 0;
-		LIST_FOREACH(usrp, NFSGROUPHASH(gid), lug_numhash) {
+		hp = NFSGROUPHASH(gid);
+		mtx_lock(&hp->mtx);
+		TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) {
 			if (usrp->lug_gid == gid) {
 				if (usrp->lug_expiry < NFSD_MONOSEC)
 					break;
@@ -2744,7 +2887,7 @@
 					i = usrp->lug_namelen +
 					    nfsrv_dnsnamelen + 1;
 				if (i > len) {
-					NFSUNLOCKNAMEID();
+					mtx_unlock(&hp->mtx);
 					if (len > NFSV4_SMALLSTR)
 						free(cp, M_NFSSTRING);
 					cp = malloc(i, M_NFSSTRING, M_WAITOK);
@@ -2759,20 +2902,19 @@
 					*cp++ = '@';
 					NFSBCOPY(nfsrv_dnsname, cp, nfsrv_dnsnamelen);
 				}
-				TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru);
-				TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru);
-				NFSUNLOCKNAMEID();
+				TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash);
+				TAILQ_INSERT_TAIL(&hp->lughead, usrp,
+				    lug_numhash);
+				mtx_unlock(&hp->mtx);
 				return;
 			}
 		}
-		NFSUNLOCKNAMEID();
+		mtx_unlock(&hp->mtx);
 		cnt++;
 		ret = nfsrv_getuser(RPCNFSUSERD_GETGID, (uid_t)0, gid,
 		    NULL, p);
 		if (ret == 0 && cnt < 2)
 			goto tryagain;
-	} else {
-		NFSUNLOCKNAMEID();
 	}
 
 	/*
@@ -2813,6 +2955,7 @@
 	int cnt, ret;
 	int error = 0;
 	gid_t tgid;
+	struct nfsrv_lughash *hp, *hp2;
 
 	if (len == 0) {
 		error =  NFSERR_BADOWNER;
@@ -2821,9 +2964,14 @@
 	/* If a string of digits and an AUTH_SYS mount, just convert it. */
 	str0 = str;
 	tgid = (gid_t)strtoul(str0, &endstr, 10);
-	if ((endstr - str0) == len &&
-	    (nd->nd_flag & (ND_KERBV | ND_NFSCL)) == ND_NFSCL) {
-		*gidp = tgid;
+	if ((endstr - str0) == len) {
+		/* A numeric string. */
+		if ((nd->nd_flag & ND_KERBV) == 0 &&
+		    ((nd->nd_flag & ND_NFSCL) != 0 ||
+		      nfsd_enable_stringtouid != 0))
+			*gidp = tgid;
+		else
+			error = NFSERR_BADOWNER;
 		goto out;
 	}
 	/*
@@ -2837,47 +2985,53 @@
 
 	cnt = 0;
 tryagain:
-	NFSLOCKNAMEID();
-	/*
-	 * If an '@' is found and the dns name matches, search for the name
-	 * with the dns stripped off.
-	 */
-	if (cnt == 0 && i < len && i > 0 && nfsrv_dnsname &&
-	    (len - 1 - i) == nfsrv_dnsnamelen &&
-	    !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) {
-		len -= (nfsrv_dnsnamelen + 1);
-		*(cp - 1) = '\0';
-	}
-
-	/*
-	 * Check for the special case of "nogroup".
-	 */
-	if (len == 7 && !NFSBCMP(str, "nogroup", 7)) {
-		*gidp = nfsrv_defaultgid;
-		NFSUNLOCKNAMEID();
-		error = 0;
-		goto out;
-	}
-
-	LIST_FOREACH(usrp, NFSGROUPNAMEHASH(str, len), lug_namehash) {
-		if (usrp->lug_namelen == len &&
-		    !NFSBCMP(usrp->lug_name, str, len)) {
-			if (usrp->lug_expiry < NFSD_MONOSEC)
-				break;
-			*gidp = usrp->lug_gid;
-			TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru);
-			TAILQ_INSERT_TAIL(&nfsuserlruhead, usrp, lug_lru);
-			NFSUNLOCKNAMEID();
+	if (nfsrv_dnsnamelen > 0) {
+		/*
+		 * If an '@' is found and the dns name matches, search for the
+		 * name with the dns stripped off.
+		 */
+		if (cnt == 0 && i < len && i > 0 &&
+		    (len - 1 - i) == nfsrv_dnsnamelen &&
+		    !nfsrv_cmpmixedcase(cp, nfsrv_dnsname, nfsrv_dnsnamelen)) {
+			len -= (nfsrv_dnsnamelen + 1);
+			*(cp - 1) = '\0';
+		}
+	
+		/*
+		 * Check for the special case of "nogroup".
+		 */
+		if (len == 7 && !NFSBCMP(str, "nogroup", 7)) {
+			*gidp = nfsrv_defaultgid;
 			error = 0;
 			goto out;
 		}
+	
+		hp = NFSGROUPNAMEHASH(str, len);
+		mtx_lock(&hp->mtx);
+		TAILQ_FOREACH(usrp, &hp->lughead, lug_namehash) {
+			if (usrp->lug_namelen == len &&
+			    !NFSBCMP(usrp->lug_name, str, len)) {
+				if (usrp->lug_expiry < NFSD_MONOSEC)
+					break;
+				hp2 = NFSGROUPHASH(usrp->lug_gid);
+				mtx_lock(&hp2->mtx);
+				TAILQ_REMOVE(&hp2->lughead, usrp, lug_numhash);
+				TAILQ_INSERT_TAIL(&hp2->lughead, usrp,
+				    lug_numhash);
+				*gidp = usrp->lug_gid;
+				mtx_unlock(&hp2->mtx);
+				mtx_unlock(&hp->mtx);
+				error = 0;
+				goto out;
+			}
+		}
+		mtx_unlock(&hp->mtx);
+		cnt++;
+		ret = nfsrv_getuser(RPCNFSUSERD_GETGROUP, (uid_t)0, (gid_t)0,
+		    str, p);
+		if (ret == 0 && cnt < 2)
+			goto tryagain;
 	}
-	NFSUNLOCKNAMEID();
-	cnt++;
-	ret = nfsrv_getuser(RPCNFSUSERD_GETGROUP, (uid_t)0, (gid_t)0,
-	    str, p);
-	if (ret == 0 && cnt < 2)
-		goto tryagain;
 	error = NFSERR_BADOWNER;
 
 out:
@@ -3016,7 +3170,7 @@
 		(void) nfsm_strtom(nd, name, len);
 	}
 	error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL,
-		cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL);
+		cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL);
 	NFSFREECRED(cred);
 	if (!error) {
 		mbuf_freem(nd->nd_mrep);
@@ -3035,52 +3189,110 @@
 nfssvc_idname(struct nfsd_idargs *nidp)
 {
 	struct nfsusrgrp *nusrp, *usrp, *newusrp;
-	struct nfsuserhashhead *hp;
-	int i;
+	struct nfsrv_lughash *hp_name, *hp_idnum, *thp;
+	int i, group_locked, groupname_locked, user_locked, username_locked;
 	int error = 0;
 	u_char *cp;
+	gid_t *grps;
+	struct ucred *cr;
+	static int onethread = 0;
+	static time_t lasttime = 0;
 
+	if (nidp->nid_namelen <= 0 || nidp->nid_namelen > MAXHOSTNAMELEN) {
+		error = EINVAL;
+		goto out;
+	}
 	if (nidp->nid_flag & NFSID_INITIALIZE) {
-	    cp = (u_char *)malloc(nidp->nid_namelen + 1,
-		M_NFSSTRING, M_WAITOK);
-	    error = copyin(CAST_USER_ADDR_T(nidp->nid_name), cp,
-		nidp->nid_namelen);
-	    NFSLOCKNAMEID();
-	    if (nfsrv_dnsname) {
+		cp = malloc(nidp->nid_namelen + 1, M_NFSSTRING, M_WAITOK);
+		error = copyin(CAST_USER_ADDR_T(nidp->nid_name), cp,
+		    nidp->nid_namelen);
+		if (error != 0) {
+			free(cp, M_NFSSTRING);
+			goto out;
+		}
+		if (atomic_cmpset_acq_int(&nfsrv_dnsnamelen, 0, 0) == 0) {
+			/*
+			 * Free up all the old stuff and reinitialize hash
+			 * lists.  All mutexes for both lists must be locked,
+			 * with the user/group name ones before the uid/gid
+			 * ones, to avoid a LOR.
+			 */
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_lock(&nfsusernamehash[i].mtx);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_lock(&nfsuserhash[i].mtx);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				TAILQ_FOREACH_SAFE(usrp,
+				    &nfsuserhash[i].lughead, lug_numhash, nusrp)
+					nfsrv_removeuser(usrp, 1);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_unlock(&nfsuserhash[i].mtx);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_unlock(&nfsusernamehash[i].mtx);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_lock(&nfsgroupnamehash[i].mtx);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_lock(&nfsgrouphash[i].mtx);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				TAILQ_FOREACH_SAFE(usrp,
+				    &nfsgrouphash[i].lughead, lug_numhash,
+				    nusrp)
+					nfsrv_removeuser(usrp, 0);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_unlock(&nfsgrouphash[i].mtx);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_unlock(&nfsgroupnamehash[i].mtx);
+			free(nfsrv_dnsname, M_NFSSTRING);
+			nfsrv_dnsname = NULL;
+		}
+		if (nfsuserhash == NULL) {
+			/* Allocate the hash tables. */
+			nfsuserhash = malloc(sizeof(struct nfsrv_lughash) *
+			    nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK |
+			    M_ZERO);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_init(&nfsuserhash[i].mtx, "nfsuidhash",
+				    NULL, MTX_DEF | MTX_DUPOK);
+			nfsusernamehash = malloc(sizeof(struct nfsrv_lughash) *
+			    nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK |
+			    M_ZERO);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_init(&nfsusernamehash[i].mtx,
+				    "nfsusrhash", NULL, MTX_DEF |
+				    MTX_DUPOK);
+			nfsgrouphash = malloc(sizeof(struct nfsrv_lughash) *
+			    nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK |
+			    M_ZERO);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_init(&nfsgrouphash[i].mtx, "nfsgidhash",
+				    NULL, MTX_DEF | MTX_DUPOK);
+			nfsgroupnamehash = malloc(sizeof(struct nfsrv_lughash) *
+			    nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK |
+			    M_ZERO);
+			for (i = 0; i < nfsrv_lughashsize; i++)
+			    mtx_init(&nfsgroupnamehash[i].mtx,
+			    "nfsgrphash", NULL, MTX_DEF | MTX_DUPOK);
+		}
+		/* (Re)initialize the list heads. */
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			TAILQ_INIT(&nfsuserhash[i].lughead);
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			TAILQ_INIT(&nfsusernamehash[i].lughead);
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			TAILQ_INIT(&nfsgrouphash[i].lughead);
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			TAILQ_INIT(&nfsgroupnamehash[i].lughead);
+
 		/*
-		 * Free up all the old stuff and reinitialize hash lists.
+		 * Put name in "DNS" string.
 		 */
-		TAILQ_FOREACH_SAFE(usrp, &nfsuserlruhead, lug_lru, nusrp) {
-			nfsrv_removeuser(usrp);
-		}
-		free(nfsrv_dnsname, M_NFSSTRING);
-		nfsrv_dnsname = NULL;
-	    }
-	    TAILQ_INIT(&nfsuserlruhead);
-	    for (i = 0; i < NFSUSERHASHSIZE; i++)
-		LIST_INIT(&nfsuserhash[i]);
-	    for (i = 0; i < NFSGROUPHASHSIZE; i++)
-		LIST_INIT(&nfsgrouphash[i]);
-	    for (i = 0; i < NFSUSERHASHSIZE; i++)
-		LIST_INIT(&nfsusernamehash[i]);
-	    for (i = 0; i < NFSGROUPHASHSIZE; i++)
-		LIST_INIT(&nfsgroupnamehash[i]);
-
-	    /*
-	     * Put name in "DNS" string.
-	     */
-	    if (!error) {
 		nfsrv_dnsname = cp;
-		nfsrv_dnsnamelen = nidp->nid_namelen;
 		nfsrv_defaultuid = nidp->nid_uid;
 		nfsrv_defaultgid = nidp->nid_gid;
 		nfsrv_usercnt = 0;
 		nfsrv_usermax = nidp->nid_usermax;
-	    }
-	    NFSUNLOCKNAMEID();
-	    if (error)
-		free(cp, M_NFSSTRING);
-	    goto out;
+		atomic_store_rel_int(&nfsrv_dnsnamelen, nidp->nid_namelen);
+		goto out;
 	}
 
 	/*
@@ -3087,60 +3299,113 @@
 	 * malloc the new one now, so any potential sleep occurs before
 	 * manipulation of the lists.
 	 */
-	MALLOC(newusrp, struct nfsusrgrp *, sizeof (struct nfsusrgrp) +
-	    nidp->nid_namelen, M_NFSUSERGROUP, M_WAITOK);
+	newusrp = malloc(sizeof(struct nfsusrgrp) + nidp->nid_namelen,
+	    M_NFSUSERGROUP, M_WAITOK | M_ZERO);
 	error = copyin(CAST_USER_ADDR_T(nidp->nid_name), newusrp->lug_name,
 	    nidp->nid_namelen);
+	if (error == 0 && nidp->nid_ngroup > 0 &&
+	    (nidp->nid_flag & NFSID_ADDUID) != 0) {
+		grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP,
+		    M_WAITOK);
+		error = copyin(CAST_USER_ADDR_T(nidp->nid_grps), grps,
+		    sizeof(gid_t) * nidp->nid_ngroup);
+		if (error == 0) {
+			/*
+			 * Create a credential just like svc_getcred(),
+			 * but using the group list provided.
+			 */
+			cr = crget();
+			cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid;
+			crsetgroups(cr, nidp->nid_ngroup, grps);
+			cr->cr_rgid = cr->cr_svgid = cr->cr_groups[0];
+			cr->cr_prison = &prison0;
+			prison_hold(cr->cr_prison);
+#ifdef MAC
+			mac_cred_associate_nfsd(cr);
+#endif
+			newusrp->lug_cred = cr;
+		}
+		free(grps, M_TEMP);
+	}
 	if (error) {
-		free((caddr_t)newusrp, M_NFSUSERGROUP);
+		free(newusrp, M_NFSUSERGROUP);
 		goto out;
 	}
 	newusrp->lug_namelen = nidp->nid_namelen;
 
-	NFSLOCKNAMEID();
 	/*
+	 * The lock order is username[0]->[nfsrv_lughashsize - 1] followed
+	 * by uid[0]->[nfsrv_lughashsize - 1], with the same for group.
+	 * The flags user_locked, username_locked, group_locked and
+	 * groupname_locked are set to indicate all of those hash lists are
+	 * locked. hp_name != NULL  and hp_idnum != NULL indicates that
+	 * the respective one mutex is locked.
+	 */
+	user_locked = username_locked = group_locked = groupname_locked = 0;
+	hp_name = hp_idnum = NULL;
+
+	/*
 	 * Delete old entries, as required.
 	 */
 	if (nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID)) {
-		hp = NFSUSERHASH(nidp->nid_uid);
-		LIST_FOREACH_SAFE(usrp, hp, lug_numhash, nusrp) {
+		/* Must lock all username hash lists first, to avoid a LOR. */
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			mtx_lock(&nfsusernamehash[i].mtx);
+		username_locked = 1;
+		hp_idnum = NFSUSERHASH(nidp->nid_uid);
+		mtx_lock(&hp_idnum->mtx);
+		TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash,
+		    nusrp) {
 			if (usrp->lug_uid == nidp->nid_uid)
-				nfsrv_removeuser(usrp);
+				nfsrv_removeuser(usrp, 1);
 		}
-	}
-	if (nidp->nid_flag & (NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) {
-		hp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen);
-		LIST_FOREACH_SAFE(usrp, hp, lug_namehash, nusrp) {
+	} else if (nidp->nid_flag & (NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) {
+		hp_name = NFSUSERNAMEHASH(newusrp->lug_name,
+		    newusrp->lug_namelen);
+		mtx_lock(&hp_name->mtx);
+		TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash,
+		    nusrp) {
 			if (usrp->lug_namelen == newusrp->lug_namelen &&
 			    !NFSBCMP(usrp->lug_name, newusrp->lug_name,
-			    usrp->lug_namelen))
-				nfsrv_removeuser(usrp);
+			    usrp->lug_namelen)) {
+				thp = NFSUSERHASH(usrp->lug_uid);
+				mtx_lock(&thp->mtx);
+				nfsrv_removeuser(usrp, 1);
+				mtx_unlock(&thp->mtx);
+			}
 		}
-	}
-	if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) {
-		hp = NFSGROUPHASH(nidp->nid_gid);
-		LIST_FOREACH_SAFE(usrp, hp, lug_numhash, nusrp) {
+		hp_idnum = NFSUSERHASH(nidp->nid_uid);
+		mtx_lock(&hp_idnum->mtx);
+	} else if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) {
+		/* Must lock all groupname hash lists first, to avoid a LOR. */
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			mtx_lock(&nfsgroupnamehash[i].mtx);
+		groupname_locked = 1;
+		hp_idnum = NFSGROUPHASH(nidp->nid_gid);
+		mtx_lock(&hp_idnum->mtx);
+		TAILQ_FOREACH_SAFE(usrp, &hp_idnum->lughead, lug_numhash,
+		    nusrp) {
 			if (usrp->lug_gid == nidp->nid_gid)
-				nfsrv_removeuser(usrp);
+				nfsrv_removeuser(usrp, 0);
 		}
-	}
-	if (nidp->nid_flag & (NFSID_DELGROUPNAME | NFSID_ADDGROUPNAME)) {
-		hp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen);
-		LIST_FOREACH_SAFE(usrp, hp, lug_namehash, nusrp) {
+	} else if (nidp->nid_flag & (NFSID_DELGROUPNAME | NFSID_ADDGROUPNAME)) {
+		hp_name = NFSGROUPNAMEHASH(newusrp->lug_name,
+		    newusrp->lug_namelen);
+		mtx_lock(&hp_name->mtx);
+		TAILQ_FOREACH_SAFE(usrp, &hp_name->lughead, lug_namehash,
+		    nusrp) {
 			if (usrp->lug_namelen == newusrp->lug_namelen &&
 			    !NFSBCMP(usrp->lug_name, newusrp->lug_name,
-			    usrp->lug_namelen))
-				nfsrv_removeuser(usrp);
+			    usrp->lug_namelen)) {
+				thp = NFSGROUPHASH(usrp->lug_gid);
+				mtx_lock(&thp->mtx);
+				nfsrv_removeuser(usrp, 0);
+				mtx_unlock(&thp->mtx);
+			}
 		}
+		hp_idnum = NFSGROUPHASH(nidp->nid_gid);
+		mtx_lock(&hp_idnum->mtx);
 	}
-	TAILQ_FOREACH_SAFE(usrp, &nfsuserlruhead, lug_lru, nusrp) {
-		if (usrp->lug_expiry < NFSD_MONOSEC)
-			nfsrv_removeuser(usrp);
-	}
-	while (nfsrv_usercnt >= nfsrv_usermax) {
-		usrp = TAILQ_FIRST(&nfsuserlruhead);
-		nfsrv_removeuser(usrp);
-	}
 
 	/*
 	 * Now, we can add the new one.
@@ -3151,23 +3416,129 @@
 		newusrp->lug_expiry = NFSD_MONOSEC + 5;
 	if (nidp->nid_flag & (NFSID_ADDUID | NFSID_ADDUSERNAME)) {
 		newusrp->lug_uid = nidp->nid_uid;
-		LIST_INSERT_HEAD(NFSUSERHASH(newusrp->lug_uid), newusrp,
-		    lug_numhash);
-		LIST_INSERT_HEAD(NFSUSERNAMEHASH(newusrp->lug_name,
-		    newusrp->lug_namelen), newusrp, lug_namehash);
-		TAILQ_INSERT_TAIL(&nfsuserlruhead, newusrp, lug_lru);
-		nfsrv_usercnt++;
+		thp = NFSUSERHASH(newusrp->lug_uid);
+		mtx_assert(&thp->mtx, MA_OWNED);
+		TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash);
+		thp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen);
+		mtx_assert(&thp->mtx, MA_OWNED);
+		TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash);
+		atomic_add_int(&nfsrv_usercnt, 1);
 	} else if (nidp->nid_flag & (NFSID_ADDGID | NFSID_ADDGROUPNAME)) {
 		newusrp->lug_gid = nidp->nid_gid;
-		LIST_INSERT_HEAD(NFSGROUPHASH(newusrp->lug_gid), newusrp,
-		    lug_numhash);
-		LIST_INSERT_HEAD(NFSGROUPNAMEHASH(newusrp->lug_name,
-		    newusrp->lug_namelen), newusrp, lug_namehash);
-		TAILQ_INSERT_TAIL(&nfsuserlruhead, newusrp, lug_lru);
-		nfsrv_usercnt++;
-	} else
-		FREE((caddr_t)newusrp, M_NFSUSERGROUP);
-	NFSUNLOCKNAMEID();
+		thp = NFSGROUPHASH(newusrp->lug_gid);
+		mtx_assert(&thp->mtx, MA_OWNED);
+		TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_numhash);
+		thp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen);
+		mtx_assert(&thp->mtx, MA_OWNED);
+		TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash);
+		atomic_add_int(&nfsrv_usercnt, 1);
+	} else {
+		if (newusrp->lug_cred != NULL)
+			crfree(newusrp->lug_cred);
+		free(newusrp, M_NFSUSERGROUP);
+	}
+
+	/*
+	 * Once per second, allow one thread to trim the cache.
+	 */
+	if (lasttime < NFSD_MONOSEC &&
+	    atomic_cmpset_acq_int(&onethread, 0, 1) != 0) {
+		/*
+		 * First, unlock the single mutexes, so that all entries
+		 * can be locked and any LOR is avoided.
+		 */
+		if (hp_name != NULL) {
+			mtx_unlock(&hp_name->mtx);
+			hp_name = NULL;
+		}
+		if (hp_idnum != NULL) {
+			mtx_unlock(&hp_idnum->mtx);
+			hp_idnum = NULL;
+		}
+
+		if ((nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID |
+		    NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) != 0) {
+			if (username_locked == 0) {
+				for (i = 0; i < nfsrv_lughashsize; i++)
+					mtx_lock(&nfsusernamehash[i].mtx);
+				username_locked = 1;
+			}
+			KASSERT(user_locked == 0,
+			    ("nfssvc_idname: user_locked"));
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_lock(&nfsuserhash[i].mtx);
+			user_locked = 1;
+			for (i = 0; i < nfsrv_lughashsize; i++) {
+				TAILQ_FOREACH_SAFE(usrp,
+				    &nfsuserhash[i].lughead, lug_numhash,
+				    nusrp)
+					if (usrp->lug_expiry < NFSD_MONOSEC)
+						nfsrv_removeuser(usrp, 1);
+			}
+			for (i = 0; i < nfsrv_lughashsize; i++) {
+				/*
+				 * Trim the cache using an approximate LRU
+				 * algorithm.  This code deletes the least
+				 * recently used entry on each hash list.
+				 */
+				if (nfsrv_usercnt <= nfsrv_usermax)
+					break;
+				usrp = TAILQ_FIRST(&nfsuserhash[i].lughead);
+				if (usrp != NULL)
+					nfsrv_removeuser(usrp, 1);
+			}
+		} else {
+			if (groupname_locked == 0) {
+				for (i = 0; i < nfsrv_lughashsize; i++)
+					mtx_lock(&nfsgroupnamehash[i].mtx);
+				groupname_locked = 1;
+			}
+			KASSERT(group_locked == 0,
+			    ("nfssvc_idname: group_locked"));
+			for (i = 0; i < nfsrv_lughashsize; i++)
+				mtx_lock(&nfsgrouphash[i].mtx);
+			group_locked = 1;
+			for (i = 0; i < nfsrv_lughashsize; i++) {
+				TAILQ_FOREACH_SAFE(usrp,
+				    &nfsgrouphash[i].lughead, lug_numhash,
+				    nusrp)
+					if (usrp->lug_expiry < NFSD_MONOSEC)
+						nfsrv_removeuser(usrp, 0);
+			}
+			for (i = 0; i < nfsrv_lughashsize; i++) {
+				/*
+				 * Trim the cache using an approximate LRU
+				 * algorithm.  This code deletes the least
+				 * recently user entry on each hash list.
+				 */
+				if (nfsrv_usercnt <= nfsrv_usermax)
+					break;
+				usrp = TAILQ_FIRST(&nfsgrouphash[i].lughead);
+				if (usrp != NULL)
+					nfsrv_removeuser(usrp, 0);
+			}
+		}
+		lasttime = NFSD_MONOSEC;
+		atomic_store_rel_int(&onethread, 0);
+	}
+
+	/* Now, unlock all locked mutexes. */
+	if (hp_idnum != NULL)
+		mtx_unlock(&hp_idnum->mtx);
+	if (hp_name != NULL)
+		mtx_unlock(&hp_name->mtx);
+	if (user_locked != 0)
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			mtx_unlock(&nfsuserhash[i].mtx);
+	if (username_locked != 0)
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			mtx_unlock(&nfsusernamehash[i].mtx);
+	if (group_locked != 0)
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			mtx_unlock(&nfsgrouphash[i].mtx);
+	if (groupname_locked != 0)
+		for (i = 0; i < nfsrv_lughashsize; i++)
+			mtx_unlock(&nfsgroupnamehash[i].mtx);
 out:
 	NFSEXITCODE(error);
 	return (error);
@@ -3177,18 +3548,81 @@
  * Remove a user/group name element.
  */
 static void
-nfsrv_removeuser(struct nfsusrgrp *usrp)
+nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser)
 {
+	struct nfsrv_lughash *hp;
 
-	NFSNAMEIDREQUIRED();
-	LIST_REMOVE(usrp, lug_numhash);
-	LIST_REMOVE(usrp, lug_namehash);
-	TAILQ_REMOVE(&nfsuserlruhead, usrp, lug_lru);
-	nfsrv_usercnt--;
-	FREE((caddr_t)usrp, M_NFSUSERGROUP);
+	if (isuser != 0) {
+		hp = NFSUSERHASH(usrp->lug_uid);
+		mtx_assert(&hp->mtx, MA_OWNED);
+		TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash);
+		hp = NFSUSERNAMEHASH(usrp->lug_name, usrp->lug_namelen);
+		mtx_assert(&hp->mtx, MA_OWNED);
+		TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash);
+	} else {
+		hp = NFSGROUPHASH(usrp->lug_gid);
+		mtx_assert(&hp->mtx, MA_OWNED);
+		TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash);
+		hp = NFSGROUPNAMEHASH(usrp->lug_name, usrp->lug_namelen);
+		mtx_assert(&hp->mtx, MA_OWNED);
+		TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash);
+	}
+	atomic_add_int(&nfsrv_usercnt, -1);
+	if (usrp->lug_cred != NULL)
+		crfree(usrp->lug_cred);
+	free(usrp, M_NFSUSERGROUP);
 }
 
 /*
+ * Free up all the allocations related to the name<-->id cache.
+ * This function should only be called when the nfsuserd daemon isn't
+ * running, since it doesn't do any locking.
+ * This function is meant to be used when the nfscommon module is unloaded.
+ */
+APPLESTATIC void
+nfsrv_cleanusergroup(void)
+{
+	struct nfsrv_lughash *hp, *hp2;
+	struct nfsusrgrp *nusrp, *usrp;
+	int i;
+
+	if (nfsuserhash == NULL)
+		return;
+
+	for (i = 0; i < nfsrv_lughashsize; i++) {
+		hp = &nfsuserhash[i];
+		TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) {
+			TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash);
+			hp2 = NFSUSERNAMEHASH(usrp->lug_name,
+			    usrp->lug_namelen);
+			TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash);
+			if (usrp->lug_cred != NULL)
+				crfree(usrp->lug_cred);
+			free(usrp, M_NFSUSERGROUP);
+		}
+		hp = &nfsgrouphash[i];
+		TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) {
+			TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash);
+			hp2 = NFSGROUPNAMEHASH(usrp->lug_name,
+			    usrp->lug_namelen);
+			TAILQ_REMOVE(&hp2->lughead, usrp, lug_namehash);
+			if (usrp->lug_cred != NULL)
+				crfree(usrp->lug_cred);
+			free(usrp, M_NFSUSERGROUP);
+		}
+		mtx_destroy(&nfsuserhash[i].mtx);
+		mtx_destroy(&nfsusernamehash[i].mtx);
+		mtx_destroy(&nfsgroupnamehash[i].mtx);
+		mtx_destroy(&nfsgrouphash[i].mtx);
+	}
+	free(nfsuserhash, M_NFSUSERGROUP);
+	free(nfsusernamehash, M_NFSUSERGROUP);
+	free(nfsgrouphash, M_NFSUSERGROUP);
+	free(nfsgroupnamehash, M_NFSUSERGROUP);
+	free(nfsrv_dnsname, M_NFSSTRING);
+}
+
+/*
  * This function scans a byte string and checks for UTF-8 compliance.
  * It returns 0 if it conforms and NFSERR_INVAL if not.
  */
@@ -3455,7 +3889,7 @@
 	 */
 	if ((nd->nd_flag & ND_GSSINITREPLY) == 0 &&
 	    nfs_bigreply[nd->nd_procnum]) {
-		NFSMCLGET(mreq, M_WAIT);
+		NFSMCLGET(mreq, M_WAITOK);
 		nd->nd_mreq = mreq;
 		nd->nd_mb = mreq;
 	} else {
@@ -3510,3 +3944,307 @@
 	NFSUNLOCKSOCK();
 }
 
+APPLESTATIC int
+nfsv4_getipaddr(struct nfsrv_descript *nd, struct sockaddr_storage *sa,
+    int *isudp)
+{
+	struct sockaddr_in *sad;
+	struct sockaddr_in6 *sad6;
+	struct in_addr saddr;
+	uint32_t portnum, *tl;
+	int af = 0, i, j, k;
+	char addr[64], protocol[5], *cp;
+	int cantparse = 0, error = 0;
+	uint16_t portv;
+
+	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
+	i = fxdr_unsigned(int, *tl);
+	if (i >= 3 && i <= 4) {
+		error = nfsrv_mtostr(nd, protocol, i);
+		if (error)
+			goto nfsmout;
+		if (strcmp(protocol, "tcp") == 0) {
+			af = AF_INET;
+			*isudp = 0;
+		} else if (strcmp(protocol, "udp") == 0) {
+			af = AF_INET;
+			*isudp = 1;
+		} else if (strcmp(protocol, "tcp6") == 0) {
+			af = AF_INET6;
+			*isudp = 0;
+		} else if (strcmp(protocol, "udp6") == 0) {
+			af = AF_INET6;
+			*isudp = 1;
+		} else
+			cantparse = 1;
+	} else {
+		cantparse = 1;
+		if (i > 0) {
+			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
+			if (error)
+				goto nfsmout;
+		}
+	}
+	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
+	i = fxdr_unsigned(int, *tl);
+	if (i < 0) {
+		error = NFSERR_BADXDR;
+		goto nfsmout;
+	} else if (cantparse == 0 && i >= 11 && i < 64) {
+		/*
+		 * The shortest address is 11chars and the longest is < 64.
+		 */
+		error = nfsrv_mtostr(nd, addr, i);
+		if (error)
+			goto nfsmout;
+
+		/* Find the port# at the end and extract that. */
+		i = strlen(addr);
+		k = 0;
+		cp = &addr[i - 1];
+		/* Count back two '.'s from end to get port# field. */
+		for (j = 0; j < i; j++) {
+			if (*cp == '.') {
+				k++;
+				if (k == 2)
+					break;
+			}
+			cp--;
+		}
+		if (k == 2) {
+			/*
+			 * The NFSv4 port# is appended as .N.N, where N is
+			 * a decimal # in the range 0-255, just like an inet4
+			 * address. Cheat and use inet_aton(), which will
+			 * return a Class A address and then shift the high
+			 * order 8bits over to convert it to the port#.
+			 */
+			*cp++ = '\0';
+			if (inet_aton(cp, &saddr) == 1) {
+				portnum = ntohl(saddr.s_addr);
+				portv = (uint16_t)((portnum >> 16) |
+				    (portnum & 0xff));
+			} else
+				cantparse = 1;
+		} else
+			cantparse = 1;
+		if (cantparse == 0) {
+			if (af == AF_INET) {
+				sad = (struct sockaddr_in *)sa;
+				if (inet_pton(af, addr, &sad->sin_addr) == 1) {
+					sad->sin_len = sizeof(*sad);
+					sad->sin_family = AF_INET;
+					sad->sin_port = htons(portv);
+					return (0);
+				}
+			} else {
+				sad6 = (struct sockaddr_in6 *)sa;
+				if (inet_pton(af, addr, &sad6->sin6_addr)
+				    == 1) {
+					sad6->sin6_len = sizeof(*sad6);
+					sad6->sin6_family = AF_INET6;
+					sad6->sin6_port = htons(portv);
+					return (0);
+				}
+			}
+		}
+	} else {
+		if (i > 0) {
+			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
+			if (error)
+				goto nfsmout;
+		}
+	}
+	error = EPERM;
+nfsmout:
+	return (error);
+}
+
+/*
+ * Handle an NFSv4.1 Sequence request for the session.
+ * If reply != NULL, use it to return the cached reply, as required.
+ * The client gets a cached reply via this call for callbacks, however the
+ * server gets a cached reply via the nfsv4_seqsess_cachereply() call.
+ */
+int
+nfsv4_seqsession(uint32_t seqid, uint32_t slotid, uint32_t highslot,
+    struct nfsslot *slots, struct mbuf **reply, uint16_t maxslot)
+{
+	int error;
+
+	error = 0;
+	if (reply != NULL)
+		*reply = NULL;
+	if (slotid > maxslot)
+		return (NFSERR_BADSLOT);
+	if (seqid == slots[slotid].nfssl_seq) {
+		/* A retry. */
+		if (slots[slotid].nfssl_inprog != 0)
+			error = NFSERR_DELAY;
+		else if (slots[slotid].nfssl_reply != NULL) {
+			if (reply != NULL) {
+				*reply = slots[slotid].nfssl_reply;
+				slots[slotid].nfssl_reply = NULL;
+			}
+			slots[slotid].nfssl_inprog = 1;
+			error = NFSERR_REPLYFROMCACHE;
+		} else
+			/* No reply cached, so just do it. */
+			slots[slotid].nfssl_inprog = 1;
+	} else if ((slots[slotid].nfssl_seq + 1) == seqid) {
+		if (slots[slotid].nfssl_reply != NULL)
+			m_freem(slots[slotid].nfssl_reply);
+		slots[slotid].nfssl_reply = NULL;
+		slots[slotid].nfssl_inprog = 1;
+		slots[slotid].nfssl_seq++;
+	} else
+		error = NFSERR_SEQMISORDERED;
+	return (error);
+}
+
+/*
+ * Cache this reply for the slot.
+ * Use the "rep" argument to return the cached reply if repstat is set to
+ * NFSERR_REPLYFROMCACHE. The client never sets repstat to this value.
+ */
+void
+nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat,
+   struct mbuf **rep)
+{
+
+	if (repstat == NFSERR_REPLYFROMCACHE) {
+		*rep = slots[slotid].nfssl_reply;
+		slots[slotid].nfssl_reply = NULL;
+	} else {
+		if (slots[slotid].nfssl_reply != NULL)
+			m_freem(slots[slotid].nfssl_reply);
+		slots[slotid].nfssl_reply = *rep;
+	}
+	slots[slotid].nfssl_inprog = 0;
+}
+
+/*
+ * Generate the xdr for an NFSv4.1 Sequence Operation.
+ */
+APPLESTATIC void
+nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd,
+    struct nfsclsession *sep, int dont_replycache)
+{
+	uint32_t *tl, slotseq = 0;
+	int error, maxslot, slotpos;
+	uint8_t sessionid[NFSX_V4SESSIONID];
+
+	error = nfsv4_sequencelookup(nmp, sep, &slotpos, &maxslot, &slotseq,
+	    sessionid);
+
+	/* Build the Sequence arguments. */
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
+	nd->nd_sequence = tl;
+	bcopy(sessionid, tl, NFSX_V4SESSIONID);
+	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+	nd->nd_slotseq = tl;
+	if (error == 0) {
+		*tl++ = txdr_unsigned(slotseq);
+		*tl++ = txdr_unsigned(slotpos);
+		*tl++ = txdr_unsigned(maxslot);
+		if (dont_replycache == 0)
+			*tl = newnfs_true;
+		else
+			*tl = newnfs_false;
+	} else {
+		/*
+		 * There are two errors and the rest of the session can
+		 * just be zeros.
+		 * NFSERR_BADSESSION: This bad session should just generate
+		 *    the same error again when the RPC is retried.
+		 * ESTALE: A forced dismount is in progress and will cause the
+		 *    RPC to fail later.
+		 */
+		*tl++ = 0;
+		*tl++ = 0;
+		*tl++ = 0;
+		*tl = 0;
+	}
+	nd->nd_flag |= ND_HASSEQUENCE;
+}
+
+int
+nfsv4_sequencelookup(struct nfsmount *nmp, struct nfsclsession *sep,
+    int *slotposp, int *maxslotp, uint32_t *slotseqp, uint8_t *sessionid)
+{
+	int i, maxslot, slotpos;
+	uint64_t bitval;
+
+	/* Find an unused slot. */
+	slotpos = -1;
+	maxslot = -1;
+	mtx_lock(&sep->nfsess_mtx);
+	do {
+		if (nmp != NULL && sep->nfsess_defunct != 0) {
+			/* Just return the bad session. */
+			bcopy(sep->nfsess_sessionid, sessionid,
+			    NFSX_V4SESSIONID);
+			mtx_unlock(&sep->nfsess_mtx);
+			return (NFSERR_BADSESSION);
+		}
+		bitval = 1;
+		for (i = 0; i < sep->nfsess_foreslots; i++) {
+			if ((bitval & sep->nfsess_slots) == 0) {
+				slotpos = i;
+				sep->nfsess_slots |= bitval;
+				sep->nfsess_slotseq[i]++;
+				*slotseqp = sep->nfsess_slotseq[i];
+				break;
+			}
+			bitval <<= 1;
+		}
+		if (slotpos == -1) {
+			/*
+			 * If a forced dismount is in progress, just return.
+			 * This RPC attempt will fail when it calls
+			 * newnfs_request().
+			 */
+			if (nmp != NULL &&
+			    (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
+			    != 0) {
+				mtx_unlock(&sep->nfsess_mtx);
+				return (ESTALE);
+			}
+			/* Wake up once/sec, to check for a forced dismount. */
+			(void)mtx_sleep(&sep->nfsess_slots, &sep->nfsess_mtx,
+			    PZERO, "nfsclseq", hz);
+		}
+	} while (slotpos == -1);
+	/* Now, find the highest slot in use. (nfsc_slots is 64bits) */
+	bitval = 1;
+	for (i = 0; i < 64; i++) {
+		if ((bitval & sep->nfsess_slots) != 0)
+			maxslot = i;
+		bitval <<= 1;
+	}
+	bcopy(sep->nfsess_sessionid, sessionid, NFSX_V4SESSIONID);
+	mtx_unlock(&sep->nfsess_mtx);
+	*slotposp = slotpos;
+	*maxslotp = maxslot;
+	return (0);
+}
+
+/*
+ * Free a session slot.
+ */
+APPLESTATIC void
+nfsv4_freeslot(struct nfsclsession *sep, int slot)
+{
+	uint64_t bitval;
+
+	bitval = 1;
+	if (slot > 0)
+		bitval <<= slot;
+	mtx_lock(&sep->nfsess_mtx);
+	if ((bitval & sep->nfsess_slots) == 0)
+		printf("freeing free slot!!\n");
+	sep->nfsess_slots &= ~bitval;
+	wakeup(&sep->nfsess_slots);
+	mtx_unlock(&sep->nfsess_mtx);
+}
+

Modified: trunk/sys/fs/nfs/nfs_var.h
===================================================================
--- trunk/sys/fs/nfs/nfs_var.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfs_var.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfs_var.h 321031 2017-07-15 19:24:54Z rmacklem $
  */
 
 /*
@@ -61,6 +62,7 @@
 struct nfsstate;
 struct nfslock;
 struct nfsclient;
+struct nfsdsession;
 struct nfslockconflict;
 struct nfsd_idargs;
 struct nfsd_clid;
@@ -69,9 +71,12 @@
 struct nfsclopen;
 struct nfsclopenhead;
 struct nfsclclient;
+struct nfsclsession;
 struct nfscllockowner;
 struct nfscllock;
 struct nfscldeleg;
+struct nfscllayout;
+struct nfscldevinfo;
 struct nfsv4lock;
 struct nfsvattr;
 struct nfs_vattr;
@@ -87,8 +92,11 @@
 /* nfs_nfsdstate.c */
 int nfsrv_setclient(struct nfsrv_descript *, struct nfsclient **,
     nfsquad_t *, nfsquad_t *, NFSPROC_T *);
-int nfsrv_getclient(nfsquad_t, int, struct nfsclient **, nfsquad_t,
-    struct nfsrv_descript *, NFSPROC_T *);
+int nfsrv_getclient(nfsquad_t, int, struct nfsclient **, struct nfsdsession *,
+    nfsquad_t, uint32_t, struct nfsrv_descript *, NFSPROC_T *);
+int nfsrv_destroyclient(nfsquad_t, NFSPROC_T *);
+int nfsrv_destroysession(struct nfsrv_descript *, uint8_t *);
+int nfsrv_freestateid(struct nfsrv_descript *, nfsv4stateid_t *, NFSPROC_T *);
 int nfsrv_adminrevoke(struct nfsd_clid *, NFSPROC_T *);
 void nfsrv_dumpclients(struct nfsd_dumpclients *, int);
 void nfsrv_dumplocks(vnode_t, struct nfsd_dumplocks *, int, NFSPROC_T *);
@@ -102,8 +110,8 @@
     vnode_t, struct nfsrv_descript *, NFSPROC_T *, int);
 int nfsrv_openupdate(vnode_t, struct nfsstate *, nfsquad_t,
     nfsv4stateid_t *, struct nfsrv_descript *, NFSPROC_T *);
-int nfsrv_delegupdate(nfsquad_t, nfsv4stateid_t *, vnode_t, int,
-    struct ucred *, NFSPROC_T *);
+int nfsrv_delegupdate(struct nfsrv_descript *, nfsquad_t, nfsv4stateid_t *,
+    vnode_t, int, struct ucred *, NFSPROC_T *);
 int nfsrv_releaselckown(struct nfsstate *, nfsquad_t, NFSPROC_T *);
 void nfsrv_zapclient(struct nfsclient *, NFSPROC_T *);
 int nfssvc_idname(struct nfsd_idargs *);
@@ -124,6 +132,11 @@
 int nfsrv_nfsuserdport(u_short, NFSPROC_T *);
 void nfsrv_nfsuserddelport(void);
 void nfsrv_throwawayallstate(NFSPROC_T *);
+int nfsrv_checksequence(struct nfsrv_descript *, uint32_t, uint32_t *,
+    uint32_t *, int, uint32_t *, NFSPROC_T *);
+int nfsrv_checkreclaimcomplete(struct nfsrv_descript *);
+void nfsrv_cache_session(uint8_t *, uint32_t, int, struct mbuf **);
+void nfsrv_freeallbackchannel_xprts(void);
 
 /* nfs_nfsdserv.c */
 int nfsrvd_access(struct nfsrv_descript *, int,
@@ -208,21 +221,38 @@
     vnode_t, NFSPROC_T *, struct nfsexstuff *);
 int nfsrvd_pathconf(struct nfsrv_descript *, int,
     vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_exchangeid(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_createsession(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_sequence(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_reclaimcomplete(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_destroyclientid(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_destroysession(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_freestateid(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
+int nfsrvd_notsupp(struct nfsrv_descript *, int,
+    vnode_t, NFSPROC_T *, struct nfsexstuff *);
 
 /* nfs_nfsdsocket.c */
 void nfsrvd_rephead(struct nfsrv_descript *);
-void nfsrvd_dorpc(struct nfsrv_descript *, int, NFSPROC_T *);
+void nfsrvd_dorpc(struct nfsrv_descript *, int, u_char *, int, u_int32_t,
+    NFSPROC_T *);
 
 /* nfs_nfsdcache.c */
 void nfsrvd_initcache(void);
-int nfsrvd_getcache(struct nfsrv_descript *, struct socket *);
-struct nfsrvcache *nfsrvd_updatecache(struct nfsrv_descript *,
-    struct socket *);
-void nfsrvd_sentcache(struct nfsrvcache *, struct socket *, int);
+int nfsrvd_getcache(struct nfsrv_descript *);
+struct nfsrvcache *nfsrvd_updatecache(struct nfsrv_descript *);
+void nfsrvd_sentcache(struct nfsrvcache *, int, uint32_t);
 void nfsrvd_cleancache(void);
 void nfsrvd_refcache(struct nfsrvcache *);
 void nfsrvd_derefcache(struct nfsrvcache *);
 void nfsrvd_delcache(struct nfsrvcache *);
+void nfsrc_trimcache(uint64_t, uint32_t, int);
 
 /* nfs_commonsubs.c */
 void newnfs_init(void);
@@ -254,14 +284,26 @@
 int nfsv4_getref_nonblock(struct nfsv4lock *);
 int nfsv4_testlock(struct nfsv4lock *);
 int nfsrv_mtostr(struct nfsrv_descript *, char *, int);
+void nfsrv_cleanusergroup(void);
 int nfsrv_checkutf8(u_int8_t *, int);
 int newnfs_sndlock(int *);
 void newnfs_sndunlock(int *);
+int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_storage *,
+    int *);
+int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *,
+    struct mbuf **, uint16_t);
+void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, int, struct mbuf **);
+void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *,
+    struct nfsclsession *, int);
+int nfsv4_sequencelookup(struct nfsmount *, struct nfsclsession *, int *,
+    int *, uint32_t *, uint8_t *);
+void nfsv4_freeslot(struct nfsclsession *, int);
+struct ucred *nfsrv_getgrpscred(struct ucred *);
 
 /* nfs_clcomsubs.c */
 void nfsm_uiombuf(struct nfsrv_descript *, struct uio *, int);
 void nfscl_reqstart(struct nfsrv_descript *, int, struct nfsmount *,
-    u_int8_t *, int, u_int32_t **);
+    u_int8_t *, int, u_int32_t **, struct nfsclsession *);
 nfsuint64 *nfscl_getcookie(struct nfsnode *, off_t off, int);
 void nfscl_fillsattr(struct nfsrv_descript *, struct vattr *,
       vnode_t, int, u_int32_t);
@@ -311,14 +353,13 @@
     NFSPATHLEN_T *);
 void nfsd_init(void);
 int nfsd_checkrootexp(struct nfsrv_descript *);
+void nfsd_getminorvers(struct nfsrv_descript *, u_char *, u_char **, int *,
+    u_int32_t *);
 
 /* nfs_clvfsops.c */
 void nfscl_retopts(struct nfsmount *, char *, size_t);
 
 /* nfs_commonport.c */
-int nfsrv_checksockseqnum(struct socket *, tcp_seq);
-int nfsrv_getsockseqnum(struct socket *, tcp_seq *);
-int nfsrv_getsocksndseq(struct socket *, tcp_seq *, tcp_seq *);
 int nfsrv_lookupfilename(struct nameidata *, char *, NFSPROC_T *);
 void nfsrv_object_create(vnode_t, NFSPROC_T *);
 int nfsrv_mallocmget_limit(void);
@@ -360,12 +401,12 @@
     struct nfsclopen *, struct ucred *, NFSPROC_T *, int);
 int nfsrpc_openconfirm(vnode_t, u_int8_t *, int, struct nfsclopen *,
     struct ucred *, NFSPROC_T *);
-int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *,
+int nfsrpc_setclient(struct nfsmount *, struct nfsclclient *, int,
     struct ucred *, NFSPROC_T *);
 int nfsrpc_getattr(vnode_t, struct ucred *, NFSPROC_T *,
     struct nfsvattr *, void *);
 int nfsrpc_getattrnovp(struct nfsmount *, u_int8_t *, int, int,
-    struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *);
+    struct ucred *, NFSPROC_T *, struct nfsvattr *, u_int64_t *, uint32_t *);
 int nfsrpc_setattr(vnode_t, struct vattr *, NFSACL_T *, struct ucred *,
     NFSPROC_T *, struct nfsvattr *, int *, void *);
 int nfsrpc_lookup(vnode_t, char *, int, struct ucred *, NFSPROC_T *,
@@ -404,7 +445,7 @@
 int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *, 
     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, int *, void *);
 int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *,
-    NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *);
+    NFSPROC_T *, struct nfsvattr *, int *, void *);
 int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int,
     struct ucred *, NFSPROC_T *, void *, int);
 int nfsrpc_lockt(struct nfsrv_descript *, vnode_t,
@@ -419,7 +460,7 @@
     NFSPROC_T *, struct nfsvattr *, int *, void *);
 int nfsrpc_pathconf(vnode_t, struct nfsv3_pathconf *,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
-int nfsrpc_renew(struct nfsclclient *, struct ucred *,
+int nfsrpc_renew(struct nfsclclient *, struct nfsclds *, struct ucred *,
     NFSPROC_T *);
 int nfsrpc_rellockown(struct nfsmount *, struct nfscllockowner *, uint8_t *,
     int, struct ucred *, NFSPROC_T *);
@@ -429,16 +470,42 @@
     struct nfsmount *, NFSPROC_T *, int);
 int nfsrpc_getacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *);
 int nfsrpc_setacl(vnode_t, struct ucred *, NFSPROC_T *, NFSACL_T *, void *);
+int nfsrpc_exchangeid(struct nfsmount *, struct nfsclclient *,
+    struct nfssockreq *, uint32_t, struct nfsclds **, struct ucred *,
+    NFSPROC_T *);
+int nfsrpc_createsession(struct nfsmount *, struct nfsclsession *,
+    struct nfssockreq *, uint32_t, int, struct ucred *, NFSPROC_T *);
+int nfsrpc_destroysession(struct nfsmount *, struct nfsclclient *,
+    struct ucred *, NFSPROC_T *);
+int nfsrpc_destroyclient(struct nfsmount *, struct nfsclclient *,
+    struct ucred *, NFSPROC_T *);
+int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t, uint64_t,
+    uint64_t, int, nfsv4stateid_t *, int *, struct nfsclflayouthead *,
+    struct ucred *, NFSPROC_T *, void *);
+int nfsrpc_getdeviceinfo(struct nfsmount *, uint8_t *, int, uint32_t *,
+    struct nfscldevinfo **, struct ucred *, NFSPROC_T *);
+int nfsrpc_layoutcommit(struct nfsmount *, uint8_t *, int, int,
+    uint64_t, uint64_t, uint64_t, nfsv4stateid_t *, int, int, uint8_t *,
+    struct ucred *, NFSPROC_T *, void *);
+int nfsrpc_layoutreturn(struct nfsmount *, uint8_t *, int, int, int, uint32_t,
+    int, uint64_t, uint64_t, nfsv4stateid_t *, int, uint32_t *, struct ucred *,
+    NFSPROC_T *, void *);
+int nfsrpc_reclaimcomplete(struct nfsmount *, struct ucred *, NFSPROC_T *);
+int nfscl_doiods(vnode_t, struct uio *, int *, int *, uint32_t, int,
+    struct ucred *, NFSPROC_T *);
+int nfscl_findlayoutforio(struct nfscllayout *, uint64_t, uint32_t,
+    struct nfsclflayout **);
+void nfscl_freenfsclds(struct nfsclds *);
 
 /* nfs_clstate.c */
 int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int,
     struct ucred *, NFSPROC_T *, struct nfsclowner **, struct nfsclopen **,
     int *, int *, int);
-int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, struct ucred *,
+int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *,
     NFSPROC_T *, nfsv4stateid_t *, void **);
-void nfscl_ownerrelease(struct nfsclowner *, int, int, int);
-void nfscl_openrelease(struct nfsclopen *, int, int);
-int nfscl_getcl(vnode_t, struct ucred *, NFSPROC_T *,
+void nfscl_ownerrelease(struct nfsmount *, struct nfsclowner *, int, int, int);
+void nfscl_openrelease(struct nfsmount *, struct nfsclopen *, int, int);
+int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, int,
     struct nfsclclient **);
 struct nfsclclient *nfscl_findcl(struct nfsmount *);
 void nfscl_clientrelease(struct nfsclclient *);
@@ -490,6 +557,21 @@
 int nfscl_tryclose(struct nfsclopen *, struct ucred *,
     struct nfsmount *, NFSPROC_T *);
 void nfscl_cleanup(NFSPROC_T *);
+int nfscl_layout(struct nfsmount *, vnode_t, u_int8_t *, int, nfsv4stateid_t *,
+    int, struct nfsclflayouthead *, struct nfscllayout **, struct ucred *,
+    NFSPROC_T *);
+struct nfscllayout *nfscl_getlayout(struct nfsclclient *, uint8_t *, int,
+    uint64_t, struct nfsclflayout **, int *);
+void nfscl_rellayout(struct nfscllayout *, int);
+struct nfscldevinfo *nfscl_getdevinfo(struct nfsclclient *, uint8_t *,
+    struct nfscldevinfo *);
+void nfscl_reldevinfo(struct nfscldevinfo *);
+int nfscl_adddevinfo(struct nfsmount *, struct nfscldevinfo *,
+    struct nfsclflayout *);
+void nfscl_freelayout(struct nfscllayout *);
+void nfscl_freeflayout(struct nfsclflayout *);
+void nfscl_freedevinfo(struct nfscldevinfo *);
+int nfscl_layoutcommit(vnode_t, NFSPROC_T *);
 
 /* nfs_clport.c */
 int nfscl_nget(mount_t, vnode_t, struct nfsfh *,
@@ -511,7 +593,7 @@
 void nfscl_init(void);
 
 /* nfs_clbio.c */
-int ncl_flush(vnode_t, int, struct ucred *, NFSPROC_T *, int, int);
+int ncl_flush(vnode_t, int, NFSPROC_T *, int, int);
 
 /* nfs_clnode.c */
 void ncl_invalcaches(vnode_t);
@@ -561,7 +643,7 @@
     nfsv4stateid_t *, struct nfsstate *, int *, struct nfsvattr *, int32_t *,
     int, NFSACL_T *, nfsattrbit_t *, struct ucred *, NFSPROC_T *,
     struct nfsexstuff *, vnode_t *);
-void nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct ucred *,
+int nfsvno_updfilerev(vnode_t, struct nfsvattr *, struct ucred *,
     NFSPROC_T *);
 int nfsvno_fillattr(struct nfsrv_descript *, struct mount *, vnode_t,
     struct nfsvattr *, fhandle_t *, int, nfsattrbit_t *,
@@ -579,6 +661,7 @@
 int nfsrv_v4rootexport(void *, struct ucred *, NFSPROC_T *);
 int nfsvno_testexp(struct nfsrv_descript *, struct nfsexstuff *);
 uint32_t nfsrv_hashfh(fhandle_t *);
+uint32_t nfsrv_hashsessionid(uint8_t *);
 void nfsrv_backupstable(void);
 
 /* nfs_commonkrpc.c */
@@ -588,7 +671,8 @@
 int newnfs_msleep(struct thread *, void *, struct mtx *, int, char *, int);
 int newnfs_request(struct nfsrv_descript *, struct nfsmount *,
     struct nfsclient *, struct nfssockreq *, vnode_t, NFSPROC_T *,
-    struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *);
+    struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *,
+    struct nfsclsession *);
 int newnfs_connect(struct nfsmount *, struct nfssockreq *,
     struct ucred *, NFSPROC_T *, int);
 void newnfs_disconnect(struct nfssockreq *);

Modified: trunk/sys/fs/nfs/nfscl.h
===================================================================
--- trunk/sys/fs/nfs/nfscl.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfscl.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfscl.h 244042 2012-12-08 22:52:39Z rmacklem $
  */
 
 #ifndef	_NFS_NFSCL_H
@@ -49,7 +50,7 @@
  */
 #define	NFSCL_REQSTART(n, p, v) 					\
 	nfscl_reqstart((n), (p), VFSTONFS((v)->v_mount), 		\
-	    VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL)
+	    VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL, NULL)
 
 /*
  * These two macros convert between a lease duration and renew interval.

Modified: trunk/sys/fs/nfs/nfsclstate.h
===================================================================
--- trunk/sys/fs/nfs/nfsclstate.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsclstate.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsclstate.h 325407 2017-11-04 21:30:27Z rmacklem $
  */
 
 #ifndef _NFS_NFSCLSTATE_H_
@@ -40,26 +41,78 @@
 LIST_HEAD(nfsclownerhead, nfsclowner);
 TAILQ_HEAD(nfscldeleghead, nfscldeleg);
 LIST_HEAD(nfscldeleghash, nfscldeleg);
+TAILQ_HEAD(nfscllayouthead, nfscllayout);
+LIST_HEAD(nfscllayouthash, nfscllayout);
+LIST_HEAD(nfsclflayouthead, nfsclflayout);
+LIST_HEAD(nfscldevinfohead, nfscldevinfo);
+LIST_HEAD(nfsclrecalllayouthead, nfsclrecalllayout);
 #define	NFSCLDELEGHASHSIZE	256
-#define	NFSCLDELEGHASH(c, f, l)						\
+#define	NFSCLDELEGHASH(c, f, l)							\
 	(&((c)->nfsc_deleghash[ncl_hash((f), (l)) % NFSCLDELEGHASHSIZE]))
+#define	NFSCLLAYOUTHASHSIZE	256
+#define	NFSCLLAYOUTHASH(c, f, l)						\
+	(&((c)->nfsc_layouthash[ncl_hash((f), (l)) % NFSCLLAYOUTHASHSIZE]))
 
+/* Structure for NFSv4.1 session stuff. */
+struct nfsclsession {
+	struct mtx	nfsess_mtx;
+	struct nfsslot	nfsess_cbslots[NFSV4_CBSLOTS];
+	nfsquad_t	nfsess_clientid;
+	SVCXPRT		*nfsess_xprt;		/* For backchannel callback */
+	uint32_t	nfsess_slotseq[64];	/* Max for 64bit nm_slots */
+	uint64_t	nfsess_slots;
+	uint32_t	nfsess_sequenceid;
+	uint32_t	nfsess_maxcache;	/* Max size for cached reply. */
+	uint16_t	nfsess_foreslots;
+	uint16_t	nfsess_backslots;
+	uint8_t		nfsess_sessionid[NFSX_V4SESSIONID];
+	uint8_t		nfsess_defunct;		/* Non-zero for old sessions */
+};
+
+/*
+ * This structure holds the session, clientid and related information
+ * needed for an NFSv4.1 Meta Data Server (MDS) or Data Server (DS).
+ * It is malloc'd to the correct length.
+ */
+struct nfsclds {
+	TAILQ_ENTRY(nfsclds)	nfsclds_list;
+	struct nfsclsession	nfsclds_sess;
+	struct mtx		nfsclds_mtx;
+	struct nfssockreq	*nfsclds_sockp;
+	time_t			nfsclds_expire;
+	uint16_t		nfsclds_flags;
+	uint16_t		nfsclds_servownlen;
+	uint8_t			nfsclds_verf[NFSX_VERF];
+	uint8_t			nfsclds_serverown[0];
+};
+
+/*
+ * Flags for nfsclds_flags.
+ */
+#define	NFSCLDS_HASWRITEVERF	0x0001
+#define	NFSCLDS_MDS		0x0002
+#define	NFSCLDS_DS		0x0004
+#define	NFSCLDS_CLOSED		0x0008
+
 struct nfsclclient {
 	LIST_ENTRY(nfsclclient) nfsc_list;
 	struct nfsclownerhead	nfsc_owner;
 	struct nfscldeleghead	nfsc_deleg;
 	struct nfscldeleghash	nfsc_deleghash[NFSCLDELEGHASHSIZE];
-	struct nfsv4lock nfsc_lock;
-	struct proc	*nfsc_renewthread;
-	struct nfsmount	*nfsc_nmp;
-	nfsquad_t	nfsc_clientid;
-	time_t		nfsc_expire;
-	u_int32_t	nfsc_clientidrev;
-	u_int32_t	nfsc_renew;
-	u_int32_t	nfsc_cbident;
-	u_int16_t	nfsc_flags;
-	u_int16_t	nfsc_idlen;
-	u_int8_t	nfsc_id[1];	/* Malloc'd to correct length */
+	struct nfscllayouthead	nfsc_layout;
+	struct nfscllayouthash	nfsc_layouthash[NFSCLLAYOUTHASHSIZE];
+	struct nfscldevinfohead	nfsc_devinfo;
+	struct nfsv4lock	nfsc_lock;
+	struct proc		*nfsc_renewthread;
+	struct nfsmount		*nfsc_nmp;
+	time_t			nfsc_expire;
+	u_int32_t		nfsc_clientidrev;
+	u_int32_t		nfsc_rev;
+	u_int32_t		nfsc_renew;
+	u_int32_t		nfsc_cbident;
+	u_int16_t		nfsc_flags;
+	u_int16_t		nfsc_idlen;
+	u_int8_t		nfsc_id[1];	/* Malloc'd to correct length */
 };
 
 /*
@@ -176,6 +229,141 @@
 };
 
 /*
+ * MALLOC'd to the correct length to accommodate the file handle.
+ */
+struct nfscllayout {
+	TAILQ_ENTRY(nfscllayout)	nfsly_list;
+	LIST_ENTRY(nfscllayout)		nfsly_hash;
+	nfsv4stateid_t			nfsly_stateid;
+	struct nfsv4lock		nfsly_lock;
+	uint64_t			nfsly_filesid[2];
+	uint64_t			nfsly_lastbyte;
+	struct nfsclflayouthead		nfsly_flayread;
+	struct nfsclflayouthead		nfsly_flayrw;
+	struct nfsclrecalllayouthead	nfsly_recall;
+	time_t				nfsly_timestamp;
+	struct nfsclclient		*nfsly_clp;
+	uint16_t			nfsly_flags;
+	uint16_t			nfsly_fhlen;
+	uint8_t				nfsly_fh[1];
+};
+
+/*
+ * Flags for nfsly_flags.
+ */
+#define	NFSLY_FILES		0x0001
+#define	NFSLY_BLOCK		0x0002
+#define	NFSLY_OBJECT		0x0004
+#define	NFSLY_RECALL		0x0008
+#define	NFSLY_RECALLFILE	0x0010
+#define	NFSLY_RECALLFSID	0x0020
+#define	NFSLY_RECALLALL		0x0040
+#define	NFSLY_RETONCLOSE	0x0080
+#define	NFSLY_WRITTEN		0x0100	/* Has been used to write to a DS. */
+
+/*
+ * MALLOC'd to the correct length to accommodate the file handle list.
+ * These hang off of nfsly_flayread and nfsly_flayrw, sorted in increasing
+ * offset order.
+ * The nfsly_flayread list holds the ones with iomode == NFSLAYOUTIOMODE_READ,
+ * whereas the nfsly_flayrw holds the ones with iomode == NFSLAYOUTIOMODE_RW.
+ */
+struct nfsclflayout {
+	LIST_ENTRY(nfsclflayout)	nfsfl_list;
+	uint8_t				nfsfl_dev[NFSX_V4DEVICEID];
+	uint64_t			nfsfl_off;
+	uint64_t			nfsfl_end;
+	uint64_t			nfsfl_patoff;
+	struct nfscldevinfo		*nfsfl_devp;
+	uint32_t			nfsfl_iomode;
+	uint32_t			nfsfl_util;
+	uint32_t			nfsfl_stripe1;
+	uint16_t			nfsfl_flags;
+	uint16_t			nfsfl_fhcnt;
+	struct nfsfh			*nfsfl_fh[1];	/* FH list for DS */
+};
+
+/*
+ * Flags for nfsfl_flags.
+ */
+#define	NFSFL_RECALL	0x0001		/* File layout has been recalled */
+
+/*
+ * Structure that is used to store a LAYOUTRECALL.
+ */
+struct nfsclrecalllayout {
+	LIST_ENTRY(nfsclrecalllayout)	nfsrecly_list;
+	uint64_t			nfsrecly_off;
+	uint64_t			nfsrecly_len;
+	int				nfsrecly_recalltype;
+	uint32_t			nfsrecly_iomode;
+	uint32_t			nfsrecly_stateseqid;
+};
+
+/*
+ * Stores the NFSv4.1 Device Info. Malloc'd to the correct length to
+ * store the list of network connections and list of indices.
+ * nfsdi_data[] is allocated the following way:
+ * - nfsdi_addrcnt * struct nfsclds
+ * - stripe indices, each stored as one byte, since there can be many
+ *   of them. (This implies a limit of 256 on nfsdi_addrcnt, since the
+ *   indices select which address.)
+ */
+struct nfscldevinfo {
+	LIST_ENTRY(nfscldevinfo)	nfsdi_list;
+	uint8_t				nfsdi_deviceid[NFSX_V4DEVICEID];
+	struct nfsclclient		*nfsdi_clp;
+	uint32_t			nfsdi_refcnt;
+	uint32_t			nfsdi_layoutrefs;
+	uint16_t			nfsdi_stripecnt;
+	uint16_t			nfsdi_addrcnt;
+	struct nfsclds			*nfsdi_data[0];
+};
+
+/* These inline functions return values from nfsdi_data[]. */
+/*
+ * Return a pointer to the address at "pos".
+ */
+static __inline struct nfsclds **
+nfsfldi_addr(struct nfscldevinfo *ndi, int pos)
+{
+
+	if (pos >= ndi->nfsdi_addrcnt)
+		return (NULL);
+	return (&ndi->nfsdi_data[pos]);
+}
+
+/*
+ * Return the Nth ("pos") stripe index.
+ */
+static __inline int
+nfsfldi_stripeindex(struct nfscldevinfo *ndi, int pos)
+{
+	uint8_t *valp;
+
+	if (pos >= ndi->nfsdi_stripecnt)
+		return (-1);
+	valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt];
+	valp += pos;
+	return ((int)*valp);
+}
+
+/*
+ * Set the Nth ("pos") stripe index to "val".
+ */
+static __inline void
+nfsfldi_setstripeindex(struct nfscldevinfo *ndi, int pos, uint8_t val)
+{
+	uint8_t *valp;
+
+	if (pos >= ndi->nfsdi_stripecnt)
+		return;
+	valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt];
+	valp += pos;
+	*valp = val;
+}
+
+/*
  * Macro for incrementing the seqid#.
  */
 #define	NFSCL_INCRSEQID(s, n)	do { 					\

Modified: trunk/sys/fs/nfs/nfsdport.h
===================================================================
--- trunk/sys/fs/nfs/nfsdport.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsdport.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsdport.h 284216 2015-06-10 12:17:19Z rmacklem $
  */
 
 /*
@@ -88,14 +89,12 @@
      bcmp(&(f1)->fh_fid, &(f2)->fh_fid, sizeof(struct fid)) == 0)
 
 #define	NFSLOCKHASH(f) 							\
-	(&nfslockhash[nfsrv_hashfh(f) % NFSLOCKHASHSIZE])
+	(&nfslockhash[nfsrv_hashfh(f) % nfsrv_lockhashsize])
 
 #define	NFSFPVNODE(f)	((struct vnode *)((f)->f_data))
 #define	NFSFPCRED(f)	((f)->f_cred)
 #define	NFSFPFLAG(f)	((f)->f_flag)
 
-int fp_getfvp(NFSPROC_T *, int, struct file **, struct vnode **);
-
 #define	NFSNAMEICNDSET(n, c, o, f)	do {				\
 	(n)->cn_cred = (c);						\
 	(n)->cn_nameiop = (o);						\
@@ -117,3 +116,9 @@
 #define	NFSRV_MINFH	(sizeof (fhandle_t))
 #define	NFSRV_MAXFH	(sizeof (fhandle_t))
 
+/* Use this macro for debug printfs. */
+#define	NFSD_DEBUG(level, ...)	do {					\
+		if (nfsd_debuglevel >= (level))				\
+			printf(__VA_ARGS__);				\
+	} while (0)
+

Modified: trunk/sys/fs/nfs/nfskpiport.h
===================================================================
--- trunk/sys/fs/nfs/nfskpiport.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfskpiport.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfskpiport.h 207785 2010-05-08 14:50:12Z rmacklem $
  */
 
 #ifndef _NFS_NFSKPIPORT_H_

Modified: trunk/sys/fs/nfs/nfsm_subs.h
===================================================================
--- trunk/sys/fs/nfs/nfsm_subs.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsm_subs.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsm_subs.h 249592 2013-04-17 21:00:22Z ken $
  */
 
 #ifndef _NFS_NFSM_SUBS_H_
@@ -73,7 +74,7 @@
 	struct mbuf *mb2;
 
 	if (siz > M_TRAILINGSPACE(nd->nd_mb)) {
-		NFSMCLGET(mb2, M_DONTWAIT);
+		NFSMCLGET(mb2, M_NOWAIT);
 		if (siz > MLEN)
 			panic("build > MLEN");
 		mbuf_setlen(mb2, 0);

Modified: trunk/sys/fs/nfs/nfsport.h
===================================================================
--- trunk/sys/fs/nfs/nfsport.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsport.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsport.h 317984 2017-05-08 21:40:42Z rmacklem $
  */
 
 #ifndef _NFS_NFSPORT_H_
@@ -78,7 +79,6 @@
 #include <sys/priv.h>
 #include <sys/kthread.h>
 #include <sys/syscallsubr.h>
-#include <fs/fifofs/fifo.h>
 #include <net/if.h>
 #include <net/radix.h>
 #include <net/route.h>
@@ -141,32 +141,32 @@
  * Allocate mbufs. Must succeed and never set the mbuf ptr to NULL.
  */
 #define	NFSMGET(m)	do { 					\
-		MGET((m), M_TRYWAIT, MT_DATA); 			\
+		MGET((m), M_WAITOK, MT_DATA); 			\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
-			MGET((m), M_TRYWAIT, MT_DATA); 		\
+			MGET((m), M_WAITOK, MT_DATA); 		\
 		} 						\
 	} while (0)
 #define	NFSMGETHDR(m)	do { 					\
-		MGETHDR((m), M_TRYWAIT, MT_DATA);		\
+		MGETHDR((m), M_WAITOK, MT_DATA);		\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
-			MGETHDR((m), M_TRYWAIT, MT_DATA); 	\
+			MGETHDR((m), M_WAITOK, MT_DATA); 	\
 		} 						\
 	} while (0)
 #define	NFSMCLGET(m, w)	do { 					\
-		MGET((m), M_TRYWAIT, MT_DATA); 			\
+		MGET((m), M_WAITOK, MT_DATA); 			\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
-			MGET((m), M_TRYWAIT, MT_DATA); 		\
+			MGET((m), M_WAITOK, MT_DATA); 		\
 		} 						\
 		MCLGET((m), (w));				\
 	} while (0)
 #define	NFSMCLGETHDR(m, w) do { 				\
-		MGETHDR((m), M_TRYWAIT, MT_DATA);		\
+		MGETHDR((m), M_WAITOK, MT_DATA);		\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
-			MGETHDR((m), M_TRYWAIT, MT_DATA); 	\
+			MGETHDR((m), M_WAITOK, MT_DATA); 	\
 		} 						\
 	} while (0)
 #define	NFSMTOD	mtod
@@ -229,6 +229,34 @@
  */
 #define	NFSV4OP_NOPS		40
 
+/*
+ * Additional Ops for NFSv4.1.
+ */
+#define	NFSV4OP_BACKCHANNELCTL	40
+#define	NFSV4OP_BINDCONNTOSESS	41
+#define	NFSV4OP_EXCHANGEID	42
+#define	NFSV4OP_CREATESESSION	43
+#define	NFSV4OP_DESTROYSESSION	44
+#define	NFSV4OP_FREESTATEID	45
+#define	NFSV4OP_GETDIRDELEG	46
+#define	NFSV4OP_GETDEVINFO	47
+#define	NFSV4OP_GETDEVLIST	48
+#define	NFSV4OP_LAYOUTCOMMIT	49
+#define	NFSV4OP_LAYOUTGET	50
+#define	NFSV4OP_LAYOUTRETURN	51
+#define	NFSV4OP_SECINFONONAME	52
+#define	NFSV4OP_SEQUENCE	53
+#define	NFSV4OP_SETSSV		54
+#define	NFSV4OP_TESTSTATEID	55
+#define	NFSV4OP_WANTDELEG	56
+#define	NFSV4OP_DESTROYCLIENTID	57
+#define	NFSV4OP_RECLAIMCOMPL	58
+
+/*
+ * Must be one more than last op#.
+ */
+#define	NFSV41_NOPS		59
+
 /* Quirky case if the illegal op code */
 #define	NFSV4OP_OPILLEGAL	10044
 
@@ -262,6 +290,20 @@
 #define	NFSV4OP_CBNOPS		5
 
 /*
+ * Additional Callback Ops for NFSv4.1 only. Not yet in nfsstats.
+ */
+#define	NFSV4OP_CBLAYOUTRECALL	5
+#define	NFSV4OP_CBNOTIFY	6
+#define	NFSV4OP_CBPUSHDELEG	7
+#define	NFSV4OP_CBRECALLANY	8
+#define	NFSV4OP_CBRECALLOBJAVAIL 9
+#define	NFSV4OP_CBRECALLSLOT	10
+#define	NFSV4OP_CBSEQUENCE	11
+#define	NFSV4OP_CBWANTCANCELLED	12
+#define	NFSV4OP_CBNOTIFYLOCK	13
+#define	NFSV4OP_CBNOTIFYDEVID	14
+
+/*
  * The lower numbers -> 21 are used by NFSv2 and v3. These define higher
  * numbers used by NFSv4.
  * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is
@@ -294,6 +336,27 @@
  * Must be defined as one higher than the last Proc# above.
  */
 #define	NFSV4_NPROCS		41
+
+/* Additional procedures for NFSv4.1. */
+#define	NFSPROC_EXCHANGEID	41
+#define	NFSPROC_CREATESESSION	42
+#define	NFSPROC_DESTROYSESSION	43
+#define	NFSPROC_DESTROYCLIENT	44
+#define	NFSPROC_FREESTATEID	45
+#define	NFSPROC_LAYOUTGET	46
+#define	NFSPROC_GETDEVICEINFO	47
+#define	NFSPROC_LAYOUTCOMMIT	48
+#define	NFSPROC_LAYOUTRETURN	49
+#define	NFSPROC_RECLAIMCOMPL	50
+#define	NFSPROC_WRITEDS		51
+#define	NFSPROC_READDS		52
+#define	NFSPROC_COMMITDS	53
+
+/*
+ * Must be defined as one higher than the last NFSv4.1 Proc# above.
+ */
+#define	NFSV41_NPROCS		54
+
 #endif	/* NFS_V3NPROCS */
 
 /*
@@ -369,6 +432,7 @@
 #include <fs/nfs/rpcv2.h>
 #include <fs/nfs/nfsproto.h>
 #include <fs/nfs/nfs.h>
+#include <fs/nfs/nfsclstate.h>
 #include <fs/nfs/nfs_var.h>
 #include <fs/nfs/nfsm_subs.h>
 #include <fs/nfs/nfsrvcache.h>
@@ -375,7 +439,6 @@
 #include <fs/nfs/nfsrvstate.h>
 #include <fs/nfs/xdr_subs.h>
 #include <fs/nfs/nfscl.h>
-#include <fs/nfs/nfsclstate.h>
 #include <nfsclient/nfsargs.h>
 #include <fs/nfsclient/nfsmount.h>
 
@@ -541,11 +604,6 @@
 #define	NFSREQSPINLOCK		extern struct mtx nfs_req_mutex
 #define	NFSLOCKREQ()		mtx_lock(&nfs_req_mutex)
 #define	NFSUNLOCKREQ()		mtx_unlock(&nfs_req_mutex)
-#define	NFSCACHEMUTEX		extern struct mtx nfs_cache_mutex
-#define	NFSCACHEMUTEXPTR	(&nfs_cache_mutex)
-#define	NFSLOCKCACHE()		mtx_lock(&nfs_cache_mutex)
-#define	NFSUNLOCKCACHE()	mtx_unlock(&nfs_cache_mutex)
-#define	NFSCACHELOCKREQUIRED()	mtx_assert(&nfs_cache_mutex, MA_OWNED)
 #define	NFSSOCKMUTEX		extern struct mtx nfs_slock_mutex
 #define	NFSSOCKMUTEXPTR		(&nfs_slock_mutex)
 #define	NFSLOCKSOCK()		mtx_lock(&nfs_slock_mutex)
@@ -578,6 +636,11 @@
 #define	NFSPROCLISTUNLOCK()	sx_sunlock(&allproc_lock)
 #define	NFSLOCKSOCKREQ(r)	mtx_lock(&((r)->nr_mtx))
 #define	NFSUNLOCKSOCKREQ(r)	mtx_unlock(&((r)->nr_mtx))
+#define	NFSLOCKDS(d)		mtx_lock(&((d)->nfsclds_mtx))
+#define	NFSUNLOCKDS(d)		mtx_unlock(&((d)->nfsclds_mtx))
+#define	NFSSESSIONMUTEXPTR(s)	(&((s)->mtx))
+#define	NFSLOCKSESSION(s)	mtx_lock(&((s)->mtx))
+#define	NFSUNLOCKSESSION(s)	mtx_unlock(&((s)->mtx))
 
 /*
  * Use these macros to initialize/free a mutex.
@@ -667,6 +730,13 @@
 MALLOC_DECLARE(M_NEWNFSDIRECTIO);
 MALLOC_DECLARE(M_NEWNFSMNT);
 MALLOC_DECLARE(M_NEWNFSDROLLBACK);
+MALLOC_DECLARE(M_NEWNFSLAYOUT);
+MALLOC_DECLARE(M_NEWNFSFLAYOUT);
+MALLOC_DECLARE(M_NEWNFSDEVINFO);
+MALLOC_DECLARE(M_NEWNFSSOCKREQ);
+MALLOC_DECLARE(M_NEWNFSCLDS);
+MALLOC_DECLARE(M_NEWNFSLAYRECALL);
+MALLOC_DECLARE(M_NEWNFSDSESSION);
 #define	M_NFSRVCACHE	M_NEWNFSRVCACHE
 #define	M_NFSDCLIENT	M_NEWNFSDCLIENT
 #define	M_NFSDSTATE	M_NEWNFSDSTATE
@@ -686,6 +756,13 @@
 #define	M_NFSV4NODE	M_NEWNFSV4NODE
 #define	M_NFSDIRECTIO	M_NEWNFSDIRECTIO
 #define	M_NFSDROLLBACK	M_NEWNFSDROLLBACK
+#define	M_NFSLAYOUT	M_NEWNFSLAYOUT
+#define	M_NFSFLAYOUT	M_NEWNFSFLAYOUT
+#define	M_NFSDEVINFO	M_NEWNFSDEVINFO
+#define	M_NFSSOCKREQ	M_NEWNFSSOCKREQ
+#define	M_NFSCLDS	M_NEWNFSCLDS
+#define	M_NFSLAYRECALL	M_NEWNFSLAYRECALL
+#define	M_NFSDSESSION	M_NEWNFSDSESSION
 
 #define	NFSINT_SIGMASK(set) 						\
 	(SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) ||	\
@@ -706,17 +783,19 @@
 /*
  * Set this macro to index() or strchr(), whichever is supported.
  */
-#define	STRCHR(s, c)	index((s), (c))
+#define	STRCHR(s, c)		strchr((s), (c))
 
 /*
  * Set the n_time in the client write rpc, as required.
  */
-#define	NFSWRITERPC_SETTIME(w, n, v4)					\
+#define	NFSWRITERPC_SETTIME(w, n, a, v4)				\
 	do {								\
 		if (w) {						\
-			(n)->n_mtime = (n)->n_vattr.na_vattr.va_mtime; \
+			mtx_lock(&((n)->n_mtx));			\
+			(n)->n_mtime = (a)->na_mtime;			\
 			if (v4)						\
-			    (n)->n_change = (n)->n_vattr.na_vattr.va_filerev; \
+				(n)->n_change = (a)->na_filerev;	\
+			mtx_unlock(&((n)->n_mtx));			\
 		}							\
 	} while (0)
 
@@ -754,12 +833,17 @@
  */
 #define	NFSSTA_HASWRITEVERF	0x00040000  /* Has write verifier */
 #define	NFSSTA_GOTFSINFO	0x00100000  /* Got the fsinfo */
+#define	NFSSTA_OPENMODE		0x00200000  /* Must use correct open mode */
+#define	NFSSTA_NOLAYOUTCOMMIT	0x04000000  /* Don't do LayoutCommit */
+#define	NFSSTA_SESSPERSIST	0x08000000  /* Has a persistent session */
 #define	NFSSTA_TIMEO		0x10000000  /* Experiencing a timeout */
 #define	NFSSTA_LOCKTIMEO	0x20000000  /* Experiencing a lockd timeout */
 #define	NFSSTA_HASSETFSID	0x40000000  /* Has set the fsid */
+#define	NFSSTA_PNFS		0x80000000  /* pNFS is enabled */
 
 #define	NFSHASNFSV3(n)		((n)->nm_flag & NFSMNT_NFSV3)
 #define	NFSHASNFSV4(n)		((n)->nm_flag & NFSMNT_NFSV4)
+#define	NFSHASNFSV4N(n)		((n)->nm_minorvers > 0)
 #define	NFSHASNFSV3OR4(n)	((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4))
 #define	NFSHASGOTFSINFO(n)	((n)->nm_state & NFSSTA_GOTFSINFO)
 #define	NFSHASHASSETFSID(n)	((n)->nm_state & NFSSTA_HASSETFSID)
@@ -776,6 +860,13 @@
 #define	NFSHASPRIVACY(n)	((n)->nm_flag & NFSMNT_PRIVACY)
 #define	NFSSETWRITEVERF(n)	((n)->nm_state |= NFSSTA_HASWRITEVERF)
 #define	NFSSETHASSETFSID(n)	((n)->nm_state |= NFSSTA_HASSETFSID)
+#define	NFSHASPNFSOPT(n)	((n)->nm_flag & NFSMNT_PNFS)
+#define	NFSHASNOLAYOUTCOMMIT(n)	((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT)
+#define	NFSHASSESSPERSIST(n)	((n)->nm_state & NFSSTA_SESSPERSIST)
+#define	NFSHASPNFS(n)		((n)->nm_state & NFSSTA_PNFS)
+#define	NFSHASOPENMODE(n)	((n)->nm_state & NFSSTA_OPENMODE)
+#define	NFSHASONEOPENOWN(n)	(((n)->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&	\
+				    (n)->nm_minorvers > 0)
 
 /*
  * Gets the stats field out of the mount structure.
@@ -883,7 +974,7 @@
 };
 
 #ifndef NFS_MAXBSIZE
-#define	NFS_MAXBSIZE	MAXBSIZE
+#define	NFS_MAXBSIZE	MAXBCACHEBUF
 #endif
 
 /*
@@ -898,11 +989,11 @@
 #endif
 
 /*
- * Define this as the flags argument for msleep() when catching signals
- * while holding a resource that other threads would block for, such as
- * a vnode lock.
+ * Name used by getnewvnode() to describe filesystem, "newnfs".
+ * For perfomance reasons it is useful to have the same string
+ * used in both places that call getnewvnode().
  */
-#define	NFS_PCATCH	(PCATCH | PBDRY)
+extern const char nfs_vnode_tag[];
 
 #endif	/* _KERNEL */
 

Modified: trunk/sys/fs/nfs/nfsproto.h
===================================================================
--- trunk/sys/fs/nfs/nfsproto.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsproto.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsproto.h 320617 2017-07-03 20:12:31Z rmacklem $
  */
 
 #ifndef _NFS_NFSPROTO_H_
@@ -54,17 +55,44 @@
 #define	NFS_VER4	4
 #define	NFS_V2MAXDATA	8192
 #define	NFS_MAXDGRAMDATA 16384
-#define	NFS_MAXDATA	NFS_MAXBSIZE
 #define	NFS_MAXPATHLEN	1024
 #define	NFS_MAXNAMLEN	255
+/*
+ * Calculating the maximum XDR overhead for an NFS RPC isn't easy.
+ * NFS_MAXPKTHDR is antiquated and assumes AUTH_SYS over UDP.
+ * NFS_MAXXDR should be sufficient for all NFS versions over TCP.
+ * It includes:
+ * - Maximum RPC message header. It can include 2 400byte authenticators plus
+ *   a machine name of unlimited length, although it is usually relatively
+ *   small.
+ * - XDR overheads for the NFSv4 compound. This can include Owner and
+ *   Owner_group strings, which are usually fairly small, but are allowed
+ *   to be up to 1024 bytes each.
+ * 4096 is overkill, but should always be sufficient.
+ */
 #define	NFS_MAXPKTHDR	404
-#define	NFS_MAXPACKET	(NFS_MAXDATA + 2048)
+#define	NFS_MAXXDR	4096
+#define	NFS_MAXPACKET	(NFS_SRVMAXIO + NFS_MAXXDR)
 #define	NFS_MINPACKET	20
 #define	NFS_FABLKSIZE	512	/* Size in bytes of a block wrt fa_blocks */
 #define	NFSV4_MINORVERSION	0	/* V4 Minor version */
+#define	NFSV41_MINORVERSION	1	/* V4 Minor version */
 #define	NFSV4_CBVERS		1	/* V4 CB Version */
+#define	NFSV41_CBVERS		4	/* V4.1 CB Version */
 #define	NFSV4_SMALLSTR	50		/* Strings small enough for stack */
 
+/*
+ * This value isn't a fixed value in the RFCs.
+ * It is the maximum data size supported by NFSv3 or NFSv4 over TCP for
+ * the server.  It should be set to the I/O size preferred by ZFS or
+ * MAXBSIZE, whichever is greater.
+ * ZFS currently prefers 128K.
+ * It used to be called NFS_MAXDATA, but has been renamed to clarify that
+ * it refers to server side only and doesn't conflict with the NFS_MAXDATA
+ * defined in rpcsvc/nfs_prot.h for userland.
+ */
+#define	NFS_SRVMAXIO	(128 * 1024)
+
 /* Stat numbers for rpc returns (version 2, 3 and 4) */
 /*
  * These numbers are hard-wired in the RFCs, so they can't be changed.
@@ -145,6 +173,46 @@
 #define	NFSERR_ADMINREVOKED	10047
 #define	NFSERR_CBPATHDOWN	10048
 
+/* NFSv4.1 specific errors. */
+#define	NFSERR_BADIOMODE	10049
+#define	NFSERR_BADLAYOUT	10050
+#define	NFSERR_BADSESSIONDIGEST	10051
+#define	NFSERR_BADSESSION	10052
+#define	NFSERR_BADSLOT		10053
+#define	NFSERR_COMPLETEALREADY	10054
+#define	NFSERR_NOTBNDTOSESS	10055
+#define	NFSERR_DELEGALREADYWANT	10056
+#define	NFSERR_BACKCHANBUSY	10057
+#define	NFSERR_LAYOUTTRYLATER	10058
+#define	NFSERR_LAYOUTUNAVAIL	10059
+#define	NFSERR_NOMATCHLAYOUT	10060
+#define	NFSERR_RECALLCONFLICT	10061
+#define	NFSERR_UNKNLAYOUTTYPE	10062
+#define	NFSERR_SEQMISORDERED	10063
+#define	NFSERR_SEQUENCEPOS	10064
+#define	NFSERR_REQTOOBIG	10065
+#define	NFSERR_REPTOOBIG	10066
+#define	NFSERR_REPTOOBIGTOCACHE	10067
+#define	NFSERR_RETRYUNCACHEDREP	10068
+#define	NFSERR_UNSAFECOMPOUND	10069
+#define	NFSERR_TOOMANYOPS	10070
+#define	NFSERR_OPNOTINSESS	10071
+#define	NFSERR_HASHALGUNSUPP	10072
+#define	NFSERR_CLIENTIDBUSY	10074
+#define	NFSERR_PNFSIOHOLE	10075
+#define	NFSERR_SEQFALSERETRY	10076
+#define	NFSERR_BADHIGHSLOT	10077
+#define	NFSERR_DEADSESSION	10078
+#define	NFSERR_ENCRALGUNSUPP	10079
+#define	NFSERR_PNFSNOLAYOUT	10080
+#define	NFSERR_NOTONLYOP	10081
+#define	NFSERR_WRONGCRED	10082
+#define	NFSERR_WRONGTYPE	10083
+#define	NFSERR_DIRDELEGUNAVAIL	10084
+#define	NFSERR_REJECTDELEG	10085
+#define	NFSERR_RETURNCONFLICT	10086
+#define	NFSERR_DELEGREVOKED	10087
+
 #define	NFSERR_STALEWRITEVERF	30001	/* Fake return for nfs_commit() */
 #define	NFSERR_DONTREPLY	30003	/* Don't process request */
 #define	NFSERR_RETVOID		30004	/* Return void, not error */
@@ -189,6 +257,8 @@
 #define	NFSX_V4SPECDATA		(2 * NFSX_UNSIGNED)
 #define	NFSX_V4TIME		(NFSX_HYPER + NFSX_UNSIGNED)
 #define	NFSX_V4SETTIME		(NFSX_UNSIGNED + NFSX_V4TIME)
+#define	NFSX_V4SESSIONID	16
+#define	NFSX_V4DEVICEID		16
 
 /* sizes common to multiple NFS versions */
 #define	NFSX_FHMAX		(NFSX_V4FHMAX)
@@ -258,6 +328,27 @@
  * Must be defined as one higher than the last Proc# above.
  */
 #define	NFSV4_NPROCS		41
+
+/* Additional procedures for NFSv4.1. */
+#define	NFSPROC_EXCHANGEID	41
+#define	NFSPROC_CREATESESSION	42
+#define	NFSPROC_DESTROYSESSION	43
+#define	NFSPROC_DESTROYCLIENT	44
+#define	NFSPROC_FREESTATEID	45
+#define	NFSPROC_LAYOUTGET	46
+#define	NFSPROC_GETDEVICEINFO	47
+#define	NFSPROC_LAYOUTCOMMIT	48
+#define	NFSPROC_LAYOUTRETURN	49
+#define	NFSPROC_RECLAIMCOMPL	50
+#define	NFSPROC_WRITEDS		51
+#define	NFSPROC_READDS		52
+#define	NFSPROC_COMMITDS	53
+
+/*
+ * Must be defined as one higher than the last NFSv4.1 Proc# above.
+ */
+#define	NFSV41_NPROCS		54
+
 #endif	/* NFS_V3NPROCS */
 
 /*
@@ -269,10 +360,10 @@
 
 /*
  * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure
- * or Operation#. Since the NFS V4 Op #s go higher, use NFSV4OP_NOPS, which
+ * or Operation#. Since the NFS V4 Op #s go higher, use NFSV41_NOPS, which
  * is one greater than the highest Op#.
  */
-#define	NFSPROC_NOOP		NFSV4OP_NOPS
+#define	NFSPROC_NOOP		NFSV41_NOPS
 
 /* Actual Version 2 procedure numbers */
 #define	NFSV2PROC_NULL		0
@@ -324,9 +415,13 @@
 #define	NFSV4OPEN_CLAIMPREVIOUS		1
 #define	NFSV4OPEN_CLAIMDELEGATECUR	2
 #define	NFSV4OPEN_CLAIMDELEGATEPREV	3
+#define	NFSV4OPEN_CLAIMFH		4
+#define	NFSV4OPEN_CLAIMDELEGATECURFH	5
+#define	NFSV4OPEN_CLAIMDELEGATEPREVFH	6
 #define	NFSV4OPEN_DELEGATENONE		0
 #define	NFSV4OPEN_DELEGATEREAD		1
 #define	NFSV4OPEN_DELEGATEWRITE		2
+#define	NFSV4OPEN_DELEGATENONEEXT	3
 #define	NFSV4OPEN_LIMITSIZE		1
 #define	NFSV4OPEN_LIMITBLOCKS		2
 
@@ -406,6 +501,7 @@
 #define	NFSSTATEID_PUTALLZERO		0
 #define	NFSSTATEID_PUTALLONE		1
 #define	NFSSTATEID_PUTSTATEID		2
+#define	NFSSTATEID_PUTSEQIDZERO		3
 
 /*
  * Bits for share access and deny.
@@ -413,6 +509,14 @@
 #define	NFSV4OPEN_ACCESSREAD		0x00000001
 #define	NFSV4OPEN_ACCESSWRITE		0x00000002
 #define	NFSV4OPEN_ACCESSBOTH		0x00000003
+#define	NFSV4OPEN_WANTDELEGMASK		0x0000ff00
+#define	NFSV4OPEN_WANTREADDELEG		0x00000100
+#define	NFSV4OPEN_WANTWRITEDELEG	0x00000200
+#define	NFSV4OPEN_WANTANYDELEG		0x00000300
+#define	NFSV4OPEN_WANTNODELEG		0x00000400
+#define	NFSV4OPEN_WANTCANCEL		0x00000500
+#define	NFSV4OPEN_WANTSIGNALDELEG	0x00010000
+#define	NFSV4OPEN_WANTPUSHDELEG		0x00020000
 
 #define	NFSV4OPEN_DENYNONE		0x00000000
 #define	NFSV4OPEN_DENYREAD		0x00000001
@@ -420,16 +524,35 @@
 #define	NFSV4OPEN_DENYBOTH		0x00000003
 
 /*
+ * Delegate_none_ext reply values.
+ */
+#define	NFSV4OPEN_NOTWANTED		0
+#define	NFSV4OPEN_CONTENTION		1
+#define	NFSV4OPEN_RESOURCE		2
+#define	NFSV4OPEN_NOTSUPPFTYPE		3
+#define	NFSV4OPEN_NOTSUPPWRITEFTYPE	4
+#define	NFSV4OPEN_NOTSUPPUPGRADE	5
+#define	NFSV4OPEN_NOTSUPPDOWNGRADE	6
+#define	NFSV4OPEN_CANCELLED		7
+#define	NFSV4OPEN_ISDIR			8
+
+/*
  * Open result flags
- * (The first two are in the spec. The rest are used internally.)
+ * (The first four are in the spec. The rest are used internally.)
  */
 #define	NFSV4OPEN_RESULTCONFIRM		0x00000002
 #define	NFSV4OPEN_LOCKTYPEPOSIX		0x00000004
+#define	NFSV4OPEN_PRESERVEUNLINKED	0x00000008
+#define	NFSV4OPEN_MAYNOTIFYLOCK		0x00000020
 #define	NFSV4OPEN_RFLAGS 						\
-		(NFSV4OPEN_RESULTCONFIRM | NFSV4OPEN_LOCKTYPEPOSIX)
+    (NFSV4OPEN_RESULTCONFIRM | NFSV4OPEN_LOCKTYPEPOSIX |		\
+    NFSV4OPEN_PRESERVEUNLINKED | NFSV4OPEN_MAYNOTIFYLOCK)
 #define	NFSV4OPEN_RECALL		0x00010000
 #define	NFSV4OPEN_READDELEGATE		0x00020000
 #define	NFSV4OPEN_WRITEDELEGATE		0x00040000
+#define	NFSV4OPEN_WDRESOURCE		0x00080000
+#define	NFSV4OPEN_WDCONTENTION		0x00100000
+#define	NFSV4OPEN_WDNOTWANTED		0x00200000
 
 /*
  * NFS V4 File Handle types
@@ -462,6 +585,7 @@
 #define	NFSCREATE_UNCHECKED		0
 #define	NFSCREATE_GUARDED		1
 #define	NFSCREATE_EXCLUSIVE		2
+#define	NFSCREATE_EXCLUSIVE41		3
 
 #define	NFSV3FSINFO_LINK		0x01
 #define	NFSV3FSINFO_SYMLINK		0x02
@@ -468,6 +592,63 @@
 #define	NFSV3FSINFO_HOMOGENEOUS		0x08
 #define	NFSV3FSINFO_CANSETTIME		0x10
 
+/* Flags for Exchange ID */
+#define	NFSV4EXCH_SUPPMOVEDREFER	0x00000001
+#define	NFSV4EXCH_SUPPMOVEDMIGR	0x00000002
+#define	NFSV4EXCH_BINDPRINCSTATEID	0x00000100
+#define	NFSV4EXCH_USENONPNFS		0x00010000
+#define	NFSV4EXCH_USEPNFSMDS		0x00020000
+#define	NFSV4EXCH_USEPNFSDS		0x00040000
+#define	NFSV4EXCH_MASKPNFS		0x00070000
+#define	NFSV4EXCH_UPDCONFIRMEDRECA	0x40000000
+#define	NFSV4EXCH_CONFIRMEDR		0x80000000
+
+/* State Protects */
+#define	NFSV4EXCH_SP4NONE		0
+#define	NFSV4EXCH_SP4MACHCRED		1
+#define	NFSV4EXCH_SP4SSV		2
+
+/* Flags for Create Session */
+#define	NFSV4CRSESS_PERSIST		0x00000001
+#define	NFSV4CRSESS_CONNBACKCHAN	0x00000002
+#define	NFSV4CRSESS_CONNRDMA		0x00000004
+
+/* Flags for Sequence */
+#define	NFSV4SEQ_CBPATHDOWN		0x00000001
+#define	NFSV4SEQ_CBGSSCONTEXPIRING	0x00000002
+#define	NFSV4SEQ_CBGSSCONTEXPIRED	0x00000004
+#define	NFSV4SEQ_EXPIREDALLSTATEREVOKED	0x00000008
+#define	NFSV4SEQ_EXPIREDSOMESTATEREVOKED 0x00000010
+#define	NFSV4SEQ_ADMINSTATEREVOKED	0x00000020
+#define	NFSV4SEQ_RECALLABLESTATEREVOKED	0x00000040
+#define	NFSV4SEQ_LEASEMOVED		0x00000080
+#define	NFSV4SEQ_RESTARTRECLAIMNEEDED	0x00000100
+#define	NFSV4SEQ_CBPATHDOWNSESSION	0x00000200
+#define	NFSV4SEQ_BACKCHANNELFAULT	0x00000400
+#define	NFSV4SEQ_DEVIDCHANGED		0x00000800
+#define	NFSV4SEQ_DEVIDDELETED		0x00001000
+
+/* Flags for Layout. */
+#define	NFSLAYOUTRETURN_FILE		1
+#define	NFSLAYOUTRETURN_FSID		2
+#define	NFSLAYOUTRETURN_ALL		3
+
+#define	NFSLAYOUT_NFSV4_1_FILES		0x1
+#define	NFSLAYOUT_OSD2_OBJECTS		0x2
+#define	NFSLAYOUT_BLOCK_VOLUME		0x3
+
+#define	NFSLAYOUTIOMODE_READ		1
+#define	NFSLAYOUTIOMODE_RW		2
+#define	NFSLAYOUTIOMODE_ANY		3
+
+/* Flags for Get Device Info. */
+#define	NFSDEVICEIDNOTIFY_CHANGEBIT	0x1
+#define	NFSDEVICEIDNOTIFY_DELETEBIT	0x2
+
+/* Flags for File Layout. */
+#define	NFSFLAYUTIL_DENSE		0x1
+#define	NFSFLAYUTIL_COMMIT_THRU_MDS	0x2
+
 /* Conversion macros */
 #define	vtonfsv2_mode(t,m) 						\
 		txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : 	\
@@ -681,6 +862,27 @@
 #define	NFSATTRBIT_TIMEMODIFY		53
 #define	NFSATTRBIT_TIMEMODIFYSET	54
 #define	NFSATTRBIT_MOUNTEDONFILEID	55
+#define	NFSATTRBIT_DIRNOTIFDELAY	56
+#define	NFSATTRBIT_DIRENTNOTIFDELAY	57
+#define	NFSATTRBIT_DACL			58
+#define	NFSATTRBIT_SACL			59
+#define	NFSATTRBIT_CHANGEPOLICY		60
+#define	NFSATTRBIT_FSSTATUS		61
+#define	NFSATTRBIT_FSLAYOUTTYPE		62
+#define	NFSATTRBIT_LAYOUTHINT		63
+#define	NFSATTRBIT_LAYOUTTYPE		64
+#define	NFSATTRBIT_LAYOUTBLKSIZE	65
+#define	NFSATTRBIT_LAYOUTALIGNMENT	66
+#define	NFSATTRBIT_FSLOCATIONSINFO	67
+#define	NFSATTRBIT_MDSTHRESHOLD		68
+#define	NFSATTRBIT_RETENTIONGET		69
+#define	NFSATTRBIT_RETENTIONSET		70
+#define	NFSATTRBIT_RETENTEVTGET		71
+#define	NFSATTRBIT_RETENTEVTSET		72
+#define	NFSATTRBIT_RETENTIONHOLD	73
+#define	NFSATTRBIT_MODESETMASKED	74
+#define	NFSATTRBIT_SUPPATTREXCLCREAT	75
+#define	NFSATTRBIT_FSCHARSETCAP		76
 
 #define	NFSATTRBM_SUPPORTEDATTRS	0x00000001
 #define	NFSATTRBM_TYPE			0x00000002
@@ -738,8 +940,29 @@
 #define	NFSATTRBM_TIMEMODIFY		0x00200000
 #define	NFSATTRBM_TIMEMODIFYSET		0x00400000
 #define	NFSATTRBM_MOUNTEDONFILEID	0x00800000
+#define	NFSATTRBM_DIRNOTIFDELAY		0x01000000
+#define	NFSATTRBM_DIRENTNOTIFDELAY	0x02000000
+#define	NFSATTRBM_DACL			0x04000000
+#define	NFSATTRBM_SACL			0x08000000
+#define	NFSATTRBM_CHANGEPOLICY		0x10000000
+#define	NFSATTRBM_FSSTATUS		0x20000000
+#define	NFSATTRBM_FSLAYOUTTYPE		0x40000000
+#define	NFSATTRBM_LAYOUTHINT		0x80000000
+#define	NFSATTRBM_LAYOUTTYPE		0x00000001
+#define	NFSATTRBM_LAYOUTBLKSIZE		0x00000002
+#define	NFSATTRBM_LAYOUTALIGNMENT	0x00000004
+#define	NFSATTRBM_FSLOCATIONSINFO	0x00000008
+#define	NFSATTRBM_MDSTHRESHOLD		0x00000010
+#define	NFSATTRBM_RETENTIONGET		0x00000020
+#define	NFSATTRBM_RETENTIONSET		0x00000040
+#define	NFSATTRBM_RETENTEVTGET		0x00000080
+#define	NFSATTRBM_RETENTEVTSET		0x00000100
+#define	NFSATTRBM_RETENTIONHOLD		0x00000200
+#define	NFSATTRBM_MODESETMASKED		0x00000400
+#define	NFSATTRBM_SUPPATTREXCLCREAT	0x00000800
+#define	NFSATTRBM_FSCHARSETCAP		0x00001000
 
-#define	NFSATTRBIT_MAX			56
+#define	NFSATTRBIT_MAX			77
 
 /*
  * Sets of attributes that are supported, by words in the bitmap.
@@ -747,6 +970,7 @@
 /*
  * NFSATTRBIT_SUPPORTED - SUPP0 - bits 0<->31
  *			  SUPP1 - bits 32<->63
+ *			  SUPP2 - bits 64<->95
  */
 #define	NFSATTRBIT_SUPP0						\
  	(NFSATTRBM_SUPPORTEDATTRS |					\
@@ -813,6 +1037,8 @@
 #define	NFSATTRBIT_SUPP1	NFSATTRBIT_S1
 #endif
 
+#define	NFSATTRBIT_SUPP2	NFSATTRBM_SUPPATTREXCLCREAT
+
 /*
  * NFSATTRBIT_SUPPSETONLY is the OR of NFSATTRBIT_TIMEACCESSSET and
  * NFSATTRBIT_TIMEMODIFYSET.
@@ -823,6 +1049,7 @@
 /*
  * NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31
  *			SETABLE1 - bits 32<->63
+ *			SETABLE2 - bits 64<->95
  */
 #define	NFSATTRBIT_SETABLE0						\
 	(NFSATTRBM_SIZE |						\
@@ -833,6 +1060,7 @@
  	NFSATTRBM_OWNERGROUP |						\
  	NFSATTRBM_TIMEACCESSSET |					\
  	NFSATTRBM_TIMEMODIFYSET)
+#define	NFSATTRBIT_SETABLE2		0
 
 /*
  * Set of attributes that the getattr vnode op needs.
@@ -863,6 +1091,11 @@
  	NFSATTRBM_TIMEMODIFY)
 
 /*
+ * NFSATTRBIT_GETATTR2 - bits 64<->95
+ */
+#define	NFSATTRBIT_GETATTR2		0
+
+/*
  * Subset of the above that the Write RPC gets.
  * OR of the following bits.
  * NFSATTRBIT_WRITEGETATTR0 - bits 0<->31
@@ -889,6 +1122,11 @@
  	NFSATTRBM_TIMEMODIFY)
 
 /*
+ * NFSATTRBIT_WRITEGETATTR2 - bits 64<->95
+ */
+#define	NFSATTRBIT_WRITEGETATTR2	0
+
+/*
  * Set of attributes that the wccattr operation op needs.
  * OR of the following bits.
  * NFSATTRBIT_WCCATTR0 - bits 0<->31
@@ -902,6 +1140,11 @@
  	(NFSATTRBM_TIMEMODIFY)
 
 /*
+ * NFSATTRBIT_WCCATTR2 - bits 64<->95
+ */
+#define	NFSATTRBIT_WCCATTR2		0
+
+/*
  * NFSATTRBIT_CBGETATTR0 - bits 0<->31
  */
 #define	NFSATTRBIT_CBGETATTR0	(NFSATTRBM_CHANGE | NFSATTRBM_SIZE)
@@ -912,6 +1155,11 @@
 #define	NFSATTRBIT_CBGETATTR1		0x0
 
 /*
+ * NFSATTRBIT_CBGETATTR2 - bits 64<->95
+ */
+#define	NFSATTRBIT_CBGETATTR2		0x0
+
+/*
  * Sets of attributes that require a VFS_STATFS() call to get the
  * values of.
  * NFSATTRBIT_STATFS0 - bits 0<->31
@@ -943,6 +1191,11 @@
 	NFSATTRBM_TIMEDELTA)
 
 /*
+ * NFSATTRBIT_STATFS2 - bits 64<->95
+ */
+#define	NFSATTRBIT_STATFS2		0
+
+/*
  * These are the bits that are needed by the nfs_statfs() call.
  * (The regular getattr bits are or'd in so the vnode gets the correct
  *  type, etc.)
@@ -970,6 +1223,11 @@
 				NFSATTRBM_TIMEDELTA)
 
 /*
+ * NFSGETATTRBIT_STATFS2 - bits 64<->95
+ */
+#define	NFSGETATTRBIT_STATFS2		0
+
+/*
  * Set of attributes for the equivalent of an nfsv3 pathconf rpc.
  * NFSGETATTRBIT_PATHCONF0 - bits 0<->31
  */
@@ -987,6 +1245,11 @@
 				NFSATTRBM_NOTRUNC)
 
 /*
+ * NFSGETATTRBIT_PATHCONF2 - bits 64<->95
+ */
+#define	NFSGETATTRBIT_PATHCONF2		0
+
+/*
  * Sets of attributes required by readdir and readdirplus.
  * NFSATTRBIT_READDIRPLUS0	(NFSATTRBIT_GETATTR0 | NFSATTRBIT_FILEHANDLE |
  *				 NFSATTRBIT_RDATTRERROR)
@@ -994,6 +1257,7 @@
 #define	NFSATTRBIT_READDIRPLUS0	(NFSATTRBIT_GETATTR0 | NFSATTRBM_FILEHANDLE | \
 				NFSATTRBM_RDATTRERROR)
 #define	NFSATTRBIT_READDIRPLUS1	NFSATTRBIT_GETATTR1
+#define	NFSATTRBIT_READDIRPLUS2		0
 
 /*
  * Set of attributes supported by Referral vnodes.
@@ -1001,6 +1265,7 @@
 #define	NFSATTRBIT_REFERRAL0	(NFSATTRBM_TYPE | NFSATTRBM_FSID |	\
 	NFSATTRBM_RDATTRERROR | NFSATTRBM_FSLOCATIONS)
 #define	NFSATTRBIT_REFERRAL1	NFSATTRBM_MOUNTEDONFILEID
+#define	NFSATTRBIT_REFERRAL2		0
 
 /*
  * Structure for data handled by the statfs rpc. Since some fields are

Modified: trunk/sys/fs/nfs/nfsrvcache.h
===================================================================
--- trunk/sys/fs/nfs/nfsrvcache.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsrvcache.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsrvcache.h 269655 2014-08-07 03:50:30Z kib $
  */
 
 #ifndef _NFS_NFSRVCACHE_H_
@@ -41,10 +42,12 @@
 #define	NFSRVCACHE_MAX_SIZE	2048
 #define	NFSRVCACHE_MIN_SIZE	  64
 
-#define	NFSRVCACHE_HASHSIZE	20
+#define	NFSRVCACHE_HASHSIZE	500
 
+/* Cache table entry. */
 struct nfsrvcache {
 	LIST_ENTRY(nfsrvcache) rc_hash;		/* Hash chain */
+	LIST_ENTRY(nfsrvcache) rc_ahash;	/* ACK hash chain */
 	TAILQ_ENTRY(nfsrvcache)	rc_lru;		/* UDP lru chain */
 	u_int32_t	rc_xid;			/* rpc id number */
 	time_t		rc_timestamp;		/* Time done */
@@ -63,6 +66,7 @@
 			int16_t		refcnt;
 			u_int16_t	cksum;
 			time_t		cachetime;
+			int		acked;
 		} ot;
 	} rc_un2;
 	u_int16_t	rc_proc;		/* rpc proc number */
@@ -80,7 +84,14 @@
 #define	rc_reqlen	rc_un2.ot.len
 #define	rc_cksum	rc_un2.ot.cksum
 #define	rc_cachetime	rc_un2.ot.cachetime
+#define	rc_acked	rc_un2.ot.acked
 
+/* TCP ACK values */
+#define	RC_NO_SEQ		0
+#define	RC_NO_ACK		1
+#define	RC_ACK			2
+#define	RC_NACK			3
+
 /* Return values */
 #define	RC_DROPIT		0
 #define	RC_REPLY		1
@@ -94,7 +105,6 @@
 #define	RC_UDP		0x0010
 #define	RC_INETIPV6	0x0020
 #define	RC_INPROG	0x0040
-#define	RC_TCPSEQ	0x0080
 #define	RC_NFSV2	0x0100
 #define	RC_NFSV3	0x0200
 #define	RC_NFSV4	0x0400
@@ -104,4 +114,10 @@
 
 LIST_HEAD(nfsrvhashhead, nfsrvcache);
 
+/* The fine-grained locked cache hash table for TCP. */
+struct nfsrchash_bucket {
+	struct mtx		mtx;
+	struct nfsrvhashhead	tbl;
+};
+
 #endif	/* _NFS_NFSRVCACHE_H_ */

Modified: trunk/sys/fs/nfs/nfsrvstate.h
===================================================================
--- trunk/sys/fs/nfs/nfsrvstate.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsrvstate.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsrvstate.h 299222 2016-05-07 20:09:15Z rmacklem $
  */
 
 #ifndef _NFS_NFSRVSTATE_H_
@@ -42,28 +43,36 @@
 LIST_HEAD(nfsstatehead, nfsstate);
 LIST_HEAD(nfslockhead, nfslock);
 LIST_HEAD(nfslockhashhead, nfslockfile);
+LIST_HEAD(nfssessionhead, nfsdsession);
+LIST_HEAD(nfssessionhashhead, nfsdsession);
 
 /*
  * List head for nfsusrgrp.
  */
-LIST_HEAD(nfsuserhashhead, nfsusrgrp);
-TAILQ_HEAD(nfsuserlruhead, nfsusrgrp);
+TAILQ_HEAD(nfsuserhashhead, nfsusrgrp);
 
 #define	NFSCLIENTHASH(id)						\
-	(&nfsclienthash[(id).lval[1] % NFSCLIENTHASHSIZE])
+	(&nfsclienthash[(id).lval[1] % nfsrv_clienthashsize])
 #define	NFSSTATEHASH(clp, id)						\
-	(&((clp)->lc_stateid[(id).other[2] % NFSSTATEHASHSIZE]))
+	(&((clp)->lc_stateid[(id).other[2] % nfsrv_statehashsize]))
 #define	NFSUSERHASH(id)							\
-	(&nfsuserhash[(id) % NFSUSERHASHSIZE])
+	(&nfsuserhash[(id) % nfsrv_lughashsize])
 #define	NFSUSERNAMEHASH(p, l)						\
 	(&nfsusernamehash[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \
-		% NFSUSERHASHSIZE])
+		% nfsrv_lughashsize])
 #define	NFSGROUPHASH(id)						\
-	(&nfsgrouphash[(id) % NFSGROUPHASHSIZE])
+	(&nfsgrouphash[(id) % nfsrv_lughashsize])
 #define	NFSGROUPNAMEHASH(p, l)						\
 	(&nfsgroupnamehash[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \
-		% NFSGROUPHASHSIZE])
+		% nfsrv_lughashsize])
 
+struct nfssessionhash {
+	struct mtx			mtx;
+	struct nfssessionhashhead	list;
+};
+#define	NFSSESSIONHASH(f) 						\
+	(&nfssessionhash[nfsrv_hashsessionid(f) % nfsrv_sessionhashsize])
+
 /*
  * Client server structure for V4. It is doubly linked into two lists.
  * The first is a hash table based on the clientid and the second is a
@@ -72,10 +81,11 @@
  */
 struct nfsclient {
 	LIST_ENTRY(nfsclient) lc_hash;		/* Clientid hash list */
-	struct nfsstatehead lc_stateid[NFSSTATEHASHSIZE]; /* stateid hash */
+	struct nfsstatehead *lc_stateid;	/* Stateid hash */
 	struct nfsstatehead lc_open;		/* Open owner list */
 	struct nfsstatehead lc_deleg;		/* Delegations */
 	struct nfsstatehead lc_olddeleg;	/* and old delegations */
+	struct nfssessionhead lc_session;	/* List of NFSv4.1 sessions */
 	time_t		lc_expiry;		/* Expiry time (sec) */
 	time_t		lc_delegtime;		/* Old deleg expiry (sec) */
 	nfsquad_t	lc_clientid;		/* 64 bit clientid */
@@ -87,10 +97,10 @@
 	u_int32_t	lc_cbref;		/* Cnt of callbacks */
 	uid_t		lc_uid;			/* User credential */
 	gid_t		lc_gid;
-	u_int16_t	lc_namelen;
+	u_int16_t	lc_idlen;		/* Client ID and len */
+	u_int16_t	lc_namelen;		/* plus GSS principal and len */
 	u_char		*lc_name;
 	struct nfssockreq lc_req;		/* Callback info */
-	u_short		lc_idlen;		/* Length of id string */
 	u_int32_t	lc_flags;		/* LCL_ flag bits */
 	u_char		lc_verf[NFSX_VERF];	 /* client verifier */
 	u_char		lc_id[1];		/* Malloc'd correct size */
@@ -101,6 +111,43 @@
 #define	CLOPS_RENEWOP		0x0004
 
 /*
+ * Structure for an NFSv4.1 session.
+ * Locking rules for this structure.
+ * To add/delete one of these structures from the lists, you must lock
+ * both: NFSLOCKSTATE() and NFSLOCKSESSION(session hashhead) in that order.
+ * To traverse the lists looking for one of these, you must hold one
+ * of these two locks.
+ * The exception is if the thread holds the exclusive root sleep lock.
+ * In this case, all other nfsd threads are blocked, so locking the
+ * mutexes isn't required.
+ * When manipulating sess_refcnt, NFSLOCKSTATE() must be locked.
+ * When manipulating the fields withinsess_cbsess except nfsess_xprt,
+ * sess_cbsess.nfsess_mtx must be locked.
+ * When manipulating sess_slots and sess_cbsess.nfsess_xprt,
+ * NFSLOCKSESSION(session hashhead) must be locked.
+ */
+struct nfsdsession {
+	uint64_t		sess_refcnt;	/* Reference count. */
+	LIST_ENTRY(nfsdsession)	sess_hash;	/* Hash list of sessions. */
+	LIST_ENTRY(nfsdsession)	sess_list;	/* List of client sessions. */
+	struct nfsslot		sess_slots[NFSV4_SLOTS];
+	struct nfsclient	*sess_clp;	/* Associated clientid. */
+	uint32_t		sess_crflags;
+	uint32_t		sess_cbprogram;
+	uint32_t		sess_maxreq;
+	uint32_t		sess_maxresp;
+	uint32_t		sess_maxrespcached;
+	uint32_t		sess_maxops;
+	uint32_t		sess_maxslots;
+	uint32_t		sess_cbmaxreq;
+	uint32_t		sess_cbmaxresp;
+	uint32_t		sess_cbmaxrespcached;
+	uint32_t		sess_cbmaxops;
+	uint8_t			sess_sessionid[NFSX_V4SESSIONID];
+	struct nfsclsession	sess_cbsess;	/* Callback session. */
+};
+
+/*
  * Nfs state structure. I couldn't resist overloading this one, since
  * it makes cleanup, etc. simpler. These structures are used in four ways:
  * - open_owner structures chained off of nfsclient
@@ -217,14 +264,14 @@
  * names.
  */
 struct nfsusrgrp {
-	TAILQ_ENTRY(nfsusrgrp)	lug_lru;	/* LRU list */
-	LIST_ENTRY(nfsusrgrp)	lug_numhash;	/* Hash by id# */
-	LIST_ENTRY(nfsusrgrp)	lug_namehash;	/* and by name */
+	TAILQ_ENTRY(nfsusrgrp)	lug_numhash;	/* Hash by id# */
+	TAILQ_ENTRY(nfsusrgrp)	lug_namehash;	/* and by name */
 	time_t			lug_expiry;	/* Expiry time in sec */
 	union {
 		uid_t		un_uid;		/* id# */
 		gid_t		un_gid;
 	} lug_un;
+	struct ucred		*lug_cred;	/* Cred. with groups list */
 	int			lug_namelen;	/* Name length */
 	u_char			lug_name[1];	/* malloc'd correct length */
 };

Modified: trunk/sys/fs/nfs/nfsv4_errstr.h
===================================================================
--- trunk/sys/fs/nfs/nfsv4_errstr.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/nfsv4_errstr.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/nfsv4_errstr.h 191783 2009-05-04 15:23:58Z rmacklem $
  */
 
 #ifndef _NFS_NFSV4ERRSTR_H_

Modified: trunk/sys/fs/nfs/rpcv2.h
===================================================================
--- trunk/sys/fs/nfs/rpcv2.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/rpcv2.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/rpcv2.h 191783 2009-05-04 15:23:58Z rmacklem $
  */
 
 #ifndef _NFS_RPCV2_H_

Modified: trunk/sys/fs/nfs/xdr_subs.h
===================================================================
--- trunk/sys/fs/nfs/xdr_subs.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfs/xdr_subs.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfs/xdr_subs.h 191783 2009-05-04 15:23:58Z rmacklem $
  */
 
 #ifndef _NFS_XDR_SUBS_H_

Modified: trunk/sys/fs/nfsclient/nfs.h
===================================================================
--- trunk/sys/fs/nfsclient/nfs.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfsclient/nfs.h 221040 2011-04-25 23:12:18Z rmacklem $
  */
 
 #ifndef _NFSCLIENT_NFS_H_

Modified: trunk/sys/fs/nfsclient/nfs_clbio.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clbio.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clbio.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clbio.c 306663 2016-10-03 23:17:57Z rmacklem $");
 
 #include "opt_kdtrace.h"
 
@@ -43,6 +44,7 @@
 #include <sys/buf.h>
 #include <sys/kernel.h>
 #include <sys/mount.h>
+#include <sys/rwlock.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
@@ -104,7 +106,7 @@
 	count = ap->a_count;
 
 	if ((object = vp->v_object) == NULL) {
-		ncl_printf("nfs_getpages: called with non-merged cache vnode??\n");
+		printf("ncl_getpages: called with non-merged cache vnode\n");
 		return (VM_PAGER_ERROR);
 	}
 
@@ -112,7 +114,7 @@
 		mtx_lock(&np->n_mtx);
 		if ((np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
 			mtx_unlock(&np->n_mtx);
-			ncl_printf("nfs_getpages: called on non-cacheable vnode??\n");
+			printf("ncl_getpages: called on non-cacheable vnode\n");
 			return (VM_PAGER_ERROR);
 		} else
 			mtx_unlock(&np->n_mtx);
@@ -134,7 +136,7 @@
 	 * allow the pager to zero-out the blanks.  Partially valid pages
 	 * can only occur at the file EOF.
 	 */
-	VM_OBJECT_LOCK(object);
+	VM_OBJECT_WLOCK(object);
 	if (pages[ap->a_reqpage]->valid != 0) {
 		for (i = 0; i < npages; ++i) {
 			if (i != ap->a_reqpage) {
@@ -143,10 +145,10 @@
 				vm_page_unlock(pages[i]);
 			}
 		}
-		VM_OBJECT_UNLOCK(object);
+		VM_OBJECT_WUNLOCK(object);
 		return (0);
 	}
-	VM_OBJECT_UNLOCK(object);
+	VM_OBJECT_WUNLOCK(object);
 
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
@@ -175,8 +177,8 @@
 	relpbuf(bp, &ncl_pbuf_freecnt);
 
 	if (error && (uio.uio_resid == count)) {
-		ncl_printf("nfs_getpages: error %d\n", error);
-		VM_OBJECT_LOCK(object);
+		printf("ncl_getpages: error %d\n", error);
+		VM_OBJECT_WLOCK(object);
 		for (i = 0; i < npages; ++i) {
 			if (i != ap->a_reqpage) {
 				vm_page_lock(pages[i]);
@@ -184,7 +186,7 @@
 				vm_page_unlock(pages[i]);
 			}
 		}
-		VM_OBJECT_UNLOCK(object);
+		VM_OBJECT_WUNLOCK(object);
 		return (VM_PAGER_ERROR);
 	}
 
@@ -195,7 +197,7 @@
 	 */
 
 	size = count - uio.uio_resid;
-	VM_OBJECT_LOCK(object);
+	VM_OBJECT_WLOCK(object);
 	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
 		vm_page_t m;
 		nextoff = toff + PAGE_SIZE;
@@ -213,7 +215,7 @@
 			 * Read operation filled a partial page.
 			 */
 			m->valid = 0;
-			vm_page_set_valid(m, 0, size - toff);
+			vm_page_set_valid_range(m, 0, size - toff);
 			KASSERT(m->dirty == 0,
 			    ("nfs_getpages: page %p is dirty", m));
 		} else {
@@ -231,7 +233,7 @@
 		if (i != ap->a_reqpage)
 			vm_page_readahead_finish(m);
 	}
-	VM_OBJECT_UNLOCK(object);
+	VM_OBJECT_WUNLOCK(object);
 	return (0);
 }
 
@@ -282,7 +284,7 @@
 	if (newnfs_directio_enable && !newnfs_directio_allow_mmap &&
 	    (np->n_flag & NNONCACHE) && (vp->v_type == VREG)) {
 		mtx_unlock(&np->n_mtx);
-		ncl_printf("ncl_putpages: called on noncache-able vnode??\n");
+		printf("ncl_putpages: called on noncache-able vnode\n");
 		mtx_lock(&np->n_mtx);
 	}
 
@@ -483,7 +485,7 @@
 	    case VREG:
 		NFSINCRGLOBAL(newnfsstats.biocache_reads);
 		lbn = uio->uio_offset / biosize;
-		on = uio->uio_offset & (biosize - 1);
+		on = uio->uio_offset - (lbn * biosize);
 
 		/*
 		 * Start the read ahead(s), as required.
@@ -693,7 +695,7 @@
 			n = np->n_direofoffset - uio->uio_offset;
 		break;
 	    default:
-		ncl_printf(" ncl_bioread: type %x unexpected\n", vp->v_type);
+		printf(" ncl_bioread: type %x unexpected\n", vp->v_type);
 		bp = NULL;
 		break;
 	    };
@@ -873,8 +875,8 @@
 	struct vattr vattr;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn;
-	int bcount;
-	int bp_cached, n, on, error = 0, error1;
+	int bcount, noncontig_write, obcount;
+	int bp_cached, n, on, error = 0, error1, wouldcommit;
 	size_t orig_resid, local_resid;
 	off_t orig_size, tmp_off;
 
@@ -918,7 +920,6 @@
 			if (ioflag & IO_NDELAY)
 				return (EAGAIN);
 #endif
-flush_and_restart:
 			np->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
@@ -975,6 +976,7 @@
 	 * IO_UNIT -- we just make all writes atomic anyway, as there's
 	 * no point optimizing for something that really won't ever happen.
 	 */
+	wouldcommit = 0;
 	if (!(ioflag & IO_SYNC)) {
 		int nflag;
 
@@ -981,21 +983,7 @@
 		mtx_lock(&np->n_mtx);
 		nflag = np->n_flag;
 		mtx_unlock(&np->n_mtx);
-		int needrestart = 0;
-		if (nmp->nm_wcommitsize < uio->uio_resid) {
-			/*
-			 * If this request could not possibly be completed
-			 * without exceeding the maximum outstanding write
-			 * commit size, see if we can convert it into a
-			 * synchronous write operation.
-			 */
-			if (ioflag & IO_NDELAY)
-				return (EAGAIN);
-			ioflag |= IO_SYNC;
-			if (nflag & NMODIFIED)
-				needrestart = 1;
-		} else if (nflag & NMODIFIED) {
-			int wouldcommit = 0;
+		if (nflag & NMODIFIED) {
 			BO_LOCK(&vp->v_bufobj);
 			if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
 				TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd,
@@ -1005,30 +993,25 @@
 				}
 			}
 			BO_UNLOCK(&vp->v_bufobj);
-			/*
-			 * Since we're not operating synchronously and
-			 * bypassing the buffer cache, we are in a commit
-			 * and holding all of these buffers whether
-			 * transmitted or not.  If not limited, this
-			 * will lead to the buffer cache deadlocking,
-			 * as no one else can flush our uncommitted buffers.
-			 */
-			wouldcommit += uio->uio_resid;
-			/*
-			 * If we would initially exceed the maximum
-			 * outstanding write commit size, flush and restart.
-			 */
-			if (wouldcommit > nmp->nm_wcommitsize)
-				needrestart = 1;
 		}
-		if (needrestart)
-			goto flush_and_restart;
 	}
 
 	do {
+		if (!(ioflag & IO_SYNC)) {
+			wouldcommit += biosize;
+			if (wouldcommit > nmp->nm_wcommitsize) {
+				np->n_attrstamp = 0;
+				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
+				error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
+				if (error)
+					return (error);
+				wouldcommit = biosize;
+			}
+		}
+
 		NFSINCRGLOBAL(newnfsstats.biocache_writes);
 		lbn = uio->uio_offset / biosize;
-		on = uio->uio_offset & (biosize-1);
+		on = uio->uio_offset - (lbn * biosize);
 		n = MIN((unsigned)(biosize - on), uio->uio_resid);
 again:
 		/*
@@ -1036,7 +1019,15 @@
 		 * unaligned buffer size.
 		 */
 		mtx_lock(&np->n_mtx);
-		if (uio->uio_offset == np->n_size && n) {
+		if ((np->n_flag & NHASBEENLOCKED) == 0 &&
+		    (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0)
+			noncontig_write = 1;
+		else
+			noncontig_write = 0;
+		if ((uio->uio_offset == np->n_size ||
+		    (noncontig_write != 0 &&
+		    lbn == (np->n_size / biosize) &&
+		    uio->uio_offset + n > np->n_size)) && n) {
 			mtx_unlock(&np->n_mtx);
 			/*
 			 * Get the buffer (in its pre-append state to maintain
@@ -1044,8 +1035,8 @@
 			 * nfsnode after we have locked the buffer to prevent
 			 * readers from reading garbage.
 			 */
-			bcount = on;
-			bp = nfs_getcacheblk(vp, lbn, bcount, td);
+			obcount = np->n_size - (lbn * biosize);
+			bp = nfs_getcacheblk(vp, lbn, obcount, td);
 
 			if (bp != NULL) {
 				long save;
@@ -1057,9 +1048,12 @@
 				mtx_unlock(&np->n_mtx);
 
 				save = bp->b_flags & B_CACHE;
-				bcount += n;
+				bcount = on + n;
 				allocbuf(bp, bcount);
 				bp->b_flags |= save;
+				if (noncontig_write != 0 && on > obcount)
+					vfs_bio_bzero_buf(bp, obcount, on -
+					    obcount);
 			}
 		} else {
 			/*
@@ -1144,7 +1138,7 @@
 		 */
 
 		if (bp->b_dirtyend > bcount) {
-			ncl_printf("NFS append race @%lx:%d\n",
+			printf("NFS append race @%lx:%d\n",
 			    (long)bp->b_blkno * DEV_BSIZE,
 			    bp->b_dirtyend - bcount);
 			bp->b_dirtyend = bcount;
@@ -1158,6 +1152,8 @@
 		 * area, just update the b_dirtyoff and b_dirtyend,
 		 * otherwise force a write rpc of the old dirty area.
 		 *
+		 * If there has been a file lock applied to this file
+		 * or vfs.nfs.old_noncontig_writing is set, do the following:
 		 * While it is possible to merge discontiguous writes due to
 		 * our having a B_CACHE buffer ( and thus valid read data
 		 * for the hole), we don't because it could lead to
@@ -1164,13 +1160,15 @@
 		 * significant cache coherency problems with multiple clients,
 		 * especially if locking is implemented later on.
 		 *
-		 * As an optimization we could theoretically maintain
-		 * a linked list of discontinuous areas, but we would still
-		 * have to commit them separately so there isn't much
-		 * advantage to it except perhaps a bit of asynchronization.
+		 * If vfs.nfs.old_noncontig_writing is not set and there has
+		 * not been file locking done on this file:
+		 * Relax coherency a bit for the sake of performance and
+		 * expand the current dirty region to contain the new
+		 * write even if it means we mark some non-dirty data as
+		 * dirty.
 		 */
 
-		if (bp->b_dirtyend > 0 &&
+		if (noncontig_write == 0 && bp->b_dirtyend > 0 &&
 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
 			if (bwrite(bp) == EINTR) {
 				error = EINTR;
@@ -1296,7 +1294,7 @@
 		sigset_t oldset;
 
 		newnfs_set_sigmask(td, &oldset);
-		bp = getblk(vp, bn, size, NFS_PCATCH, 0, 0);
+		bp = getblk(vp, bn, size, PCATCH, 0, 0);
 		newnfs_restore_sigmask(td, &oldset);
 		while (bp == NULL) {
 			if (newnfs_sigintr(nmp, td))
@@ -1331,7 +1329,7 @@
 	if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF))
 		intrflg = 1;
 	if (intrflg) {
-		slpflag = NFS_PCATCH;
+		slpflag = PCATCH;
 		slptimeo = 2 * hz;
 	} else {
 		slpflag = 0;
@@ -1353,9 +1351,9 @@
 	 * Now, flush as required.
 	 */
 	if ((flags & V_SAVE) && (vp->v_bufobj.bo_object != NULL)) {
-		VM_OBJECT_LOCK(vp->v_bufobj.bo_object);
+		VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
 		vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
-		VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object);
+		VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
 		/*
 		 * If the page clean was interrupted, fail the invalidation.
 		 * Not doing so, we run the risk of losing dirty pages in the
@@ -1371,7 +1369,16 @@
 			goto out;
 		error = vinvalbuf(vp, flags, 0, slptimeo);
 	}
-	mtx_lock(&np->n_mtx);
+	if (NFSHASPNFS(nmp)) {
+		nfscl_layoutcommit(vp, td);
+		/*
+		 * Invalidate the attribute cache, since writes to a DS
+		 * won't update the size attribute.
+		 */
+		mtx_lock(&np->n_mtx);
+		np->n_attrstamp = 0;
+	} else
+		mtx_lock(&np->n_mtx);
 	if (np->n_directio_asyncwr == 0)
 		np->n_flag &= ~NMODIFIED;
 	mtx_unlock(&np->n_mtx);
@@ -1418,7 +1425,7 @@
 	}
 again:
 	if (nmp->nm_flag & NFSMNT_INT)
-		slpflag = NFS_PCATCH;
+		slpflag = PCATCH;
 	gotiod = FALSE;
 
 	/*
@@ -1483,7 +1490,7 @@
 					mtx_unlock(&ncl_iod_mutex);
 					return (error2);
 				}
-				if (slpflag == NFS_PCATCH) {
+				if (slpflag == PCATCH) {
 					slpflag = 0;
 					slptimeo = 2 * hz;
 				}
@@ -1552,6 +1559,13 @@
 	if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
 		struct nfsnode *np = VTONFS(bp->b_vp);
 		mtx_lock(&np->n_mtx);
+		if (NFSHASPNFS(VFSTONFS(vnode_mount(bp->b_vp)))) {
+			/*
+			 * Invalidate the attribute cache, since writes to a DS
+			 * won't update the size attribute.
+			 */
+			np->n_attrstamp = 0;
+		}
 		np->n_directio_asyncwr--;
 		if (np->n_directio_asyncwr == 0) {
 			np->n_flag &= ~NMODIFIED;
@@ -1665,7 +1679,7 @@
 			bp->b_flags |= B_INVAL;
 		break;
 	    default:
-		ncl_printf("ncl_doio:  type %x unexpected\n", vp->v_type);
+		printf("ncl_doio:  type %x unexpected\n", vp->v_type);
 		break;
 	    };
 	    if (error) {
@@ -1850,9 +1864,9 @@
 		 * truncation point.  We may have a B_DELWRI and/or B_CACHE
 		 * buffer that now needs to be truncated.
 		 */
-		error = vtruncbuf(vp, cred, td, nsize, biosize);
+		error = vtruncbuf(vp, cred, nsize, biosize);
 		lbn = nsize / biosize;
-		bufsize = nsize & (biosize - 1);
+		bufsize = nsize - (lbn * biosize);
 		bp = nfs_getcacheblk(vp, lbn, bufsize, td);
 		if (!bp)
 			return EINTR;

Modified: trunk/sys/fs/nfsclient/nfs_clcomsubs.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clcomsubs.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clcomsubs.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clcomsubs.c 318685 2017-05-22 22:10:02Z rmacklem $");
 
 /*
  * These functions support the macros and help fiddle mbuf chains for
@@ -43,10 +44,11 @@
 #include <fs/nfs/nfsport.h>
 
 extern struct nfsstats newnfsstats;
-extern struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS];
+extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS];
 extern int ncl_mbuf_mlen;
 extern enum vtype newnv2tov_type[8];
 extern enum vtype nv34tov_type[8];
+extern int	nfs_bigreply[NFSV41_NPROCS];
 NFSCLSTATEMUTEX;
 #endif	/* !APPLEKEXT */
 
@@ -56,7 +58,7 @@
 	int	opcnt;
 	const u_char *tag;
 	int	taglen;
-} nfsv4_opmap[NFS_NPROCS] = {
+} nfsv4_opmap[NFSV41_NPROCS] = {
 	{ 0, 1, "Null", 4 },
 	{ NFSV4OP_GETATTR, 1, "Getattr", 7, },
 	{ NFSV4OP_SETATTR, 2, "Setattr", 7, },
@@ -65,8 +67,8 @@
 	{ NFSV4OP_READLINK, 2, "Readlink", 8, },
 	{ NFSV4OP_READ, 1, "Read", 4, },
 	{ NFSV4OP_WRITE, 2, "Write", 5, },
-	{ NFSV4OP_OPEN, 3, "Open", 4, },
-	{ NFSV4OP_CREATE, 3, "Create", 6, },
+	{ NFSV4OP_OPEN, 5, "Open", 4, },
+	{ NFSV4OP_CREATE, 5, "Create", 6, },
 	{ NFSV4OP_CREATE, 1, "Create", 6, },
 	{ NFSV4OP_CREATE, 3, "Create", 6, },
 	{ NFSV4OP_REMOVE, 1, "Remove", 6, },
@@ -98,15 +100,28 @@
 	{ NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, },
 	{ NFSV4OP_GETATTR, 1, "Getacl", 6, },
 	{ NFSV4OP_SETATTR, 1, "Setacl", 6, },
+	{ NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, },
+	{ NFSV4OP_CREATESESSION, 1, "CreateSession", 13, },
+	{ NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, },
+	{ NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, },
+	{ NFSV4OP_FREESTATEID, 1, "FreeStateID", 11, },
+	{ NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, },
+	{ NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, },
+	{ NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, },
+	{ NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, },
+	{ NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, },
+	{ NFSV4OP_WRITE, 1, "WriteDS", 7, },
+	{ NFSV4OP_READ, 1, "ReadDS", 6, },
+	{ NFSV4OP_COMMIT, 1, "CommitDS", 8, },
 };
 
-
 /*
  * NFS RPCS that have large request message size.
  */
-static int nfs_bigrequest[NFS_NPROCS] = {
+static int nfs_bigrequest[NFSV41_NPROCS] = {
 	0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 1, 0, 0
 };
 
 /*
@@ -115,7 +130,7 @@
  */
 APPLESTATIC void
 nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp,
-    u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp)
+    u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep)
 {
 	struct mbuf *mb;
 	u_int32_t *tl;
@@ -125,9 +140,12 @@
 	/*
 	 * First, fill in some of the fields of nd.
 	 */
-	if (NFSHASNFSV4(nmp))
+	nd->nd_slotseq = NULL;
+	if (NFSHASNFSV4(nmp)) {
 		nd->nd_flag = ND_NFSV4 | ND_NFSCL;
-	else if (NFSHASNFSV3(nmp))
+		if (NFSHASNFSV4N(nmp))
+			nd->nd_flag |= ND_NFSV41;
+	} else if (NFSHASNFSV3(nmp))
 		nd->nd_flag = ND_NFSV3 | ND_NFSCL;
 	else
 		nd->nd_flag = ND_NFSV2 | ND_NFSCL;
@@ -138,7 +156,7 @@
 	 * Get the first mbuf for the request.
 	 */
 	if (nfs_bigrequest[procnum])
-		NFSMCLGET(mb, M_WAIT);
+		NFSMCLGET(mb, M_WAITOK);
 	else
 		NFSMGET(mb);
 	mbuf_setlen(mb, 0);
@@ -151,33 +169,84 @@
 	if (nd->nd_flag & ND_NFSV4) {
 		opcnt = nfsv4_opmap[procnum].opcnt +
 		    nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh;
+		if ((nd->nd_flag & ND_NFSV41) != 0) {
+			opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq;
+			if (procnum == NFSPROC_RENEW)
+				/*
+				 * For the special case of Renew, just do a
+				 * Sequence Op.
+				 */
+				opcnt = 1;
+			else if (procnum == NFSPROC_WRITEDS ||
+			    procnum == NFSPROC_COMMITDS)
+				/*
+				 * For the special case of a Writeor Commit to
+				 * a DS, the opcnt == 3, for Sequence, PutFH,
+				 * Write/Commit.
+				 */
+				opcnt = 3;
+		}
 		/*
 		 * What should the tag really be?
 		 */
 		(void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag,
 			nfsv4_opmap[procnum].taglen);
-		NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
-		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
+		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
+		else
+			*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
 		if (opcntpp != NULL)
 			*opcntpp = tl;
-		*tl++ = txdr_unsigned(opcnt);
+		*tl = txdr_unsigned(opcnt);
+		if ((nd->nd_flag & ND_NFSV41) != 0 &&
+		    nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) {
+			if (nfsv4_opflag[nfsv4_opmap[procnum].op].loopbadsess >
+			    0)
+				nd->nd_flag |= ND_LOOPBADSESS;
+			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+			*tl = txdr_unsigned(NFSV4OP_SEQUENCE);
+			if (sep == NULL) {
+				sep = nfsmnt_mdssession(nmp);
+				nfsv4_setsequence(nmp, nd, sep,
+				    nfs_bigreply[procnum]);
+			} else
+				nfsv4_setsequence(nmp, nd, sep,
+				    nfs_bigreply[procnum]);
+		}
 		if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) {
+			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(NFSV4OP_PUTFH);
 			(void) nfsm_fhtom(nd, nfhp, fhlen, 0);
-			if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh==2){
+			if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh
+			    == 2 && procnum != NFSPROC_WRITEDS &&
+			    procnum != NFSPROC_COMMITDS) {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OP_GETATTR);
-				NFSWCCATTR_ATTRBIT(&attrbits);
+				/*
+				 * For Lookup Ops, we want all the directory
+				 * attributes, so we can load the name cache.
+				 */
+				if (procnum == NFSPROC_LOOKUP ||
+				    procnum == NFSPROC_LOOKUPP)
+					NFSGETATTR_ATTRBIT(&attrbits);
+				else {
+					NFSWCCATTR_ATTRBIT(&attrbits);
+					nd->nd_flag |= ND_V4WCCATTR;
+				}
 				(void) nfsrv_putattrbit(nd, &attrbits);
-				nd->nd_flag |= ND_V4WCCATTR;
 			}
+		}
+		if (procnum != NFSPROC_RENEW ||
+		    (nd->nd_flag & ND_NFSV41) == 0) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+			*tl = txdr_unsigned(nfsv4_opmap[procnum].op);
 		}
-		*tl = txdr_unsigned(nfsv4_opmap[procnum].op);
 	} else {
 		(void) nfsm_fhtom(nd, nfhp, fhlen, 0);
 	}
-	NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]);
+	if (procnum < NFSV4_NPROCS)
+		NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]);
 }
 
 #ifndef APPLE
@@ -212,7 +281,7 @@
 			mlen = M_TRAILINGSPACE(mp);
 			if (mlen == 0) {
 				if (clflg)
-					NFSMCLGET(mp, M_WAIT);
+					NFSMCLGET(mp, M_WAITOK);
 				else
 					NFSMGET(mp);
 				mbuf_setlen(mp, 0);
@@ -403,6 +472,12 @@
 		flag = fxdr_unsigned(int, *tl);
 	} else if (nd->nd_flag & ND_NFSV4) {
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+		/* If the GetFH failed, clear flag. */
+		if (*++tl != 0) {
+			nd->nd_flag |= ND_NOMOREDATA;
+			flag = 0;
+			error = ENXIO;	/* Return ENXIO so *nfhpp isn't used. */
+		}
 	}
 	if (flag) {
 		error = nfsm_getfh(nd, nfhpp);
@@ -413,8 +488,12 @@
 	/*
 	 * Now, get the attributes.
 	 */
-	if (nd->nd_flag & ND_NFSV4) {
+	if (flag != 0 && (nd->nd_flag & ND_NFSV4) != 0) {
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+		if (*++tl != 0) {
+			nd->nd_flag |= ND_NOMOREDATA;
+			flag = 0;
+		}
 	} else if (nd->nd_flag & ND_NFSV3) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (flag) {
@@ -453,6 +532,11 @@
 		st->other[0] = 0xffffffff;
 		st->other[1] = 0xffffffff;
 		st->other[2] = 0xffffffff;
+	} else if (flag == NFSSTATEID_PUTSEQIDZERO) {
+		st->seqid = 0;
+		st->other[0] = stateidp->other[0];
+		st->other[1] = stateidp->other[1];
+		st->other[2] = stateidp->other[2];
 	} else {
 		st->seqid = stateidp->seqid;
 		st->other[0] = stateidp->other[0];

Modified: trunk/sys/fs/nfsclient/nfs_clkdtrace.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clkdtrace.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clkdtrace.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Robert N. M. Watson
  * All rights reserved.
@@ -28,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clkdtrace.c 244042 2012-12-08 22:52:39Z rmacklem $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -92,7 +93,7 @@
  * This table is indexed by NFSv3 procedure number, but also used for NFSv2
  * procedure names and NFSv4 operations.
  */
-static struct dtnfsclient_rpc	dtnfsclient_rpcs[NFS_NPROCS + 1] = {
+static struct dtnfsclient_rpc	dtnfsclient_rpcs[NFSV41_NPROCS + 1] = {
 	{ "null", "null", "null" },
 	{ "getattr", "getattr", "getattr" },
 	{ "setattr", "setattr", "setattr" },
@@ -196,17 +197,17 @@
  * stored in one of these two NFS client-allocated arrays; 0 indicates that
  * the event is not being traced so probes should not be called.
  *
- * For simplicity, we allocate both v2, v3 and v4 arrays as NFS_NPROCS + 1, and
- * the v2, v3 arrays are simply sparse.
+ * For simplicity, we allocate both v2, v3 and v4 arrays as NFSV41_NPROCS + 1,
+ * and the v2, v3 arrays are simply sparse.
  */
-extern uint32_t			nfscl_nfs2_start_probes[NFS_NPROCS + 1];
-extern uint32_t			nfscl_nfs2_done_probes[NFS_NPROCS + 1];
+extern uint32_t			nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
+extern uint32_t			nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];
 
-extern uint32_t			nfscl_nfs3_start_probes[NFS_NPROCS + 1];
-extern uint32_t			nfscl_nfs3_done_probes[NFS_NPROCS + 1];
+extern uint32_t			nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
+extern uint32_t			nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];
 
-extern uint32_t			nfscl_nfs4_start_probes[NFS_NPROCS + 1];
-extern uint32_t			nfscl_nfs4_done_probes[NFS_NPROCS + 1];
+extern uint32_t			nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];
+extern uint32_t			nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
 
 /*
  * Look up a DTrace probe ID to see if it's associated with a "done" event --
@@ -217,7 +218,7 @@
 {
 	int i;
 
-	for (i = 0; i < NFS_NPROCS + 1; i++) {
+	for (i = 0; i < NFSV41_NPROCS + 1; i++) {
 		if (dtnfsclient_rpcs[i].nr_v4_id_done == id ||
 		    dtnfsclient_rpcs[i].nr_v3_id_done == id ||
 		    dtnfsclient_rpcs[i].nr_v2_id_done == id)
@@ -401,7 +402,7 @@
 	 * Register NFSv2 RPC procedures; note sparseness check for each slot
 	 * in the NFSv3, NFSv4 procnum-indexed array.
 	 */
-	for (i = 0; i < NFS_NPROCS + 1; i++) {
+	for (i = 0; i < NFSV41_NPROCS + 1; i++) {
 		if (dtnfsclient_rpcs[i].nr_v2_name != NULL &&
 		    dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str,
 		    dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str) ==
@@ -430,7 +431,7 @@
 	 * Register NFSv3 RPC procedures; note sparseness check for each slot
 	 * in the NFSv4 procnum-indexed array.
 	 */
-	for (i = 0; i < NFS_NPROCS + 1; i++) {
+	for (i = 0; i < NFSV41_NPROCS + 1; i++) {
 		if (dtnfsclient_rpcs[i].nr_v3_name != NULL &&
 		    dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str,
 		    dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str) ==
@@ -458,7 +459,7 @@
 	/*
 	 * Register NFSv4 RPC procedures.
 	 */
-	for (i = 0; i < NFS_NPROCS + 1; i++) {
+	for (i = 0; i < NFSV41_NPROCS + 1; i++) {
 		if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str,
 		    dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str) ==
 		    0) {

Modified: trunk/sys/fs/nfsclient/nfs_clkrpc.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clkrpc.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clkrpc.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clkrpc.c 317524 2017-04-27 21:27:20Z rmacklem $");
 
 #include "opt_kgssapi.h"
 
@@ -45,12 +46,13 @@
 
 NFSDLOCKMUTEX;
 
-SVCPOOL		*nfscbd_pool;
+extern SVCPOOL	*nfscbd_pool;
 
 static int nfs_cbproc(struct nfsrv_descript *, u_int32_t);
 
 extern u_long sb_max_adj;
 extern int nfs_numnfscbd;
+extern int nfscl_debuglevel;
 
 /*
  * NFS client system calls for handling callbacks.
@@ -90,6 +92,7 @@
 	nd.nd_mreq = NULL;
 	nd.nd_cred = NULL;
 
+	NFSCL_DEBUG(1, "cbproc=%d\n",nd.nd_procnum);
 	if (nd.nd_procnum != NFSPROC_NULL) {
 		if (!svc_getcred(rqst, &nd.nd_cred, &credflavor)) {
 			svcerr_weakauth(rqst);
@@ -133,9 +136,10 @@
 		svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR);
 		if (nd.nd_mreq != NULL)
 			m_freem(nd.nd_mreq);
-	} else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) {
+	} else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq))
 		svcerr_systemerr(rqst);
-	}
+	else
+		NFSCL_DEBUG(1, "cbrep sent\n");
 	svc_freereq(rqst);
 }
 
@@ -271,19 +275,24 @@
 	NFSD_LOCK_ASSERT();
 
 	if (terminating) {
+		/* Wait for any xprt registrations to complete. */
+		while (nfs_numnfscbd > 0)
+			msleep(&nfs_numnfscbd, NFSDLOCKMUTEXPTR, PZERO, 
+			    "nfscbdt", 0);
+		if (nfscbd_pool != NULL) {
+			NFSD_UNLOCK();
+			svcpool_close(nfscbd_pool);
+			NFSD_LOCK();
+		}
+	}
+
+	if (nfscbd_pool == NULL) {
 		NFSD_UNLOCK();
-		svcpool_destroy(nfscbd_pool);
-		nfscbd_pool = NULL;
+		nfscbd_pool = svcpool_create("nfscbd", NULL);
+		nfscbd_pool->sp_rcache = NULL;
+		nfscbd_pool->sp_assign = NULL;
+		nfscbd_pool->sp_done = NULL;
 		NFSD_LOCK();
 	}
-
-	NFSD_UNLOCK();
-
-	nfscbd_pool = svcpool_create("nfscbd", NULL);
-	nfscbd_pool->sp_rcache = NULL;
-	nfscbd_pool->sp_assign = NULL;
-	nfscbd_pool->sp_done = NULL;
-
-	NFSD_LOCK();
 }
 

Modified: trunk/sys/fs/nfsclient/nfs_clnfsiod.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clnfsiod.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clnfsiod.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clnfsiod.c 249630 2013-04-18 23:20:16Z rmacklem $");
 
 #include <sys/param.h>
 #include <sys/systm.h>

Modified: trunk/sys/fs/nfsclient/nfs_clnode.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clnode.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clnode.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clnode.c 321031 2017-07-15 19:24:54Z rmacklem $");
 
 #include "opt_kdtrace.h"
 
@@ -66,6 +67,8 @@
 
 uma_zone_t newnfsnode_zone;
 
+const char nfs_vnode_tag[] = "newnfs";
+
 static void	nfs_freesillyrename(void *arg, __unused int pending);
 
 void
@@ -122,15 +125,9 @@
 		*npp = VTONFS(nvp);
 		return (0);
 	}
-
-	/*
-	 * Allocate before getnewvnode since doing so afterward
-	 * might cause a bogus v_data pointer to get dereferenced
-	 * elsewhere if zalloc should block.
-	 */
 	np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO);
 
-	error = getnewvnode("newnfs", mntp, &newnfs_vnodeops, &nvp);
+	error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp);
 	if (error) {
 		uma_zfree(newnfsnode_zone, np);
 		return (error);
@@ -204,16 +201,41 @@
 	free(sp, M_NEWNFSREQ);
 }
 
+static void
+ncl_releasesillyrename(struct vnode *vp, struct thread *td)
+{
+	struct nfsnode *np;
+	struct sillyrename *sp;
+
+	ASSERT_VOP_ELOCKED(vp, "releasesillyrename");
+	np = VTONFS(vp);
+	mtx_assert(&np->n_mtx, MA_OWNED);
+	if (vp->v_type != VDIR) {
+		sp = np->n_sillyrename;
+		np->n_sillyrename = NULL;
+	} else
+		sp = NULL;
+	if (sp != NULL) {
+		mtx_unlock(&np->n_mtx);
+		(void) ncl_vinvalbuf(vp, 0, td, 1);
+		/*
+		 * Remove the silly file that was rename'd earlier
+		 */
+		ncl_removeit(sp, vp);
+		crfree(sp->s_cred);
+		TASK_INIT(&sp->s_task, 0, nfs_freesillyrename, sp);
+		taskqueue_enqueue(taskqueue_thread, &sp->s_task);
+		mtx_lock(&np->n_mtx);
+	}
+}
+
 int
 ncl_inactive(struct vop_inactive_args *ap)
 {
+	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np;
-	struct sillyrename *sp;
-	struct vnode *vp = ap->a_vp;
 	boolean_t retv;
 
-	np = VTONFS(vp);
-
 	if (NFS_ISV4(vp) && vp->v_type == VREG) {
 		/*
 		 * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4
@@ -222,37 +244,30 @@
 		 * stateid is available for the writes.
 		 */
 		if (vp->v_object != NULL) {
-			VM_OBJECT_LOCK(vp->v_object);
+			VM_OBJECT_WLOCK(vp->v_object);
 			retv = vm_object_page_clean(vp->v_object, 0, 0,
 			    OBJPC_SYNC);
-			VM_OBJECT_UNLOCK(vp->v_object);
+			VM_OBJECT_WUNLOCK(vp->v_object);
 		} else
 			retv = TRUE;
 		if (retv == TRUE) {
-			(void)ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
+			(void)ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0);
 			(void)nfsrpc_close(vp, 1, ap->a_td);
 		}
 	}
 
+	np = VTONFS(vp);
 	mtx_lock(&np->n_mtx);
-	if (vp->v_type != VDIR) {
-		sp = np->n_sillyrename;
-		np->n_sillyrename = NULL;
-	} else
-		sp = NULL;
-	if (sp) {
-		mtx_unlock(&np->n_mtx);
-		(void) ncl_vinvalbuf(vp, 0, ap->a_td, 1);
-		/*
-		 * Remove the silly file that was rename'd earlier
-		 */
-		ncl_removeit(sp, vp);
-		crfree(sp->s_cred);
-		TASK_INIT(&sp->s_task, 0, nfs_freesillyrename, sp);
-		taskqueue_enqueue(taskqueue_thread, &sp->s_task);
-		mtx_lock(&np->n_mtx);
-	}
-	np->n_flag &= NMODIFIED;
+	ncl_releasesillyrename(vp, ap->a_td);
+
+	/*
+	 * NMODIFIED means that there might be dirty/stale buffers
+	 * associated with the NFS vnode.
+	 * NDSCOMMIT means that the file is on a pNFS server and commits
+	 * should be done to the DS.
+	 * None of the other flags are meaningful after the vnode is unused.
+	 */
+	np->n_flag &= (NMODIFIED | NDSCOMMIT);
 	mtx_unlock(&np->n_mtx);
 	return (0);
 }
@@ -274,6 +289,10 @@
 	if (nfs_reclaim_p != NULL)
 		nfs_reclaim_p(ap);
 
+	mtx_lock(&np->n_mtx);
+	ncl_releasesillyrename(vp, ap->a_td);
+	mtx_unlock(&np->n_mtx);
+
 	/*
 	 * Destroy the vm object and flush associated pages.
 	 */
@@ -338,4 +357,3 @@
 	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	mtx_unlock(&np->n_mtx);
 }
-

Modified: trunk/sys/fs/nfsclient/nfs_clport.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clport.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clport.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,12 +33,12 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clport.c 321057 2017-07-16 19:36:44Z rmacklem $");
 
 #include "opt_inet6.h"
 #include "opt_kdtrace.h"
 
-#include <sys/capability.h>
+#include <sys/capsicum.h>
 
 /*
  * generally, I don't like #includes inside .h files, but it seems to
@@ -78,7 +79,6 @@
 extern int nfscl_enablecallb;
 extern int nfs_numnfscbd;
 extern int nfscl_inited;
-struct mtx nfs_clstate_mutex;
 struct mtx ncl_iod_mutex;
 NFSDLOCKMUTEX;
 
@@ -197,15 +197,9 @@
 		FREE((caddr_t)nfhp, M_NFSFH);
 		return (0);
 	}
-
-	/*
-	 * Allocate before getnewvnode since doing so afterward
-	 * might cause a bogus v_data pointer to get dereferenced
-	 * elsewhere if zalloc should block.
-	 */
 	np = uma_zalloc(newnfsnode_zone, M_WAITOK | M_ZERO);
 
-	error = getnewvnode("newnfs", mntp, &newnfs_vnodeops, &nvp);
+	error = getnewvnode(nfs_vnode_tag, mntp, &newnfs_vnodeops, &nvp);
 	if (error) {
 		uma_zfree(newnfsnode_zone, np);
 		FREE((caddr_t)nfhp, M_NFSFH);
@@ -285,7 +279,7 @@
 }
 
 /*
- * Anothe variant of nfs_nget(). This one is only used by reopen. It
+ * Another variant of nfs_nget(). This one is only used by reopen. It
  * takes almost the same args as nfs_nget(), but only succeeds if an entry
  * exists in the cache. (Since files should already be "open" with a
  * vnode ref cnt on the node when reopen calls this, it should always
@@ -324,11 +318,7 @@
 		NFSVOPUNLOCK(nvp, 0);
 	} else if (error == EBUSY) {
 		/*
-		 * The LK_EXCLOTHER lock type tells nfs_lock1() to not try
-		 * and lock the vnode, but just get a v_usecount on it.
-		 * LK_NOWAIT is set so that when vget() returns ENOENT,
-		 * vfs_hash_get() fails instead of looping.
-		 * If this succeeds, it is safe so long as a vflush() with
+		 * It is safe so long as a vflush() with
 		 * FORCECLOSE has not been done. Since the Renew thread is
 		 * stopped and the MNTK_UNMOUNTF flag is set before doing
 		 * a vflush() with FORCECLOSE, we should be ok here.
@@ -335,10 +325,17 @@
 		 */
 		if ((mntp->mnt_kern_flag & MNTK_UNMOUNTF))
 			error = EINTR;
-		else
-			error = vfs_hash_get(mntp, hash,
-			    (LK_EXCLOTHER | LK_NOWAIT), td, &nvp,
-			    newnfs_vncmpf, nfhp);
+		else {
+			vfs_hash_ref(mntp, hash, td, &nvp, newnfs_vncmpf, nfhp);
+			if (nvp == NULL) {
+				error = ENOENT;
+			} else if ((nvp->v_iflag & VI_DOOMED) != 0) {
+				error = ENOENT;
+				vrele(nvp);
+			} else {
+				error = 0;
+			}
+		}
 	}
 	FREE(nfhp, M_NFSFH);
 	if (error)
@@ -439,6 +436,7 @@
 				vap->va_size = np->n_size;
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
+				vnode_pager_setsize(vp, np->n_size);
 			} else if (np->n_flag & NMODIFIED) {
 				/*
 				 * We've modified the file: Use the larger
@@ -450,12 +448,22 @@
 					np->n_size = vap->va_size;
 					np->n_flag |= NSIZECHANGED;
 				}
+				vnode_pager_setsize(vp, np->n_size);
+			} else if (vap->va_size < np->n_size) {
+				/*
+				 * When shrinking the size, the call to
+				 * vnode_pager_setsize() cannot be done
+				 * with the mutex held, so delay it until
+				 * after the mtx_unlock call.
+				 */
+				nsize = np->n_size = vap->va_size;
+				np->n_flag |= NSIZECHANGED;
+				setnsize = 1;
 			} else {
 				np->n_size = vap->va_size;
 				np->n_flag |= NSIZECHANGED;
+				vnode_pager_setsize(vp, np->n_size);
 			}
-			setnsize = 1;
-			nsize = vap->va_size;
 		} else {
 			np->n_size = vap->va_size;
 		}
@@ -550,7 +558,7 @@
 	struct proc *p;
 
 	if (id == NULL) {
-		printf("NULL id\n");
+		/* Return the single open_owner of all 0 bytes. */
 		bzero(cp, NFSV4CL_LOCKNAMELEN);
 		return;
 	}
@@ -659,6 +667,8 @@
 			}
 		}
 		error = nfscl_postop_attr(nd, nap, flagp, stuff);
+		if (wccflagp != NULL && *flagp == 0)
+			*wccflagp = 0;
 	} else if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR))
 	    == (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
@@ -864,7 +874,7 @@
 	else
 		vers = NFS_VER2;
 	ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
-		NFS_PROG, vers, NULL, 1, NULL);
+		NFS_PROG, vers, NULL, 1, NULL, NULL);
 	return (ret);
 }
 
@@ -1092,9 +1102,16 @@
 	 * us to do a SETATTR RPC. FreeBSD servers store the verifier
 	 * in atime, but we can't really assume that all servers will
 	 * so we ensure that our SETATTR sets both atime and mtime.
+	 * Set the VA_UTIMES_NULL flag for this case, so that
+	 * the server's time will be used.  This is needed to
+	 * work around a bug in some Solaris servers, where
+	 * setting the time TOCLIENT causes the Setattr RPC
+	 * to return NFS_OK, but not set va_mode.
 	 */
-	if (vap->va_mtime.tv_sec == VNOVAL)
+	if (vap->va_mtime.tv_sec == VNOVAL) {
 		vfs_timestamp(&vap->va_mtime);
+		vap->va_vaflags |= VA_UTIMES_NULL;
+	}
 	if (vap->va_atime.tv_sec == VNOVAL)
 		vap->va_atime = vap->va_mtime;
 	return (1);
@@ -1111,7 +1128,7 @@
 {
 	struct proc *p;
 
-	if (error < 10000)
+	if (error < 10000 || error >= NFSERR_STALEWRITEVERF)
 		return (error);
 	if (td != NULL)
 		p = td->td_proc;
@@ -1123,10 +1140,15 @@
 		    "No name and/or group mapping for uid,gid:(%d,%d)\n",
 		    uid, gid);
 		return (EPERM);
+	case NFSERR_BADNAME:
+	case NFSERR_BADCHAR:
+		printf("nfsv4 char/name not handled by server\n");
+		return (ENOENT);
 	case NFSERR_STALECLIENTID:
 	case NFSERR_STALESTATEID:
 	case NFSERR_EXPIRED:
 	case NFSERR_BADSTATEID:
+	case NFSERR_BADSESSION:
 		printf("nfsv4 recover err returned %d\n", error);
 		return (EIO);
 	case NFSERR_BADHANDLE:
@@ -1142,8 +1164,6 @@
 	case NFSERR_LEASEMOVED:
 	case NFSERR_RECLAIMBAD:
 	case NFSERR_BADXDR:
-	case NFSERR_BADCHAR:
-	case NFSERR_BADNAME:
 	case NFSERR_OPILLEGAL:
 		printf("nfsv4 client/server protocol prob err=%d\n",
 		    error);
@@ -1167,8 +1187,15 @@
 	} tl;
 	struct proc *p;
 	pid_t pid;
-	int ret = 0;
+	int i, ret = 0;
 
+	/* For the single open_owner of all 0 bytes, just return 0. */
+	for (i = 0; i < NFSV4CL_LOCKNAMELEN; i++)
+		if (own[i] != 0)
+			break;
+	if (i == NFSV4CL_LOCKNAMELEN)
+		return (0);
+
 	tl.cval[0] = *own++;
 	tl.cval[1] = *own++;
 	tl.cval[2] = *own++;
@@ -1211,10 +1238,11 @@
 	struct file *fp;
 	struct nfscbd_args nfscbdarg;
 	struct nfsd_nfscbd_args nfscbdarg2;
-	int error;
 	struct nameidata nd;
 	struct nfscl_dumpmntopts dumpmntopts;
+	cap_rights_t rights;
 	char *buf;
+	int error;
 
 	if (uap->flag & NFSSVC_CBADDSOCK) {
 		error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg));
@@ -1225,10 +1253,10 @@
 		 * pretend that we need them all. It is better to be too
 		 * careful than too reckless.
 		 */
-		if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_ALL, &fp))
-		    != 0) {
+		error = fget(td, nfscbdarg.sock,
+		    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
+		if (error)
 			return (error);
-		}
 		if (fp->f_type != DTYPE_SOCKET) {
 			fdrop(fp, td);
 			return (EPERM);
@@ -1291,8 +1319,6 @@
 		if (loaded)
 			return (0);
 		newnfs_portinit();
-		mtx_init(&nfs_clstate_mutex, "nfs_clstate_mutex", NULL,
-		    MTX_DEF);
 		mtx_init(&ncl_iod_mutex, "ncl_iod_mutex", NULL, MTX_DEF);
 		nfscl_init();
 		NFSD_LOCK();
@@ -1316,7 +1342,6 @@
 		ncl_call_invalcaches = NULL;
 		nfsd_call_nfscl = NULL;
 		/* and get rid of the mutexes */
-		mtx_destroy(&nfs_clstate_mutex);
 		mtx_destroy(&ncl_iod_mutex);
 		loaded = 0;
 		break;

Modified: trunk/sys/fs/nfsclient/nfs_clrpcops.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clrpcops.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clrpcops.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clrpcops.c 324543 2017-10-11 23:21:24Z rmacklem $");
 
 /*
  * Rpc op calls, generally called from the vnode op calls or through the
@@ -46,7 +47,14 @@
 #include "opt_inet6.h"
 
 #include <fs/nfs/nfsport.h>
+#include <sys/sysctl.h>
 
+SYSCTL_DECL(_vfs_nfs);
+
+static int	nfsignore_eexist = 0;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
+    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
+
 /*
  * Global variables
  */
@@ -67,6 +75,19 @@
 
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
 
+/*
+ * nfscl_getsameserver() can return one of three values:
+ * NFSDSP_USETHISSESSION - Use this session for the DS.
+ * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
+ *     session.
+ * NFSDSP_NOTFOUND - No matching server was found.
+ */
+enum nfsclds_state {
+	NFSDSP_USETHISSESSION = 0,
+	NFSDSP_SEQTHISSESSION = 1,
+	NFSDSP_NOTFOUND = 2,
+};
+
 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
@@ -86,6 +107,26 @@
     u_int32_t, struct ucred *, NFSPROC_T *, int);
 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
     struct acl *, nfsv4stateid_t *, void *);
+static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
+    uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
+    struct ucred *, NFSPROC_T *);
+static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *,
+    struct nfsclds **, NFSPROC_T *);
+static void nfscl_initsessionslots(struct nfsclsession *);
+static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
+    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
+    struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
+    NFSPROC_T *);
+static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
+    struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *,
+    NFSPROC_T *);
+static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
+    nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
+    struct nfsfh *, int, struct ucred *, NFSPROC_T *);
+static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
+    struct nfsclds *, struct nfsclds **);
+static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
+    struct nfsfh *, struct ucred *, NFSPROC_T *);
 
 /*
  * nfs null call from vfs.
@@ -306,9 +347,10 @@
 	     */
 	    if (!error)
 		op->nfso_opencnt++;
-	    nfscl_openrelease(op, error, newone);
+	    nfscl_openrelease(nmp, op, error, newone);
 	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
-		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) {
+		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+		error == NFSERR_BADSESSION) {
 		(void) nfs_catnap(PZERO, error, "nfs_open");
 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
 		&& clidrev != 0) {
@@ -317,6 +359,7 @@
 	    }
 	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+	    error == NFSERR_BADSESSION ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
 	if (error && retrycnt >= 4)
@@ -341,16 +384,18 @@
 	u_int32_t rflags, deleg;
 	nfsattrbit_t attrbits;
 	int error, ret, acesize, limitby;
+	struct nfsclsession *tsep;
 
 	dp = *dpp;
 	*dpp = NULL;
-	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL);
+	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
 	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
 	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
-	*tl++ = op->nfso_own->nfsow_clp->nfsc_clientid.lval[0];
-	*tl = op->nfso_own->nfsow_clp->nfsc_clientid.lval[1];
+	tsep = nfsmnt_mdssession(nmp);
+	*tl++ = tsep->nfsess_clientid.lval[0];
+	*tl = tsep->nfsess_clientid.lval[1];
 	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
@@ -362,7 +407,10 @@
 		if (dp != NULL) {
 			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
-			*tl++ = dp->nfsdl_stateid.seqid;
+			if (NFSHASNFSV4N(nmp))
+				*tl++ = 0;
+			else
+				*tl++ = dp->nfsdl_stateid.seqid;
 			*tl++ = dp->nfsdl_stateid.other[0];
 			*tl++ = dp->nfsdl_stateid.other[1];
 			*tl = dp->nfsdl_stateid.other[2];
@@ -380,7 +428,7 @@
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL);
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
@@ -501,7 +549,8 @@
 			if (ndp != NULL)
 				FREE((caddr_t)ndp, M_NFSCLDELEG);
 			if (ret == NFSERR_STALECLIENTID ||
-			    ret == NFSERR_STALEDONTRECOVER)
+			    ret == NFSERR_STALEDONTRECOVER ||
+			    ret == NFSERR_BADSESSION)
 				error = ret;
 		    }
 		}
@@ -532,7 +581,10 @@
 
 	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
-	*tl++ = op->nfso_stateid.seqid;
+	if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
+		*tl++ = 0;
+	else
+		*tl++ = op->nfso_stateid.seqid;
 	*tl++ = op->nfso_stateid.other[0];
 	*tl++ = op->nfso_stateid.other[1];
 	*tl++ = op->nfso_stateid.other[2];
@@ -690,10 +742,13 @@
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
-	    op->nfso_fhlen, NULL);
+	    op->nfso_fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
-	*tl++ = op->nfso_stateid.seqid;
+	if (NFSHASNFSV4N(nmp))
+		*tl++ = 0;
+	else
+		*tl++ = op->nfso_stateid.seqid;
 	*tl++ = op->nfso_stateid.other[0];
 	*tl++ = op->nfso_stateid.other[1];
 	*tl = op->nfso_stateid.other[2];
@@ -700,7 +755,7 @@
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL);
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
@@ -723,10 +778,13 @@
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
+	struct nfsmount *nmp;
 	int error;
 
-	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, VFSTONFS(vnode_mount(vp)),
-	    nfhp, fhlen, NULL);
+	nmp = VFSTONFS(vnode_mount(vp));
+	if (NFSHASNFSV4N(nmp))
+		return (0);		/* No confirmation for NFSv4.1. */
+	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 	*tl++ = op->nfso_stateid.seqid;
 	*tl++ = op->nfso_stateid.other[0];
@@ -757,7 +815,7 @@
  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
  */
 APPLESTATIC int
-nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp,
+nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
     struct ucred *cred, NFSPROC_T *p)
 {
 	u_int32_t *tl;
@@ -770,13 +828,79 @@
 	nfsquad_t confirm;
 	u_int32_t lease;
 	static u_int32_t rev = 0;
+	struct nfsclds *dsp;
+	struct nfsclsession *tsep;
 
 	if (nfsboottime.tv_sec == 0)
 		NFSSETBOOTTIME(nfsboottime);
-	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL);
+	clp->nfsc_rev = rev++;
+	if (NFSHASNFSV4N(nmp)) {
+		/*
+		 * Either there was no previous session or the
+		 * previous session has failed, so...
+		 * do an ExchangeID followed by the CreateSession.
+		 */
+		error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
+		    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
+		NFSCL_DEBUG(1, "aft exch=%d\n", error);
+		if (error == 0)
+			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
+			    &nmp->nm_sockreq,
+			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
+		if (error == 0) {
+			NFSLOCKMNT(nmp);
+			/*
+			 * The old sessions cannot be safely free'd
+			 * here, since they may still be used by
+			 * in-progress RPCs.
+			 */
+			tsep = NULL;
+			if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
+				tsep = NFSMNT_MDSSESSION(nmp);
+			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
+			    nfsclds_list);
+			/*
+			 * Wake up RPCs waiting for a slot on the
+			 * old session. These will then fail with
+			 * NFSERR_BADSESSION and be retried with the
+			 * new session by nfsv4_setsequence().
+			 * Also wakeup() processes waiting for the
+			 * new session.
+			 */
+			if (tsep != NULL)
+				wakeup(&tsep->nfsess_slots);
+			wakeup(&nmp->nm_sess);
+			NFSUNLOCKMNT(nmp);
+		} else
+			nfscl_freenfsclds(dsp);
+		NFSCL_DEBUG(1, "aft createsess=%d\n", error);
+		if (error == 0 && reclaim == 0) {
+			error = nfsrpc_reclaimcomplete(nmp, cred, p);
+			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
+			if (error == NFSERR_COMPLETEALREADY ||
+			    error == NFSERR_NOTSUPP)
+				/* Ignore this error. */
+				error = 0;
+		}
+		return (error);
+	}
+
+	/*
+	 * Allocate a single session structure for NFSv4.0, because some of
+	 * the fields are used by NFSv4.0 although it doesn't do a session.
+	 */
+	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
+	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
+	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
+	NFSLOCKMNT(nmp);
+	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
+	tsep = NFSMNT_MDSSESSION(nmp);
+	NFSUNLOCKMNT(nmp);
+
+	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
-	*tl = txdr_unsigned(rev++);
+	*tl = txdr_unsigned(clp->nfsc_rev);
 	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
 
 	/*
@@ -827,13 +951,13 @@
 	*tl = txdr_unsigned(clp->nfsc_cbident);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-		NFS_PROG, NFS_VER4, NULL, 1, NULL);
+		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	if (nd->nd_repstat == 0) {
 	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
-	    clp->nfsc_clientid.lval[0] = *tl++;
-	    clp->nfsc_clientid.lval[1] = *tl++;
+	    tsep->nfsess_clientid.lval[0] = *tl++;
+	    tsep->nfsess_clientid.lval[1] = *tl++;
 	    confirm.lval[0] = *tl++;
 	    confirm.lval[1] = *tl;
 	    mbuf_freem(nd->nd_mrep);
@@ -842,15 +966,16 @@
 	    /*
 	     * and confirm it.
 	     */
-	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL);
+	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
+		NULL);
 	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
-	    *tl++ = clp->nfsc_clientid.lval[0];
-	    *tl++ = clp->nfsc_clientid.lval[1];
+	    *tl++ = tsep->nfsess_clientid.lval[0];
+	    *tl++ = tsep->nfsess_clientid.lval[1];
 	    *tl++ = confirm.lval[0];
 	    *tl = confirm.lval[1];
 	    nd->nd_flag |= ND_USEGSSNAME;
 	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
-		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL);
+		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	    if (error)
 		return (error);
 	    mbuf_freem(nd->nd_mrep);
@@ -857,13 +982,13 @@
 	    nd->nd_mrep = NULL;
 	    if (nd->nd_repstat == 0) {
 		nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
-		    nmp->nm_fhsize, NULL);
+		    nmp->nm_fhsize, NULL, NULL);
 		NFSZERO_ATTRBIT(&attrbits);
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		nd->nd_flag |= ND_USEGSSNAME;
 		error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
-		    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL);
+		    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 		if (error)
 		    return (error);
 		if (nd->nd_repstat == 0) {
@@ -917,16 +1042,18 @@
  */
 APPLESTATIC int
 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
-    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp)
+    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
+    uint32_t *leasep)
 {
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	int error, vers = NFS_VER2;
 	nfsattrbit_t attrbits;
 	
-	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL);
+	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL);
 	if (nd->nd_flag & ND_NFSV4) {
 		vers = NFS_VER4;
 		NFSGETATTR_ATTRBIT(&attrbits);
+		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		vers = NFS_VER3;
@@ -934,12 +1061,17 @@
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-	    NFS_PROG, vers, NULL, 1, xidp);
+	    NFS_PROG, vers, NULL, 1, xidp, NULL);
 	if (error)
 		return (error);
-	if (!nd->nd_repstat)
-		error = nfsm_loadattr(nd, nap);
-	else
+	if (nd->nd_repstat == 0) {
+		if ((nd->nd_flag & ND_NFSV4) != 0)
+			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
+			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
+			    NULL, NULL);
+		else
+			error = nfsm_loadattr(nd, nap);
+	} else
 		error = nd->nd_repstat;
 	mbuf_freem(nd->nd_mrep);
 	return (error);
@@ -973,7 +1105,7 @@
 		if (NFSHASNFSV4(nmp)) {
 			nfhp = VTONFS(vp)->n_fhp;
 			error = nfscl_getstateid(vp, nfhp->nfh_fh,
-			    nfhp->nfh_len, mode, cred, p, &stateid, &lckp);
+			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
 			if (error && vnode_vtype(vp) == VREG &&
 			    (mode == NFSV4OPEN_ACCESSWRITE ||
 			     nfstest_openallsetattr)) {
@@ -990,7 +1122,7 @@
 				if (!openerr)
 					(void) nfscl_getstateid(vp,
 					    nfhp->nfh_fh, nfhp->nfh_len,
-					    mode, cred, p, &stateid, &lckp);
+					    mode, 0, cred, p, &stateid, &lckp);
 			}
 		}
 		if (vap != NULL)
@@ -999,6 +1131,11 @@
 		else
 			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
 			    stuff);
+		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
+			NFSLOCKMNT(nmp);
+			nmp->nm_state |= NFSSTA_OPENMODE;
+			NFSUNLOCKMNT(nmp);
+		}
 		if (error == NFSERR_STALESTATEID)
 			nfscl_initiate_recovery(nmp->nm_clp);
 		if (lckp != NULL)
@@ -1007,7 +1144,7 @@
 			(void) nfsrpc_close(vp, 0, p);
 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
-		    error == NFSERR_OLDSTATEID) {
+		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_setattr");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
@@ -1016,9 +1153,12 @@
 		retrycnt++;
 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+	    error == NFSERR_BADSESSION ||
 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
-	     expireret == 0 && clidrev != 0 && retrycnt < 4));
+	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
+	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
+	     retrycnt < 4));
 	if (error && retrycnt >= 4)
 		error = EIO;
 	return (error);
@@ -1054,7 +1194,7 @@
 		return (error);
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
 		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
-	if ((nd->nd_flag & ND_NFSV4) && !error)
+	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
 		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
@@ -1135,14 +1275,23 @@
 		}
 		if (nd->nd_flag & ND_NFSV3)
 		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
+		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
+		    ND_NFSV4) {
+			/* Load the directory attributes. */
+			error = nfsm_loadattr(nd, dnap);
+			if (error == 0)
+				*dattrflagp = 1;
+		}
 		goto nfsmout;
 	}
 	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
-		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
-		if (*(tl + 1)) {
-			nd->nd_flag |= ND_NOMOREDATA;
+		/* Load the directory attributes. */
+		error = nfsm_loadattr(nd, dnap);
+		if (error != 0)
 			goto nfsmout;
-		}
+		*dattrflagp = 1;
+		/* Skip over the Lookup and GetFH operation status values. */
+		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 	}
 	error = nfsm_getfh(nd, nfhpp);
 	if (error)
@@ -1242,9 +1391,15 @@
 		lckp = NULL;
 		if (NFSHASNFSV4(nmp))
 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
-			    NFSV4OPEN_ACCESSREAD, newcred, p, &stateid, &lckp);
+			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
+			    &lckp);
 		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
 		    attrflagp, stuff);
+		if (error == NFSERR_OPENMODE) {
+			NFSLOCKMNT(nmp);
+			nmp->nm_state |= NFSSTA_OPENMODE;
+			NFSUNLOCKMNT(nmp);
+		}
 		if (error == NFSERR_STALESTATEID)
 			nfscl_initiate_recovery(nmp->nm_clp);
 		if (lckp != NULL)
@@ -1251,7 +1406,7 @@
 			nfscl_lockderef(lckp);
 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
-		    error == NFSERR_OLDSTATEID) {
+		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_read");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
@@ -1260,9 +1415,11 @@
 		retrycnt++;
 	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+	    error == NFSERR_BADSESSION ||
 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
-	     expireret == 0 && clidrev != 0 && retrycnt < 4));
+	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
+	    (error == NFSERR_OPENMODE && retrycnt < 4));
 	if (error && retrycnt >= 4)
 		error = EIO;
 	if (NFSHASNFSV4(nmp))
@@ -1395,7 +1552,8 @@
 		nostateid = 0;
 		if (NFSHASNFSV4(nmp)) {
 			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
-			    NFSV4OPEN_ACCESSWRITE, newcred, p, &stateid, &lckp);
+			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
+			    &lckp);
 			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
 			    stateid.other[2] == 0) {
 				nostateid = 1;
@@ -1419,7 +1577,7 @@
 			nfscl_lockderef(lckp);
 		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
-		    error == NFSERR_OLDSTATEID) {
+		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_write");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
@@ -1427,13 +1585,13 @@
 		}
 		retrycnt++;
 	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
-	    ((error == NFSERR_STALESTATEID ||
+	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
 	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
 	if (error != 0 && (retrycnt >= 4 ||
-	    ((error == NFSERR_STALESTATEID ||
+	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
 	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
 		error = EIO;
 	if (NFSHASNFSV4(nmp))
@@ -1609,7 +1767,7 @@
 		}
 		if (error)
 			goto nfsmout;
-		NFSWRITERPC_SETTIME(wccflag, np, (nd->nd_flag & ND_NFSV4));
+		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
 		mbuf_freem(nd->nd_mrep);
 		nd->nd_mrep = NULL;
 		tsiz -= len;
@@ -1745,9 +1903,10 @@
 		if (dp != NULL)
 			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
 			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
-		nfscl_ownerrelease(owp, error, newone, unlocked);
+		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
 		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
-		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) {
+		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+		    error == NFSERR_BADSESSION) {
 			(void) nfs_catnap(PZERO, error, "nfs_open");
 		} else if ((error == NFSERR_EXPIRED ||
 		    error == NFSERR_BADSTATEID) && clidrev != 0) {
@@ -1756,6 +1915,7 @@
 		}
 	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
 		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
+		error == NFSERR_BADSESSION ||
 		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 		 expireret == 0 && clidrev != 0 && retrycnt < 4));
 	    if (error && retrycnt >= 4)
@@ -1836,7 +1996,11 @@
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t stateid;
 	u_int32_t rflags;
+	struct nfsmount *nmp;
+	struct nfsclsession *tsep;
 
+	nmp = VFSTONFS(dvp->v_mount);
+	np = VTONFS(dvp);
 	*unlockedp = 0;
 	*nfhpp = NULL;
 	*dpp = NULL;
@@ -1853,16 +2017,33 @@
 	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
 	    NFSV4OPEN_ACCESSREAD);
 	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
-	*tl++ = owp->nfsow_clp->nfsc_clientid.lval[0];
-	*tl = owp->nfsow_clp->nfsc_clientid.lval[1];
+	tsep = nfsmnt_mdssession(nmp);
+	*tl++ = tsep->nfsess_clientid.lval[0];
+	*tl = tsep->nfsess_clientid.lval[1];
 	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
 	if (fmode & O_EXCL) {
-		*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
-		NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
-		*tl++ = cverf.lval[0];
-		*tl = cverf.lval[1];
+		if (NFSHASNFSV4N(nmp)) {
+			if (NFSHASSESSPERSIST(nmp)) {
+				/* Use GUARDED for persistent sessions. */
+				*tl = txdr_unsigned(NFSCREATE_GUARDED);
+				nfscl_fillsattr(nd, vap, dvp, 0, 0);
+			} else {
+				/* Otherwise, use EXCLUSIVE4_1. */
+				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
+				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
+				*tl++ = cverf.lval[0];
+				*tl = cverf.lval[1];
+				nfscl_fillsattr(nd, vap, dvp, 0, 0);
+			}
+		} else {
+			/* NFSv4.0 */
+			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
+			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
+			*tl++ = cverf.lval[0];
+			*tl = cverf.lval[1];
+		}
 	} else {
 		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
 		nfscl_fillsattr(nd, vap, dvp, 0, 0);
@@ -1870,17 +2051,22 @@
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
 	(void) nfsm_strtom(nd, name, namelen);
+	/* Get the new file's handle and attributes. */
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	NFSGETATTR_ATTRBIT(&attrbits);
 	(void) nfsrv_putattrbit(nd, &attrbits);
+	/* Get the directory's post-op attributes. */
+	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+	*tl = txdr_unsigned(NFSV4OP_PUTFH);
+	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
+	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+	*tl = txdr_unsigned(NFSV4OP_GETATTR);
+	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
 		return (error);
-	error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
-	if (error)
-		goto nfsmout;
 	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
@@ -1952,6 +2138,13 @@
 		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
 		if (error)
 			goto nfsmout;
+		/* Get rid of the PutFH and Getattr status values. */
+		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+		/* Load the directory attributes. */
+		error = nfsm_loadattr(nd, dnap);
+		if (error)
+			goto nfsmout;
+		*dattrflagp = 1;
 		if (dp != NULL && *attrflagp) {
 			dp->nfsdl_change = nnap->na_filerev;
 			dp->nfsdl_modtime = nnap->na_mtime;
@@ -1995,7 +2188,6 @@
 		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
 		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
 		    !error && dp == NULL) {
-		    np = VTONFS(dvp);
 		    do {
 			ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
 			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
@@ -2006,14 +2198,17 @@
 			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
 		    } while (ret == NFSERR_DELAY);
 		    if (ret) {
-			if (dp != NULL)
+			if (dp != NULL) {
 				FREE((caddr_t)dp, M_NFSCLDELEG);
+				dp = NULL;
+			}
 			if (ret == NFSERR_STALECLIENTID ||
-			    ret == NFSERR_STALEDONTRECOVER)
+			    ret == NFSERR_STALEDONTRECOVER ||
+			    ret == NFSERR_BADSESSION)
 				error = ret;
 		    }
 		}
-		nfscl_openrelease(op, error, newone);
+		nfscl_openrelease(nmp, op, error, newone);
 		*unlockedp = 1;
 	}
 	if (nd->nd_repstat != 0 && error == 0)
@@ -2055,7 +2250,10 @@
 			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
 			    NFSX_UNSIGNED);
-			*tl++ = dstateid.seqid;
+			if (NFSHASNFSV4N(nmp))
+				*tl++ = 0;
+			else
+				*tl++ = dstateid.seqid;
 			*tl++ = dstateid.other[0];
 			*tl++ = dstateid.other[1];
 			*tl++ = dstateid.other[2];
@@ -2138,7 +2336,10 @@
 		}
 		if (gotfd) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
-			*tl++ = fdstateid.seqid;
+			if (NFSHASNFSV4N(nmp))
+				*tl++ = 0;
+			else
+				*tl++ = fdstateid.seqid;
 			*tl++ = fdstateid.other[0];
 			*tl++ = fdstateid.other[1];
 			*tl = fdstateid.other[2];
@@ -2154,7 +2355,10 @@
 		}
 		if (gottd) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
-			*tl++ = tdstateid.seqid;
+			if (NFSHASNFSV4N(nmp))
+				*tl++ = 0;
+			else
+				*tl++ = tdstateid.seqid;
 			*tl++ = tdstateid.other[0];
 			*tl++ = tdstateid.other[1];
 			*tl = tdstateid.other[2];
@@ -2369,8 +2573,12 @@
 	mbuf_freem(nd->nd_mrep);
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+	 * Only do this if vfs.nfs.ignore_eexist is set.
+	 * Never do this for NFSv4.1 or later minor versions, since sessions
+	 * should guarantee "exactly once" RPC semantics.
 	 */
-	if (error == EEXIST)
+	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
+	    nmp->nm_minorvers == 0))
 		error = 0;
 	return (error);
 }
@@ -2388,10 +2596,14 @@
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	nfsattrbit_t attrbits;
 	int error = 0;
+	struct nfsfh *fhp;
+	struct nfsmount *nmp;
 
 	*nfhpp = NULL;
 	*attrflagp = 0;
 	*dattrflagp = 0;
+	nmp = VFSTONFS(vnode_mount(dvp));
+	fhp = VTONFS(dvp)->n_fhp;
 	if (namelen > NFS_MAXNAMLEN)
 		return (ENAMETOOLONG);
 	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
@@ -2407,6 +2619,12 @@
 		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
 		*tl = txdr_unsigned(NFSV4OP_GETATTR);
 		(void) nfsrv_putattrbit(nd, &attrbits);
+		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(NFSV4OP_PUTFH);
+		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
+		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(NFSV4OP_GETATTR);
+		(void) nfsrv_putattrbit(nd, &attrbits);
 	}
 	error = nfscl_request(nd, dvp, p, cred, dstuff);
 	if (error)
@@ -2420,6 +2638,14 @@
 		}
 		if (!error)
 			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
+		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
+			/* Get rid of the PutFH and Getattr status values. */
+			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
+			/* Load the directory attributes. */
+			error = nfsm_loadattr(nd, dnap);
+			if (error == 0)
+				*dattrflagp = 1;
+		}
 	}
 	if ((nd->nd_flag & ND_NFSV3) && !error)
 		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
@@ -2428,9 +2654,13 @@
 nfsmout:
 	mbuf_freem(nd->nd_mrep);
 	/*
-	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry.
+	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+	 * Only do this if vfs.nfs.ignore_eexist is set.
+	 * Never do this for NFSv4.1 or later minor versions, since sessions
+	 * should guarantee "exactly once" RPC semantics.
 	 */
-	if (error == EEXIST)
+	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
+	    nmp->nm_minorvers == 0))
 		error = 0;
 	return (error);
 }
@@ -2566,14 +2796,6 @@
 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
 		 */
 		if (uiop->uio_offset == 0) {
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 800000
-			error = VOP_GETATTR(vp, &nfsva.na_vattr, cred);
-#else
-			error = VOP_GETATTR(vp, &nfsva.na_vattr, cred, p);
-#endif
-			if (error)
-			    return (error);
-			dotfileid = nfsva.na_fileid;
 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
@@ -2582,9 +2804,16 @@
 			error = nfscl_request(nd, vp, p, cred, stuff);
 			if (error)
 			    return (error);
+			dotfileid = 0;	/* Fake out the compiler. */
+			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
+			    error = nfsm_loadattr(nd, &nfsva);
+			    if (error != 0)
+				goto nfsmout;
+			    dotfileid = nfsva.na_fileid;
+			}
 			if (nd->nd_repstat == 0) {
-			    NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
-			    len = fxdr_unsigned(int, *(tl + 2));
+			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+			    len = fxdr_unsigned(int, *(tl + 4));
 			    if (len > 0 && len <= NFSX_V4FHMAX)
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 			    else
@@ -2993,15 +3222,6 @@
 		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
 		 */
 		if (uiop->uio_offset == 0) {
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 800000
-			error = VOP_GETATTR(vp, &nfsva.na_vattr, cred);
-#else
-			error = VOP_GETATTR(vp, &nfsva.na_vattr, cred, p);
-#endif
-			if (error)
-			    return (error);
-			dctime = nfsva.na_ctime;
-			dotfileid = nfsva.na_fileid;
 			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
@@ -3010,9 +3230,17 @@
 			error = nfscl_request(nd, vp, p, cred, stuff);
 			if (error)
 			    return (error);
+			dotfileid = 0;	/* Fake out the compiler. */
+			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
+			    error = nfsm_loadattr(nd, &nfsva);
+			    if (error != 0)
+				goto nfsmout;
+			    dctime = nfsva.na_ctime;
+			    dotfileid = nfsva.na_fileid;
+			}
 			if (nd->nd_repstat == 0) {
-			    NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
-			    len = fxdr_unsigned(int, *(tl + 2));
+			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
+			    len = fxdr_unsigned(int, *(tl + 4));
 			    if (len > 0 && len <= NFSX_V4FHMAX)
 				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 			    else
@@ -3421,13 +3649,13 @@
  */
 APPLESTATIC int
 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
-    NFSPROC_T *p, u_char *verfp, struct nfsvattr *nap, int *attrflagp,
-    void *stuff)
+    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd, *nd = &nfsd;
 	nfsattrbit_t attrbits;
 	int error;
+	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
 	
 	*attrflagp = 0;
 	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
@@ -3450,7 +3678,12 @@
 	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
 	if (!error && !nd->nd_repstat) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
-		NFSBCOPY((caddr_t)tl, verfp, NFSX_VERF);
+		NFSLOCKMNT(nmp);
+		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
+			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
+			nd->nd_repstat = NFSERR_STALEWRITEVERF;
+		}
+		NFSUNLOCKMNT(nmp);
 		if (nd->nd_flag & ND_NFSV4)
 			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
 	}
@@ -3516,7 +3749,7 @@
 	do {
 	    nd->nd_repstat = 0;
 	    if (op == F_GETLK) {
-		error = nfscl_getcl(vp, cred, p, &clp);
+		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
 		if (error)
 			return (error);
 		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
@@ -3533,7 +3766,7 @@
 		 * We must loop around for all lockowner cases.
 		 */
 		callcnt = 0;
-		error = nfscl_getcl(vp, cred, p, &clp);
+		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
 		if (error)
 			return (error);
 		do {
@@ -3610,7 +3843,8 @@
 	        error = nd->nd_repstat;
 	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
 		error == NFSERR_STALEDONTRECOVER ||
-		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY) {
+		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
+		error == NFSERR_BADSESSION) {
 		(void) nfs_catnap(PZERO, error, "nfs_advlock");
 	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
 		&& clidrev != 0) {
@@ -3620,6 +3854,7 @@
 	} while (error == NFSERR_GRACE ||
 	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
 	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
+	    error == NFSERR_BADSESSION ||
 	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
 	     expireret == 0 && clidrev != 0 && retrycnt < 4));
 	if (error && retrycnt >= 4)
@@ -3639,7 +3874,10 @@
 	int error, type, size;
 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
 	struct nfsnode *np;
+	struct nfsmount *nmp;
+	struct nfsclsession *tsep;
 
+	nmp = VFSTONFS(vp->v_mount);
 	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 	if (fl->l_type == F_RDLCK)
@@ -3650,8 +3888,9 @@
 	tl += 2;
 	txdr_hyper(len, tl);
 	tl += 2;
-	*tl++ = clp->nfsc_clientid.lval[0];
-	*tl = clp->nfsc_clientid.lval[1];
+	tsep = nfsmnt_mdssession(nmp);
+	*tl++ = tsep->nfsess_clientid.lval[0];
+	*tl = tsep->nfsess_clientid.lval[1];
 	nfscl_filllockowner(id, own, flags);
 	np = VTONFS(vp);
 	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
@@ -3710,7 +3949,7 @@
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
-	    lp->nfsl_open->nfso_fhlen, NULL);
+	    lp->nfsl_open->nfso_fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
 	*tl++ = txdr_unsigned(type);
 	*tl = txdr_unsigned(lp->nfsl_seqid);
@@ -3718,7 +3957,10 @@
 	    (arc4random() % nfstest_outofseq) == 0)
 		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
 	tl++;
-	*tl++ = lp->nfsl_stateid.seqid;
+	if (NFSHASNFSV4N(nmp))
+		*tl++ = 0;
+	else
+		*tl++ = lp->nfsl_stateid.seqid;
 	*tl++ = lp->nfsl_stateid.other[0];
 	*tl++ = lp->nfsl_stateid.other[1];
 	*tl++ = lp->nfsl_stateid.other[2];
@@ -3728,7 +3970,7 @@
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL);
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
 	if (error)
 		return (error);
@@ -3757,8 +3999,9 @@
 	u_int32_t *tl;
 	int error, size;
 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
+	struct nfsclsession *tsep;
 
-	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL);
+	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 	if (type == F_RDLCK)
 		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
@@ -3774,13 +4017,17 @@
 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
 		2 * NFSX_UNSIGNED + NFSX_HYPER);
 	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
-	    *tl++ = lp->nfsl_open->nfso_stateid.seqid;
+	    if (NFSHASNFSV4N(nmp))
+		*tl++ = 0;
+	    else
+		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
 	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
 	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
 	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
 	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
-	    *tl++ = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[0];
-	    *tl = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[1];
+	    tsep = nfsmnt_mdssession(nmp);
+	    *tl++ = tsep->nfsess_clientid.lval[0];
+	    *tl = tsep->nfsess_clientid.lval[1];
 	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
 	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
 	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
@@ -3787,7 +4034,10 @@
 	} else {
 	    *tl = newnfs_false;
 	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
-	    *tl++ = lp->nfsl_stateid.seqid;
+	    if (NFSHASNFSV4N(nmp))
+		*tl++ = 0;
+	    else
+		*tl++ = lp->nfsl_stateid.seqid;
 	    *tl++ = lp->nfsl_stateid.other[0];
 	    *tl++ = lp->nfsl_stateid.other[1];
 	    *tl++ = lp->nfsl_stateid.other[2];
@@ -3799,7 +4049,7 @@
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL);
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	if (newone)
@@ -4009,7 +4259,8 @@
  * This function performs the Renew RPC.
  */
 APPLESTATIC int
-nfsrpc_renew(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p)
+nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
+    NFSPROC_T *p)
 {
 	u_int32_t *tl;
 	struct nfsrv_descript nfsd;
@@ -4016,17 +4267,37 @@
 	struct nfsrv_descript *nd = &nfsd;
 	struct nfsmount *nmp;
 	int error;
+	struct nfssockreq *nrp;
+	struct nfsclsession *tsep;
 
 	nmp = clp->nfsc_nmp;
 	if (nmp == NULL)
 		return (0);
-	nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL);
-	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
-	*tl++ = clp->nfsc_clientid.lval[0];
-	*tl = clp->nfsc_clientid.lval[1];
+	if (dsp == NULL)
+		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL);
+	else
+		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
+		    &dsp->nfsclds_sess);
+	if (!NFSHASNFSV4N(nmp)) {
+		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
+		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+		tsep = nfsmnt_mdssession(nmp);
+		*tl++ = tsep->nfsess_clientid.lval[0];
+		*tl = tsep->nfsess_clientid.lval[1];
+	}
+	nrp = NULL;
+	if (dsp != NULL)
+		nrp = dsp->nfsclds_sockp;
+	if (nrp == NULL)
+		/* If NULL, use the MDS socket. */
+		nrp = &nmp->nm_sockreq;
 	nd->nd_flag |= ND_USEGSSNAME;
-	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-		NFS_PROG, NFS_VER4, NULL, 1, NULL);
+	if (dsp == NULL)
+		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
+		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	else
+		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
+		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
 	if (error)
 		return (error);
 	error = nd->nd_repstat;
@@ -4045,17 +4316,27 @@
 	u_int32_t *tl;
 	int error;
 	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
+	struct nfsclsession *tsep;
 
-	nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL);
-	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
-	*tl++ = nmp->nm_clp->nfsc_clientid.lval[0];
-	*tl = nmp->nm_clp->nfsc_clientid.lval[1];
-	NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
-	NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
-	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
+	if (NFSHASNFSV4N(nmp)) {
+		/* For NFSv4.1, do a FreeStateID. */
+		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
+		    NULL);
+		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
+	} else {
+		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
+		    NULL);
+		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+		tsep = nfsmnt_mdssession(nmp);
+		*tl++ = tsep->nfsess_clientid.lval[0];
+		*tl = tsep->nfsess_clientid.lval[1];
+		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
+		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
+		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
+	}
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL);
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	error = nd->nd_repstat;
@@ -4077,7 +4358,7 @@
 	int error, cnt, len, setnil;
 	u_int32_t *opcntp;
 
-	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp);
+	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL);
 	cp = dirpath;
 	cnt = 0;
 	do {
@@ -4101,12 +4382,16 @@
 			*cp2++ = '/';
 		cp = cp2;
 	} while (*cp != '\0');
-	*opcntp = txdr_unsigned(2 + cnt);
+	if (NFSHASNFSV4N(nmp))
+		/* Has a Sequence Op done by nfscl_reqstart(). */
+		*opcntp = txdr_unsigned(3 + cnt);
+	else
+		*opcntp = txdr_unsigned(2 + cnt);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETFH);
 	nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-		NFS_PROG, NFS_VER4, NULL, 1, NULL);
+		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	if (nd->nd_repstat == 0) {
@@ -4140,9 +4425,12 @@
 	int error;
 
 	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
-	    dp->nfsdl_fhlen, NULL);
+	    dp->nfsdl_fhlen, NULL, NULL);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
-	*tl++ = dp->nfsdl_stateid.seqid;
+	if (NFSHASNFSV4N(nmp))
+		*tl++ = 0;
+	else
+		*tl++ = dp->nfsdl_stateid.seqid;
 	*tl++ = dp->nfsdl_stateid.other[0];
 	*tl++ = dp->nfsdl_stateid.other[1];
 	*tl = dp->nfsdl_stateid.other[2];
@@ -4149,7 +4437,7 @@
 	if (syscred)
 		nd->nd_flag |= ND_USEGSSNAME;
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
-	    NFS_PROG, NFS_VER4, NULL, 1, NULL);
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error)
 		return (error);
 	error = nd->nd_repstat;
@@ -4230,3 +4518,1532 @@
 	mbuf_freem(nd->nd_mrep);
 	return (nd->nd_repstat);
 }
+
+/*
+ * Do the NFSv4.1 Exchange ID.
+ */
+int
+nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
+    struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl, v41flags;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	struct nfsclds *dsp;
+	struct timespec verstime;
+	int error, len;
+
+	*dspp = NULL;
+	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
+	*tl = txdr_unsigned(clp->nfsc_rev);
+	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
+
+	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(exchflags);
+	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
+
+	/* Set the implementation id4 */
+	*tl = txdr_unsigned(1);
+	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
+	(void) nfsm_strtom(nd, version, strlen(version));
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
+	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
+	verstime.tv_nsec = 0;
+	txdr_nfsv4time(&verstime, tl);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
+	    (int)nd->nd_repstat);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
+		len = fxdr_unsigned(int, *(tl + 7));
+		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
+		    M_WAITOK | M_ZERO);
+		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
+		dsp->nfsclds_servownlen = len;
+		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
+		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
+		dsp->nfsclds_sess.nfsess_sequenceid =
+		    fxdr_unsigned(uint32_t, *tl++);
+		v41flags = fxdr_unsigned(uint32_t, *tl);
+		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
+		    NFSHASPNFSOPT(nmp)) {
+			NFSCL_DEBUG(1, "set PNFS\n");
+			NFSLOCKMNT(nmp);
+			nmp->nm_state |= NFSSTA_PNFS;
+			NFSUNLOCKMNT(nmp);
+			dsp->nfsclds_flags |= NFSCLDS_MDS;
+		}
+		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
+			dsp->nfsclds_flags |= NFSCLDS_DS;
+		if (len > 0)
+			nd->nd_repstat = nfsrv_mtostr(nd,
+			    dsp->nfsclds_serverown, len);
+		if (nd->nd_repstat == 0) {
+			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
+			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
+			    NULL, MTX_DEF);
+			nfscl_initsessionslots(&dsp->nfsclds_sess);
+			*dspp = dsp;
+		} else
+			free(dsp, M_NFSCLDS);
+	}
+	error = nd->nd_repstat;
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Create Session.
+ */
+int
+nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
+    struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
+    NFSPROC_T *p)
+{
+	uint32_t crflags, maxval, *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error, irdcnt;
+
+	/* Make sure nm_rsize, nm_wsize is set. */
+	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
+		nmp->nm_rsize = NFS_MAXBSIZE;
+	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
+		nmp->nm_wsize = NFS_MAXBSIZE;
+	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+	*tl++ = sep->nfsess_clientid.lval[0];
+	*tl++ = sep->nfsess_clientid.lval[1];
+	*tl++ = txdr_unsigned(sequenceid);
+	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
+	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
+		crflags |= NFSV4CRSESS_CONNBACKCHAN;
+	*tl = txdr_unsigned(crflags);
+
+	/* Fill in fore channel attributes. */
+	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+	*tl++ = 0;				/* Header pad size */
+	*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
+	*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
+	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
+	*tl++ = txdr_unsigned(20);		/* Max operations */
+	*tl++ = txdr_unsigned(64);		/* Max slots */
+	*tl = 0;				/* No rdma ird */
+
+	/* Fill in back channel attributes. */
+	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+	*tl++ = 0;				/* Header pad size */
+	*tl++ = txdr_unsigned(10000);		/* Max request size */
+	*tl++ = txdr_unsigned(10000);		/* Max response size */
+	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
+	*tl++ = txdr_unsigned(4);		/* Max operations */
+	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
+	*tl = 0;				/* No rdma ird */
+
+	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
+
+	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
+	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
+	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
+	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
+	*tl++ = 0;				/* Null machine name */
+	*tl++ = 0;				/* Uid == 0 */
+	*tl++ = 0;				/* Gid == 0 */
+	*tl = 0;				/* No additional gids */
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
+	    NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
+		    2 * NFSX_UNSIGNED);
+		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
+		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
+		crflags = fxdr_unsigned(uint32_t, *tl);
+		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
+			NFSLOCKMNT(nmp);
+			nmp->nm_state |= NFSSTA_SESSPERSIST;
+			NFSUNLOCKMNT(nmp);
+		}
+
+		/* Get the fore channel slot count. */
+		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+		tl++;			/* Skip the header pad size. */
+
+		/* Make sure nm_wsize is small enough. */
+		maxval = fxdr_unsigned(uint32_t, *tl++);
+		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
+			if (nmp->nm_wsize > 8096)
+				nmp->nm_wsize /= 2;
+			else
+				break;
+		}
+
+		/* Make sure nm_rsize is small enough. */
+		maxval = fxdr_unsigned(uint32_t, *tl++);
+		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
+			if (nmp->nm_rsize > 8096)
+				nmp->nm_rsize /= 2;
+			else
+				break;
+		}
+
+		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
+		tl++;
+		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
+		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
+		irdcnt = fxdr_unsigned(int, *tl);
+		if (irdcnt > 0)
+			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
+
+		/* and the back channel slot count. */
+		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+		tl += 5;
+		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
+		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
+	}
+	error = nd->nd_repstat;
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Destroy Session.
+ */
+int
+nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error;
+	struct nfsclsession *tsep;
+
+	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
+	tsep = nfsmnt_mdssession(nmp);
+	bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Destroy Client.
+ */
+int
+nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error;
+	struct nfsclsession *tsep;
+
+	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+	tsep = nfsmnt_mdssession(nmp);
+	*tl++ = tsep->nfsess_clientid.lval[0];
+	*tl = tsep->nfsess_clientid.lval[1];
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 LayoutGet.
+ */
+int
+nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
+    uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen,
+    nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp,
+    struct ucred *cred, NFSPROC_T *p, void *stuff)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	struct nfsfh *nfhp;
+	struct nfsclflayout *flp, *prevflp, *tflp;
+	int cnt, error, gotiomode, fhcnt, nfhlen, i, j;
+	uint8_t *cp;
+	uint64_t retlen;
+
+	flp = NULL;
+	gotiomode = -1;
+	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
+	    NFSX_STATEID);
+	*tl++ = newnfs_false;		/* Don't signal availability. */
+	*tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES);
+	*tl++ = txdr_unsigned(iomode);
+	txdr_hyper(offset, tl);
+	tl += 2;
+	txdr_hyper(len, tl);
+	tl += 2;
+	txdr_hyper(minlen, tl);
+	tl += 2;
+	*tl++ = txdr_unsigned(stateidp->seqid);
+	NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
+	*tl++ = stateidp->other[0];
+	*tl++ = stateidp->other[1];
+	*tl++ = stateidp->other[2];
+	*tl = txdr_unsigned(layoutlen);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
+		if (*tl++ != 0)
+			*retonclosep = 1;
+		else
+			*retonclosep = 0;
+		stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
+		NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
+		    (int)stateidp->seqid);
+		stateidp->other[0] = *tl++;
+		stateidp->other[1] = *tl++;
+		stateidp->other[2] = *tl++;
+		cnt = fxdr_unsigned(int, *tl);
+		NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
+		if (cnt <= 0 || cnt > 10000) {
+			/* Don't accept more than 10000 layouts in reply. */
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+		for (i = 0; i < cnt; i++) {
+			/* Dissect all the way to the file handle cnt. */
+			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER +
+			    6 * NFSX_UNSIGNED + NFSX_V4DEVICEID);
+			fhcnt = fxdr_unsigned(int, *(tl + 11 +
+			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
+			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
+			if (fhcnt < 0 || fhcnt > 100) {
+				/* Don't accept more than 100 file handles. */
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			if (fhcnt > 1)
+				flp = malloc(sizeof(*flp) + (fhcnt - 1) *
+				    sizeof(struct nfsfh *),
+				    M_NFSFLAYOUT, M_WAITOK);
+			else
+				flp = malloc(sizeof(*flp),
+				    M_NFSFLAYOUT, M_WAITOK);
+			flp->nfsfl_flags = 0;
+			flp->nfsfl_fhcnt = 0;
+			flp->nfsfl_devp = NULL;
+			flp->nfsfl_off = fxdr_hyper(tl); tl += 2;
+			retlen = fxdr_hyper(tl); tl += 2;
+			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
+				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
+			else
+				flp->nfsfl_end = flp->nfsfl_off + retlen;
+			flp->nfsfl_iomode = fxdr_unsigned(int, *tl++);
+			if (gotiomode == -1)
+				gotiomode = flp->nfsfl_iomode;
+			NFSCL_DEBUG(4, "layg reqiom=%d retiom=%d\n", iomode,
+			    (int)flp->nfsfl_iomode);
+			if (fxdr_unsigned(int, *tl++) !=
+			    NFSLAYOUT_NFSV4_1_FILES) {
+				printf("NFSv4.1: got non-files layout\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
+			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
+			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
+			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
+			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
+			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
+			if (fxdr_unsigned(int, *tl) != fhcnt) {
+				printf("EEK! bad fhcnt\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			for (j = 0; j < fhcnt; j++) {
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+				nfhlen = fxdr_unsigned(int, *tl);
+				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
+					error = NFSERR_BADXDR;
+					goto nfsmout;
+				}
+				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
+				    M_NFSFH, M_WAITOK);
+				flp->nfsfl_fh[j] = nfhp;
+				flp->nfsfl_fhcnt++;
+				nfhp->nfh_len = nfhlen;
+				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
+				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
+			}
+			if (flp->nfsfl_iomode == gotiomode) {
+				/* Keep the list in increasing offset order. */
+				tflp = LIST_FIRST(flhp);
+				prevflp = NULL;
+				while (tflp != NULL &&
+				    tflp->nfsfl_off < flp->nfsfl_off) {
+					prevflp = tflp;
+					tflp = LIST_NEXT(tflp, nfsfl_list);
+				}
+				if (prevflp == NULL)
+					LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
+				else
+					LIST_INSERT_AFTER(prevflp, flp,
+					    nfsfl_list);
+			} else {
+				printf("nfscl_layoutget(): got wrong iomode\n");
+				nfscl_freeflayout(flp);
+			}
+			flp = NULL;
+		}
+	}
+	if (nd->nd_repstat != 0 && error == 0)
+		error = nd->nd_repstat;
+nfsmout:
+	if (error != 0 && flp != NULL)
+		nfscl_freeflayout(flp);
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Get Device Info.
+ */
+int
+nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
+    uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
+    NFSPROC_T *p)
+{
+	uint32_t cnt, *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	struct sockaddr_storage ss;
+	struct nfsclds *dsp = NULL, **dspp;
+	struct nfscldevinfo *ndi;
+	int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt;
+	uint8_t stripeindex;
+
+	*ndip = NULL;
+	ndi = NULL;
+	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
+	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
+	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(layouttype);
+	*tl++ = txdr_unsigned(100000);
+	if (notifybitsp != NULL && *notifybitsp != 0) {
+		*tl = txdr_unsigned(1);		/* One word of bits. */
+		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(*notifybitsp);
+	} else
+		*tl = txdr_unsigned(0);
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+		if (layouttype != fxdr_unsigned(int, *tl++))
+			printf("EEK! devinfo layout type not same!\n");
+		stripecnt = fxdr_unsigned(int, *++tl);
+		NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
+		if (stripecnt < 1 || stripecnt > 4096) {
+			printf("NFS devinfo stripecnt %d: out of range\n",
+			    stripecnt);
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+		NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED);
+		addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
+		NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
+		if (addrcnt < 1 || addrcnt > 128) {
+			printf("NFS devinfo addrcnt %d: out of range\n",
+			    addrcnt);
+			error = NFSERR_BADXDR;
+			goto nfsmout;
+		}
+
+		/*
+		 * Now we know how many stripe indices and addresses, so
+		 * we can allocate the structure the correct size.
+		 */
+		i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *)
+		    + 1;
+		NFSCL_DEBUG(4, "stripeindices=%d\n", i);
+		ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
+		    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO);
+		NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID);
+		ndi->nfsdi_refcnt = 0;
+		ndi->nfsdi_stripecnt = stripecnt;
+		ndi->nfsdi_addrcnt = addrcnt;
+		/* Fill in the stripe indices. */
+		for (i = 0; i < stripecnt; i++) {
+			stripeindex = fxdr_unsigned(uint8_t, *tl++);
+			NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
+			if (stripeindex >= addrcnt) {
+				printf("NFS devinfo stripeindex %d: too big\n",
+				    (int)stripeindex);
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			nfsfldi_setstripeindex(ndi, i, stripeindex);
+		}
+
+		/* Now, dissect the server address(es). */
+		safilled = 0;
+		for (i = 0; i < addrcnt; i++) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+			cnt = fxdr_unsigned(uint32_t, *tl);
+			if (cnt == 0) {
+				printf("NFS devinfo 0 len addrlist\n");
+				error = NFSERR_BADXDR;
+				goto nfsmout;
+			}
+			dspp = nfsfldi_addr(ndi, i);
+			pos = arc4random() % cnt;	/* Choose one. */
+			safilled = 0;
+			for (j = 0; j < cnt; j++) {
+				error = nfsv4_getipaddr(nd, &ss, &isudp);
+				if (error != 0 && error != EPERM) {
+					error = NFSERR_BADXDR;
+					goto nfsmout;
+				}
+				if (error == 0 && isudp == 0) {
+					/*
+					 * The algorithm is:
+					 * - use "pos" entry if it is of the
+					 *   same af_family or none of them
+					 *   is of the same af_family
+					 * else
+					 * - use the first one of the same
+					 *   af_family.
+					 */
+					if ((safilled == 0 && ss.ss_family ==
+					     nmp->nm_nam->sa_family) ||
+					    (j == pos &&
+					     (safilled == 0 || ss.ss_family ==
+					      nmp->nm_nam->sa_family)) ||
+					    (safilled == 1 && ss.ss_family ==
+					     nmp->nm_nam->sa_family)) {
+						error = nfsrpc_fillsa(nmp, &ss,
+						    &dsp, p);
+						if (error == 0) {
+							*dspp = dsp;
+							if (ss.ss_family ==
+							 nmp->nm_nam->sa_family)
+								safilled = 2;
+							else
+								safilled = 1;
+						}
+					}
+				}
+			}
+			if (safilled == 0)
+				break;
+		}
+
+		/* And the notify bits. */
+		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+		if (safilled != 0) {
+			bitcnt = fxdr_unsigned(int, *tl);
+			if (bitcnt > 0) {
+				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+				if (notifybitsp != NULL)
+					*notifybitsp =
+					    fxdr_unsigned(uint32_t, *tl);
+			}
+			*ndip = ndi;
+		} else
+			error = EPERM;
+	}
+	if (nd->nd_repstat != 0)
+		error = nd->nd_repstat;
+nfsmout:
+	if (error != 0 && ndi != NULL)
+		nfscl_freedevinfo(ndi);
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 LayoutCommit.
+ */
+int
+nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
+    uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
+    int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred,
+    NFSPROC_T *p, void *stuff)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	int error, outcnt, i;
+	uint8_t *cp;
+
+	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
+	    NFSX_STATEID);
+	txdr_hyper(off, tl);
+	tl += 2;
+	txdr_hyper(len, tl);
+	tl += 2;
+	if (reclaim != 0)
+		*tl++ = newnfs_true;
+	else
+		*tl++ = newnfs_false;
+	*tl++ = txdr_unsigned(stateidp->seqid);
+	*tl++ = stateidp->other[0];
+	*tl++ = stateidp->other[1];
+	*tl++ = stateidp->other[2];
+	*tl++ = newnfs_true;
+	if (lastbyte < off)
+		lastbyte = off;
+	else if (lastbyte >= (off + len))
+		lastbyte = off + len - 1;
+	txdr_hyper(lastbyte, tl);
+	tl += 2;
+	*tl++ = newnfs_false;
+	*tl++ = txdr_unsigned(layouttype);
+	*tl = txdr_unsigned(layoutupdatecnt);
+	if (layoutupdatecnt > 0) {
+		KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES,
+		    ("Must be nil for Files Layout"));
+		outcnt = NFSM_RNDUP(layoutupdatecnt);
+		NFSM_BUILD(cp, uint8_t *, outcnt);
+		NFSBCOPY(layp, cp, layoutupdatecnt);
+		cp += layoutupdatecnt;
+		for (i = 0; i < (outcnt - layoutupdatecnt); i++)
+			*cp++ = 0x0;
+	}
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 LayoutReturn.
+ */
+int
+nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
+    int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
+    uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp,
+    struct ucred *cred, NFSPROC_T *p, void *stuff)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	int error, outcnt, i;
+	uint8_t *cp;
+
+	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+	if (reclaim != 0)
+		*tl++ = newnfs_true;
+	else
+		*tl++ = newnfs_false;
+	*tl++ = txdr_unsigned(layouttype);
+	*tl++ = txdr_unsigned(iomode);
+	*tl = txdr_unsigned(layoutreturn);
+	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
+		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
+		    NFSX_UNSIGNED);
+		txdr_hyper(offset, tl);
+		tl += 2;
+		txdr_hyper(len, tl);
+		tl += 2;
+		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
+		*tl++ = txdr_unsigned(stateidp->seqid);
+		*tl++ = stateidp->other[0];
+		*tl++ = stateidp->other[1];
+		*tl++ = stateidp->other[2];
+		*tl = txdr_unsigned(layoutcnt);
+		if (layoutcnt > 0) {
+			outcnt = NFSM_RNDUP(layoutcnt);
+			NFSM_BUILD(cp, uint8_t *, outcnt);
+			NFSBCOPY(layp, cp, layoutcnt);
+			cp += layoutcnt;
+			for (i = 0; i < (outcnt - layoutcnt); i++)
+				*cp++ = 0x0;
+		}
+	}
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+		if (*tl != 0) {
+			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
+			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
+			stateidp->other[0] = *tl++;
+			stateidp->other[1] = *tl++;
+			stateidp->other[2] = *tl;
+		}
+	} else
+		error = nd->nd_repstat;
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Acquire a layout and devinfo, if possible. The caller must have acquired
+ * a reference count on the nfsclclient structure before calling this.
+ * Return the layout in lypp with a reference count on it, if successful.
+ */
+static int
+nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
+    int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
+    struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfscllayout *lyp;
+	struct nfsclflayout *flp, *tflp;
+	struct nfscldevinfo *dip;
+	struct nfsclflayouthead flh;
+	int error = 0, islocked, layoutlen, recalled, retonclose;
+	nfsv4stateid_t stateid;
+	struct nfsclsession *tsep;
+
+	*lypp = NULL;
+	/*
+	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
+	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
+	 * flp == NULL.
+	 */
+	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
+	    off, &flp, &recalled);
+	islocked = 0;
+	if (lyp == NULL || flp == NULL) {
+		if (recalled != 0)
+			return (EIO);
+		LIST_INIT(&flh);
+		tsep = nfsmnt_mdssession(nmp);
+		layoutlen = tsep->nfsess_maxcache -
+		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
+		if (lyp == NULL) {
+			stateid.seqid = 0;
+			stateid.other[0] = stateidp->other[0];
+			stateid.other[1] = stateidp->other[1];
+			stateid.other[2] = stateidp->other[2];
+			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
+			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
+			    (uint64_t)0, layoutlen, &stateid, &retonclose,
+			    &flh, cred, p, NULL);
+		} else {
+			islocked = 1;
+			stateid.seqid = lyp->nfsly_stateid.seqid;
+			stateid.other[0] = lyp->nfsly_stateid.other[0];
+			stateid.other[1] = lyp->nfsly_stateid.other[1];
+			stateid.other[2] = lyp->nfsly_stateid.other[2];
+			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
+			    nfhp->nfh_len, iomode, off, UINT64_MAX,
+			    (uint64_t)0, layoutlen, &stateid, &retonclose,
+			    &flh, cred, p, NULL);
+		}
+		if (error == 0)
+			LIST_FOREACH(tflp, &flh, nfsfl_list) {
+				error = nfscl_adddevinfo(nmp, NULL, tflp);
+				if (error != 0) {
+					error = nfsrpc_getdeviceinfo(nmp,
+					    tflp->nfsfl_dev,
+					    NFSLAYOUT_NFSV4_1_FILES,
+					    notifybitsp, &dip, cred, p);
+					if (error != 0)
+						break;
+					error = nfscl_adddevinfo(nmp, dip,
+					    tflp);
+					if (error != 0)
+						printf(
+						    "getlayout: cannot add\n");
+				}
+			}
+		if (error == 0) {
+			/*
+			 * nfscl_layout() always returns with the nfsly_lock
+			 * set to a refcnt (shared lock).
+			 */
+			error = nfscl_layout(nmp, vp, nfhp->nfh_fh,
+			    nfhp->nfh_len, &stateid, retonclose, &flh, &lyp,
+			    cred, p);
+			if (error == 0)
+				*lypp = lyp;
+		} else if (islocked != 0)
+			nfsv4_unlock(&lyp->nfsly_lock, 0);
+	} else
+		*lypp = lyp;
+	return (error);
+}
+
+/*
+ * Do a TCP connection plus exchange id and create session.
+ * If successful, a "struct nfsclds" is linked into the list for the
+ * mount point and a pointer to it is returned.
+ */
+static int
+nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp,
+    struct nfsclds **dspp, NFSPROC_T *p)
+{
+	struct sockaddr_in *msad, *sad, *ssd;
+	struct sockaddr_in6 *msad6, *sad6, *ssd6;
+	struct nfsclclient *clp;
+	struct nfssockreq *nrp;
+	struct nfsclds *dsp, *tdsp;
+	int error;
+	enum nfsclds_state retv;
+	uint32_t sequenceid;
+
+	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
+	    ("nfsrpc_fillsa: NULL nr_cred"));
+	NFSLOCKCLSTATE();
+	clp = nmp->nm_clp;
+	NFSUNLOCKCLSTATE();
+	if (clp == NULL)
+		return (EPERM);
+	if (ssp->ss_family == AF_INET) {
+		ssd = (struct sockaddr_in *)ssp;
+		NFSLOCKMNT(nmp);
+
+		/*
+		 * Check to see if we already have a session for this
+		 * address that is usable for a DS.
+		 * Note that the MDS's address is in a different place
+		 * than the sessions already acquired for DS's.
+		 */
+		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
+		tdsp = TAILQ_FIRST(&nmp->nm_sess);
+		while (tdsp != NULL) {
+			if (msad != NULL && msad->sin_family == AF_INET &&
+			    ssd->sin_addr.s_addr == msad->sin_addr.s_addr &&
+			    ssd->sin_port == msad->sin_port &&
+			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
+			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
+				*dspp = tdsp;
+				NFSUNLOCKMNT(nmp);
+				NFSCL_DEBUG(4, "fnd same addr\n");
+				return (0);
+			}
+			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
+			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
+				msad = (struct sockaddr_in *)
+				    tdsp->nfsclds_sockp->nr_nam;
+			else
+				msad = NULL;
+		}
+		NFSUNLOCKMNT(nmp);
+
+		/* No IP address match, so look for new/trunked one. */
+		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
+		sad->sin_len = sizeof(*sad);
+		sad->sin_family = AF_INET;
+		sad->sin_port = ssd->sin_port;
+		sad->sin_addr.s_addr = ssd->sin_addr.s_addr;
+		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
+		nrp->nr_nam = (struct sockaddr *)sad;
+	} else if (ssp->ss_family == AF_INET6) {
+		ssd6 = (struct sockaddr_in6 *)ssp;
+		NFSLOCKMNT(nmp);
+
+		/*
+		 * Check to see if we already have a session for this
+		 * address that is usable for a DS.
+		 * Note that the MDS's address is in a different place
+		 * than the sessions already acquired for DS's.
+		 */
+		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
+		tdsp = TAILQ_FIRST(&nmp->nm_sess);
+		while (tdsp != NULL) {
+			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
+			    IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr,
+			    &msad6->sin6_addr) &&
+			    ssd6->sin6_port == msad6->sin6_port &&
+			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
+			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
+				*dspp = tdsp;
+				NFSUNLOCKMNT(nmp);
+				return (0);
+			}
+			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
+			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
+				msad6 = (struct sockaddr_in6 *)
+				    tdsp->nfsclds_sockp->nr_nam;
+			else
+				msad6 = NULL;
+		}
+		NFSUNLOCKMNT(nmp);
+
+		/* No IP address match, so look for new/trunked one. */
+		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
+		sad6->sin6_len = sizeof(*sad6);
+		sad6->sin6_family = AF_INET6;
+		sad6->sin6_port = ssd6->sin6_port;
+		NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr,
+		    sizeof(struct in6_addr));
+		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
+		nrp->nr_nam = (struct sockaddr *)sad6;
+	} else
+		return (EPERM);
+
+	nrp->nr_sotype = SOCK_STREAM;
+	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
+	nrp->nr_prog = NFS_PROG;
+	nrp->nr_vers = NFS_VER4;
+
+	/*
+	 * Use the credentials that were used for the mount, which are
+	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
+	 * Ref. counting the credentials with crhold() is probably not
+	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
+	 * unmount, but I did it anyhow.
+	 */
+	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
+	error = newnfs_connect(nmp, nrp, NULL, p, 0);
+	NFSCL_DEBUG(3, "DS connect=%d\n", error);
+
+	/* Now, do the exchangeid and create session. */
+	if (error == 0) {
+		error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS,
+		    &dsp, nrp->nr_cred, p);
+		NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
+		if (error != 0)
+			newnfs_disconnect(nrp);
+	}
+	if (error == 0) {
+		dsp->nfsclds_sockp = nrp;
+		NFSLOCKMNT(nmp);
+		retv = nfscl_getsameserver(nmp, dsp, &tdsp);
+		NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
+		if (retv == NFSDSP_USETHISSESSION) {
+			NFSUNLOCKMNT(nmp);
+			/*
+			 * If there is already a session for this server,
+			 * use it.
+			 */
+			(void)newnfs_disconnect(nrp);
+			nfscl_freenfsclds(dsp);
+			*dspp = tdsp;
+			return (0);
+		}
+		if (retv == NFSDSP_SEQTHISSESSION)
+			sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid;
+		else
+			sequenceid = dsp->nfsclds_sess.nfsess_sequenceid;
+		NFSUNLOCKMNT(nmp);
+		error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
+		    nrp, sequenceid, 0, nrp->nr_cred, p);
+		NFSCL_DEBUG(3, "DS createsess=%d\n", error);
+	} else {
+		NFSFREECRED(nrp->nr_cred);
+		NFSFREEMUTEX(&nrp->nr_mtx);
+		free(nrp->nr_nam, M_SONAME);
+		free(nrp, M_NFSSOCKREQ);
+	}
+	if (error == 0) {
+		NFSCL_DEBUG(3, "add DS session\n");
+		/*
+		 * Put it at the end of the list. That way the list
+		 * is ordered by when the entry was added. This matters
+		 * since the one done first is the one that should be
+		 * used for sequencid'ing any subsequent create sessions.
+		 */
+		NFSLOCKMNT(nmp);
+		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
+		NFSUNLOCKMNT(nmp);
+		*dspp = dsp;
+	} else if (dsp != NULL) {
+		newnfs_disconnect(nrp);
+		nfscl_freenfsclds(dsp);
+	}
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 Reclaim Complete.
+ */
+int
+nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	int error;
+
+	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
+	*tl = newnfs_false;
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Initialize the slot tables for a session.
+ */
+static void
+nfscl_initsessionslots(struct nfsclsession *sep)
+{
+	int i;
+
+	for (i = 0; i < NFSV4_CBSLOTS; i++) {
+		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
+			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
+		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
+	}
+	for (i = 0; i < 64; i++)
+		sep->nfsess_slotseq[i] = 0;
+	sep->nfsess_slots = 0;
+}
+
+/*
+ * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
+ */
+int
+nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
+    uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfsnode *np = VTONFS(vp);
+	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
+	struct nfscllayout *layp;
+	struct nfscldevinfo *dip;
+	struct nfsclflayout *rflp;
+	nfsv4stateid_t stateid;
+	struct ucred *newcred;
+	uint64_t lastbyte, len, off, oresid, xfer;
+	int eof, error, iolaymode, recalled;
+	void *lckp;
+
+	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
+	    (np->n_flag & NNOLAYOUT) != 0)
+		return (EIO);
+	/* Now, get a reference cnt on the clientid for this mount. */
+	if (nfscl_getref(nmp) == 0)
+		return (EIO);
+
+	/* Find an appropriate stateid. */
+	newcred = NFSNEWCRED(cred);
+	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
+	    rwaccess, 1, newcred, p, &stateid, &lckp);
+	if (error != 0) {
+		NFSFREECRED(newcred);
+		nfscl_relref(nmp);
+		return (error);
+	}
+	/* Search for a layout for this file. */
+	off = uiop->uio_offset;
+	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
+	    np->n_fhp->nfh_len, off, &rflp, &recalled);
+	if (layp == NULL || rflp == NULL) {
+		if (recalled != 0) {
+			NFSFREECRED(newcred);
+			nfscl_relref(nmp);
+			return (EIO);
+		}
+		if (layp != NULL) {
+			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
+			layp = NULL;
+		}
+		/* Try and get a Layout, if it is supported. */
+		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
+		    (np->n_flag & NWRITEOPENED) != 0)
+			iolaymode = NFSLAYOUTIOMODE_RW;
+		else
+			iolaymode = NFSLAYOUTIOMODE_READ;
+		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
+		    NULL, &stateid, off, &layp, newcred, p);
+		if (error != 0) {
+			NFSLOCKNODE(np);
+			np->n_flag |= NNOLAYOUT;
+			NFSUNLOCKNODE(np);
+			if (lckp != NULL)
+				nfscl_lockderef(lckp);
+			NFSFREECRED(newcred);
+			if (layp != NULL)
+				nfscl_rellayout(layp, 0);
+			nfscl_relref(nmp);
+			return (error);
+		}
+	}
+
+	/*
+	 * Loop around finding a layout that works for the first part of
+	 * this I/O operation, and then call the function that actually
+	 * does the RPC.
+	 */
+	eof = 0;
+	len = (uint64_t)uiop->uio_resid;
+	while (len > 0 && error == 0 && eof == 0) {
+		off = uiop->uio_offset;
+		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
+		if (error == 0) {
+			oresid = xfer = (uint64_t)uiop->uio_resid;
+			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
+				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
+			dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev,
+			    rflp->nfsfl_devp);
+			if (dip != NULL) {
+				error = nfscl_doflayoutio(vp, uiop, iomode,
+				    must_commit, &eof, &stateid, rwaccess, dip,
+				    layp, rflp, off, xfer, docommit, newcred,
+				    p);
+				nfscl_reldevinfo(dip);
+				lastbyte = off + xfer - 1;
+				if (error == 0) {
+					NFSLOCKCLSTATE();
+					if (lastbyte > layp->nfsly_lastbyte)
+						layp->nfsly_lastbyte = lastbyte;
+					NFSUNLOCKCLSTATE();
+				} else if (error == NFSERR_OPENMODE &&
+				    rwaccess == NFSV4OPEN_ACCESSREAD) {
+					NFSLOCKMNT(nmp);
+					nmp->nm_state |= NFSSTA_OPENMODE;
+					NFSUNLOCKMNT(nmp);
+				}
+			} else
+				error = EIO;
+			if (error == 0)
+				len -= (oresid - (uint64_t)uiop->uio_resid);
+		}
+	}
+	if (lckp != NULL)
+		nfscl_lockderef(lckp);
+	NFSFREECRED(newcred);
+	nfscl_rellayout(layp, 0);
+	nfscl_relref(nmp);
+	return (error);
+}
+
+/*
+ * Find a file layout that will handle the first bytes of the requested
+ * range and return the information from it needed to to the I/O operation.
+ */
+int
+nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
+    struct nfsclflayout **retflpp)
+{
+	struct nfsclflayout *flp, *nflp, *rflp;
+	uint32_t rw;
+
+	rflp = NULL;
+	rw = rwaccess;
+	/* For reading, do the Read list first and then the Write list. */
+	do {
+		if (rw == NFSV4OPEN_ACCESSREAD)
+			flp = LIST_FIRST(&lyp->nfsly_flayread);
+		else
+			flp = LIST_FIRST(&lyp->nfsly_flayrw);
+		while (flp != NULL) {
+			nflp = LIST_NEXT(flp, nfsfl_list);
+			if (flp->nfsfl_off > off)
+				break;
+			if (flp->nfsfl_end > off &&
+			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
+				rflp = flp;
+			flp = nflp;
+		}
+		if (rw == NFSV4OPEN_ACCESSREAD)
+			rw = NFSV4OPEN_ACCESSWRITE;
+		else
+			rw = 0;
+	} while (rw != 0);
+	if (rflp != NULL) {
+		/* This one covers the most bytes starting at off. */
+		*retflpp = rflp;
+		return (0);
+	}
+	return (EIO);
+}
+
+/*
+ * Do I/O using an NFSv4.1 file layout.
+ */
+static int
+nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
+    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
+    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
+    uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
+{
+	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
+	int commit_thru_mds, error, stripe_index, stripe_pos;
+	struct nfsnode *np;
+	struct nfsfh *fhp;
+	struct nfsclds **dspp;
+
+	np = VTONFS(vp);
+	rel_off = off - flp->nfsfl_patoff;
+	stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
+	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
+	    dp->nfsdi_stripecnt;
+	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
+	error = 0;
+
+	/* Loop around, doing I/O for each stripe unit. */
+	while (len > 0 && error == 0) {
+		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
+		dspp = nfsfldi_addr(dp, stripe_index);
+		if (len > transfer && docommit == 0)
+			xfer = transfer;
+		else
+			xfer = len;
+		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
+			/* Dense layout. */
+			if (stripe_pos >= flp->nfsfl_fhcnt)
+				return (EIO);
+			fhp = flp->nfsfl_fh[stripe_pos];
+			io_off = (rel_off / (stripe_unit_size *
+			    dp->nfsdi_stripecnt)) * stripe_unit_size +
+			    rel_off % stripe_unit_size;
+		} else {
+			/* Sparse layout. */
+			if (flp->nfsfl_fhcnt > 1) {
+				if (stripe_index >= flp->nfsfl_fhcnt)
+					return (EIO);
+				fhp = flp->nfsfl_fh[stripe_index];
+			} else if (flp->nfsfl_fhcnt == 1)
+				fhp = flp->nfsfl_fh[0];
+			else
+				fhp = np->n_fhp;
+			io_off = off;
+		}
+		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
+			commit_thru_mds = 1;
+			if (docommit != 0)
+				error = EIO;
+		} else {
+			commit_thru_mds = 0;
+			mtx_lock(&np->n_mtx);
+			np->n_flag |= NDSCOMMIT;
+			mtx_unlock(&np->n_mtx);
+		}
+		if (docommit != 0) {
+			if (error == 0)
+				error = nfsrpc_commitds(vp, io_off, xfer,
+				    *dspp, fhp, cred, p);
+			if (error == 0) {
+				/*
+				 * Set both eof and uio_resid = 0 to end any
+				 * loops.
+				 */
+				*eofp = 1;
+				uiop->uio_resid = 0;
+			} else {
+				mtx_lock(&np->n_mtx);
+				np->n_flag &= ~NDSCOMMIT;
+				mtx_unlock(&np->n_mtx);
+			}
+		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
+			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
+			    io_off, xfer, fhp, cred, p);
+		else {
+			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
+			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
+			    cred, p);
+			if (error == 0) {
+				NFSLOCKCLSTATE();
+				lyp->nfsly_flags |= NFSLY_WRITTEN;
+				NFSUNLOCKCLSTATE();
+			}
+		}
+		if (error == 0) {
+			transfer = stripe_unit_size;
+			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
+			len -= xfer;
+			off += xfer;
+		}
+	}
+	return (error);
+}
+
+/*
+ * The actual read RPC done to a DS.
+ */
+static int
+nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
+    struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	int error, retlen;
+	struct nfsrv_descript nfsd;
+	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
+	struct nfsrv_descript *nd = &nfsd;
+	struct nfssockreq *nrp;
+
+	nd->nd_mrep = NULL;
+	nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len,
+	    NULL, &dsp->nfsclds_sess);
+	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
+	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
+	txdr_hyper(io_off, tl);
+	*(tl + 2) = txdr_unsigned(len);
+	nrp = dsp->nfsclds_sockp;
+	if (nrp == NULL)
+		/* If NULL, use the MDS socket. */
+		nrp = &nmp->nm_sockreq;
+	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat != 0) {
+		error = nd->nd_repstat;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	*eofp = fxdr_unsigned(int, *tl);
+	NFSM_STRSIZ(retlen, len);
+	error = nfsm_mbufuio(nd, uiop, retlen);
+nfsmout:
+	if (nd->nd_mrep != NULL)
+		mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * The actual write RPC done to a DS.
+ */
+static int
+nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
+    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
+    struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
+	int error, rlen, commit, committed = NFSWRITE_FILESYNC;
+	int32_t backup;
+	struct nfsrv_descript nfsd;
+	struct nfsrv_descript *nd = &nfsd;
+	struct nfssockreq *nrp;
+
+	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
+	nd->nd_mrep = NULL;
+	nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len,
+	    NULL, &dsp->nfsclds_sess);
+	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
+	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
+	txdr_hyper(io_off, tl);
+	tl += 2;
+	*tl++ = txdr_unsigned(*iomode);
+	*tl = txdr_unsigned(len);
+	nfsm_uiombuf(nd, uiop, len);
+	nrp = dsp->nfsclds_sockp;
+	if (nrp == NULL)
+		/* If NULL, use the MDS socket. */
+		nrp = &nmp->nm_sockreq;
+	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat != 0) {
+		/*
+		 * In case the rpc gets retried, roll
+		 * the uio fileds changed by nfsm_uiombuf()
+		 * back.
+		 */
+		uiop->uio_offset -= len;
+		uio_uio_resid_add(uiop, len);
+		uio_iov_base_add(uiop, -len);
+		uio_iov_len_add(uiop, len);
+		error = nd->nd_repstat;
+	} else {
+		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
+		rlen = fxdr_unsigned(int, *tl++);
+		if (rlen == 0) {
+			error = NFSERR_IO;
+			goto nfsmout;
+		} else if (rlen < len) {
+			backup = len - rlen;
+			uio_iov_base_add(uiop, -(backup));
+			uio_iov_len_add(uiop, backup);
+			uiop->uio_offset -= backup;
+			uio_uio_resid_add(uiop, backup);
+			len = rlen;
+		}
+		commit = fxdr_unsigned(int, *tl++);
+
+		/*
+		 * Return the lowest committment level
+		 * obtained by any of the RPCs.
+		 */
+		if (committed == NFSWRITE_FILESYNC)
+			committed = commit;
+		else if (committed == NFSWRITE_DATASYNC &&
+		    commit == NFSWRITE_UNSTABLE)
+			committed = commit;
+		if (commit_thru_mds != 0) {
+			NFSLOCKMNT(nmp);
+			if (!NFSHASWRITEVERF(nmp)) {
+				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
+				NFSSETWRITEVERF(nmp);
+	    		} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
+				*must_commit = 1;
+				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
+			}
+			NFSUNLOCKMNT(nmp);
+		} else {
+			NFSLOCKDS(dsp);
+			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
+				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
+				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
+			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
+				*must_commit = 1;
+				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
+			}
+			NFSUNLOCKDS(dsp);
+		}
+	}
+nfsmout:
+	if (nd->nd_mrep != NULL)
+		mbuf_freem(nd->nd_mrep);
+	*iomode = committed;
+	if (nd->nd_repstat != 0 && error == 0)
+		error = nd->nd_repstat;
+	return (error);
+}
+
+/*
+ * Free up the nfsclds structure.
+ */
+void
+nfscl_freenfsclds(struct nfsclds *dsp)
+{
+	int i;
+
+	if (dsp == NULL)
+		return;
+	if (dsp->nfsclds_sockp != NULL) {
+		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
+		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
+		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
+		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
+	}
+	NFSFREEMUTEX(&dsp->nfsclds_mtx);
+	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
+	for (i = 0; i < NFSV4_CBSLOTS; i++) {
+		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
+			m_freem(
+			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
+	}
+	free(dsp, M_NFSCLDS);
+}
+
+static enum nfsclds_state
+nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
+    struct nfsclds **retdspp)
+{
+	struct nfsclds *dsp, *cur_dsp;
+
+	/*
+	 * Search the list of nfsclds structures for one with the same
+	 * server.
+	 */
+	cur_dsp = NULL;
+	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
+		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
+		    dsp->nfsclds_servownlen != 0 &&
+		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
+		    dsp->nfsclds_servownlen) &&
+		    dsp->nfsclds_sess.nfsess_defunct == 0) {
+			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
+			    TAILQ_FIRST(&nmp->nm_sess), dsp,
+			    dsp->nfsclds_flags);
+			/* Server major id matches. */
+			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
+				*retdspp = dsp;
+				return (NFSDSP_USETHISSESSION);
+			}
+
+			/*
+			 * Note the first match, so it can be used for
+			 * sequence'ing new sessions.
+			 */
+			if (cur_dsp == NULL)
+				cur_dsp = dsp;
+		}
+	}
+	if (cur_dsp != NULL) {
+		*retdspp = cur_dsp;
+		return (NFSDSP_SEQTHISSESSION);
+	}
+	return (NFSDSP_NOTFOUND);
+}
+
+/*
+ * NFS commit rpc to a NFSv4.1 DS.
+ */
+static int
+nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
+    struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
+	struct nfssockreq *nrp;
+	int error;
+	
+	nd->nd_mrep = NULL;
+	nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len,
+	    NULL, &dsp->nfsclds_sess);
+	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
+	txdr_hyper(offset, tl);
+	tl += 2;
+	*tl = txdr_unsigned(cnt);
+	nrp = dsp->nfsclds_sockp;
+	if (nrp == NULL)
+		/* If NULL, use the MDS socket. */
+		nrp = &nmp->nm_sockreq;
+	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
+	if (error != 0)
+		return (error);
+	if (nd->nd_repstat == 0) {
+		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
+		NFSLOCKDS(dsp);
+		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
+			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
+			error = NFSERR_STALEWRITEVERF;
+		}
+		NFSUNLOCKDS(dsp);
+	}
+nfsmout:
+	if (error == 0 && nd->nd_repstat != 0)
+		error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+

Modified: trunk/sys/fs/nfsclient/nfs_clstate.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clstate.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clstate.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -26,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clstate.c 324545 2017-10-11 23:42:29Z rmacklem $");
 
 /*
  * These functions implement the client side state handling for NFSv4.
@@ -86,14 +87,20 @@
  */
 extern struct nfsstats newnfsstats;
 extern struct nfsreqhead nfsd_reqq;
+extern u_int32_t newnfs_false, newnfs_true;
+extern int nfscl_debuglevel;
+extern int nfscl_enablecallb;
+extern int nfs_numnfscbd;
 NFSREQSPINLOCK;
 NFSCLSTATEMUTEX;
 int nfscl_inited = 0;
 struct nfsclhead nfsclhead;	/* Head of clientid list */
 int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
+int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
 #endif	/* !APPLEKEXT */
 
 static int nfscl_delegcnt = 0;
+static int nfscl_layoutcnt = 0;
 static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *,
     u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **);
 static void nfscl_clrelease(struct nfsclclient *);
@@ -109,9 +116,17 @@
     struct nfscllock **, int);
 static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
 static u_int32_t nfscl_nextcbident(void);
-static mount_t nfscl_getmnt(u_int32_t);
+static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
+static struct nfsclclient *nfscl_getclnt(u_int32_t);
+static struct nfsclclient *nfscl_getclntsess(uint8_t *);
 static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
     int);
+static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
+    int, struct nfsclrecalllayout **);
+static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
+static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
+    int);
+static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
 static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
     u_int8_t *, struct nfscllock **);
 static void nfscl_freealllocks(struct nfscllockownerhead *, int);
@@ -119,7 +134,7 @@
     struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
 static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
     struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
-    struct nfsclopen **, u_int8_t *, u_int8_t *, int, int *);
+    struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
 static int nfscl_moveopen(vnode_t , struct nfsclclient *,
     struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
     struct nfscldeleg *, struct ucred *, NFSPROC_T *);
@@ -135,7 +150,7 @@
 static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
     struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
 static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *);
-static int nfscl_errmap(struct nfsrv_descript *);
+static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
     struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int);
@@ -145,6 +160,15 @@
     struct nfsmount *, NFSPROC_T *);
 static void nfscl_emptylockowner(struct nfscllockowner *,
     struct nfscllockownerfhhead *);
+static void nfscl_mergeflayouts(struct nfsclflayouthead *,
+    struct nfsclflayouthead *);
+static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
+    uint64_t, uint32_t, struct nfsclrecalllayout *);
+static int nfscl_seq(uint32_t, uint32_t);
+static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
+    struct ucred *, NFSPROC_T *);
+static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
+    struct ucred *, NFSPROC_T *);
 
 static short nfscberr_null[] = {
 	0,
@@ -214,7 +238,7 @@
 	if (nfhp != NULL)
 	    MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) +
 		fhlen - 1, M_NFSCLOPEN, M_WAITOK);
-	ret = nfscl_getcl(vp, cred, p, &clp);
+	ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
 	if (ret != 0) {
 		FREE((caddr_t)nowp, M_NFSCLOWNER);
 		if (nop != NULL)
@@ -227,7 +251,6 @@
 	 * If none found, add the new one or return error, depending upon
 	 * "create".
 	 */
-	nfscl_filllockowner(p->td_proc, own, F_POSIX);
 	NFSLOCKCLSTATE();
 	dp = NULL;
 	/* First check the delegation list */
@@ -244,10 +267,17 @@
 		}
 	}
 
-	if (dp != NULL)
+	if (dp != NULL) {
+		nfscl_filllockowner(p->td_proc, own, F_POSIX);
 		ohp = &dp->nfsdl_owner;
-	else
+	} else {
+		/* For NFSv4.1 and this option, use a single open_owner. */
+		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
+			nfscl_filllockowner(NULL, own, F_POSIX);
+		else
+			nfscl_filllockowner(p->td_proc, own, F_POSIX);
 		ohp = &clp->nfsc_owner;
+	}
 	/* Now, search for an openowner */
 	LIST_FOREACH(owp, ohp, nfsow_list) {
 		if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
@@ -258,23 +288,12 @@
 	 * Create a new open, as required.
 	 */
 	nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
-	    newonep);
+	    cred, newonep);
 
 	/*
-	 * Serialize modifications to the open owner for multiple threads
-	 * within the same process using a read/write sleep lock.
+	 * Now, check the mode on the open and return the appropriate
+	 * value.
 	 */
-	if (lockit)
-		nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
-	NFSUNLOCKCLSTATE();
-	if (nowp != NULL)
-		FREE((caddr_t)nowp, M_NFSCLOWNER);
-	if (nop != NULL)
-		FREE((caddr_t)nop, M_NFSCLOPEN);
-	if (owpp != NULL)
-		*owpp = owp;
-	if (opp != NULL)
-		*opp = op;
 	if (retp != NULL) {
 		if (nfhp != NULL && dp != NULL && nop == NULL)
 			/* new local open on delegation */
@@ -282,16 +301,42 @@
 		else
 			*retp = NFSCLOPEN_OK;
 	}
-
-	/*
-	 * Now, check the mode on the open and return the appropriate
-	 * value.
-	 */
 	if (op != NULL && (amode & ~(op->nfso_mode))) {
 		op->nfso_mode |= amode;
 		if (retp != NULL && dp == NULL)
 			*retp = NFSCLOPEN_DOOPEN;
 	}
+
+	/*
+	 * Serialize modifications to the open owner for multiple threads
+	 * within the same process using a read/write sleep lock.
+	 * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
+	 * by acquiring a shared lock.  The close operations still use an
+	 * exclusive lock for this case.
+	 */
+	if (lockit != 0) {
+		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) {
+			/*
+			 * Get a shared lock on the OpenOwner, but first
+			 * wait for any pending exclusive lock, so that the
+			 * exclusive locker gets priority.
+			 */
+			nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
+			    NFSCLSTATEMUTEXPTR, NULL);
+			nfsv4_getref(&owp->nfsow_rwlock, NULL,
+			    NFSCLSTATEMUTEXPTR, NULL);
+		} else
+			nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
+	}
+	NFSUNLOCKCLSTATE();
+	if (nowp != NULL)
+		FREE((caddr_t)nowp, M_NFSCLOWNER);
+	if (nop != NULL)
+		FREE((caddr_t)nop, M_NFSCLOPEN);
+	if (owpp != NULL)
+		*owpp = owp;
+	if (opp != NULL)
+		*opp = op;
 	return (0);
 }
 
@@ -302,7 +347,7 @@
 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
-    int *newonep)
+    struct ucred *cred, int *newonep)
 {
 	struct nfsclowner *owp = *owpp, *nowp;
 	struct nfsclopen *op, *nop;
@@ -355,6 +400,8 @@
 			nop->nfso_stateid.other[0] = 0;
 			nop->nfso_stateid.other[1] = 0;
 			nop->nfso_stateid.other[2] = 0;
+			KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
+			newnfs_copyincred(cred, &nop->nfso_cred);
 			if (dp != NULL) {
 				TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
 				TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
@@ -451,15 +498,16 @@
  */
 APPLESTATIC int
 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
-    struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
+    int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
     void **lckpp)
 {
 	struct nfsclclient *clp;
 	struct nfsclowner *owp;
-	struct nfsclopen *op = NULL;
+	struct nfsclopen *op = NULL, *top;
 	struct nfscllockowner *lp;
 	struct nfscldeleg *dp;
 	struct nfsnode *np;
+	struct nfsmount *nmp;
 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
 	int error, done;
 
@@ -466,16 +514,20 @@
 	*lckpp = NULL;
 	/*
 	 * Initially, just set the special stateid of all zeros.
+	 * (Don't do this for a DS, since the special stateid can't be used.)
 	 */
-	stateidp->seqid = 0;
-	stateidp->other[0] = 0;
-	stateidp->other[1] = 0;
-	stateidp->other[2] = 0;
+	if (fords == 0) {
+		stateidp->seqid = 0;
+		stateidp->other[0] = 0;
+		stateidp->other[1] = 0;
+		stateidp->other[2] = 0;
+	}
 	if (vnode_vtype(vp) != VREG)
 		return (EISDIR);
 	np = VTONFS(vp);
+	nmp = VFSTONFS(vnode_mount(vp));
 	NFSLOCKCLSTATE();
-	clp = nfscl_findcl(VFSTONFS(vnode_mount(vp)));
+	clp = nfscl_findcl(nmp);
 	if (clp == NULL) {
 		NFSUNLOCKCLSTATE();
 		return (EACCES);
@@ -522,11 +574,15 @@
 		 * If p != NULL, we want to search the parentage tree
 		 * for a matching OpenOwner and use that.
 		 */
-		nfscl_filllockowner(p->td_proc, own, F_POSIX);
+		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
+			nfscl_filllockowner(NULL, own, F_POSIX);
+		else
+			nfscl_filllockowner(p->td_proc, own, F_POSIX);
 		lp = NULL;
 		error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own,
 		    mode, &lp, &op);
-		if (error == 0 && lp != NULL) {
+		if (error == 0 && lp != NULL && fords == 0) {
+			/* Don't return a lock stateid for a DS. */
 			stateidp->seqid =
 			    lp->nfsl_stateid.seqid;
 			stateidp->other[0] =
@@ -541,15 +597,21 @@
 	}
 	if (op == NULL) {
 		/* If not found, just look for any OpenOwner that will work. */
+		top = NULL;
 		done = 0;
 		owp = LIST_FIRST(&clp->nfsc_owner);
 		while (!done && owp != NULL) {
 			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
 				if (op->nfso_fhlen == fhlen &&
-				    !NFSBCMP(op->nfso_fh, nfhp, fhlen) &&
-				    (mode & op->nfso_mode) == mode) {
-					done = 1;
-					break;
+				    !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
+					if (top == NULL && (op->nfso_mode &
+					    NFSV4OPEN_ACCESSWRITE) != 0 &&
+					    (mode & NFSV4OPEN_ACCESSREAD) != 0)
+						top = op;
+					if ((mode & op->nfso_mode) == mode) {
+						done = 1;
+						break;
+					}
 				}
 			}
 			if (!done)
@@ -556,8 +618,12 @@
 				owp = LIST_NEXT(owp, nfsow_list);
 		}
 		if (!done) {
-			NFSUNLOCKCLSTATE();
-			return (ENOENT);
+			NFSCL_DEBUG(2, "openmode top=%p\n", top);
+			if (top == NULL || NFSHASOPENMODE(nmp)) {
+				NFSUNLOCKCLSTATE();
+				return (ENOENT);
+			} else
+				op = top;
 		}
 		/*
 		 * For read aheads or write behinds, use the open cred.
@@ -655,15 +721,19 @@
  * with the open owner.
  */
 APPLESTATIC void
-nfscl_ownerrelease(struct nfsclowner *owp, __unused int error,
-    __unused int candelete, int unlocked)
+nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
+    __unused int error, __unused int candelete, int unlocked)
 {
 
 	if (owp == NULL)
 		return;
 	NFSLOCKCLSTATE();
-	if (!unlocked)
-		nfscl_lockunlock(&owp->nfsow_rwlock);
+	if (unlocked == 0) {
+		if (NFSHASONEOPENOWN(nmp))
+			nfsv4_relref(&owp->nfsow_rwlock);
+		else
+			nfscl_lockunlock(&owp->nfsow_rwlock);
+	}
 	nfscl_clrelease(owp->nfsow_clp);
 	NFSUNLOCKCLSTATE();
 }
@@ -672,7 +742,8 @@
  * Release use of an open structure under an open owner.
  */
 APPLESTATIC void
-nfscl_openrelease(struct nfsclopen *op, int error, int candelete)
+nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
+    int candelete)
 {
 	struct nfsclclient *clp;
 	struct nfsclowner *owp;
@@ -681,7 +752,10 @@
 		return;
 	NFSLOCKCLSTATE();
 	owp = op->nfso_own;
-	nfscl_lockunlock(&owp->nfsow_rwlock);
+	if (NFSHASONEOPENOWN(nmp))
+		nfsv4_relref(&owp->nfsow_rwlock);
+	else
+		nfscl_lockunlock(&owp->nfsow_rwlock);
 	clp = owp->nfsow_clp;
 	if (error && candelete && op->nfso_opencnt == 0)
 		nfscl_freeopen(op, 0);
@@ -697,21 +771,21 @@
  * If the "cred" argument is NULL, a new clientid should not be created.
  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
  * be done.
+ * The start_renewthread argument tells nfscl_getcl() to start a renew
+ * thread if this creates a new clp.
  * It always clpp with a reference count on it, unless returning an error.
  */
 APPLESTATIC int
-nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
-    struct nfsclclient **clpp)
+nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
+    int start_renewthread, struct nfsclclient **clpp)
 {
 	struct nfsclclient *clp;
 	struct nfsclclient *newclp = NULL;
-	struct mount *mp;
 	struct nfsmount *nmp;
 	char uuid[HOSTUUIDLEN];
 	int igotlock = 0, error, trystalecnt, clidinusedelay, i;
 	u_int16_t idlen = 0;
 
-	mp = vnode_mount(vp);
 	nmp = VFSTONFS(mp);
 	if (cred != NULL) {
 		getcredhostuuid(cred, uuid, sizeof uuid);
@@ -722,7 +796,7 @@
 			idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
 		MALLOC(newclp, struct nfsclclient *,
 		    sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
-		    M_WAITOK);
+		    M_WAITOK | M_ZERO);
 	}
 	NFSLOCKCLSTATE();
 	/*
@@ -743,12 +817,15 @@
 			return (EACCES);
 		}
 		clp = newclp;
-		NFSBZERO((caddr_t)clp, sizeof(struct nfsclclient) + idlen - 1);
 		clp->nfsc_idlen = idlen;
 		LIST_INIT(&clp->nfsc_owner);
 		TAILQ_INIT(&clp->nfsc_deleg);
+		TAILQ_INIT(&clp->nfsc_layout);
+		LIST_INIT(&clp->nfsc_devinfo);
 		for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
 			LIST_INIT(&clp->nfsc_deleghash[i]);
+		for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
+			LIST_INIT(&clp->nfsc_layouthash[i]);
 		clp->nfsc_flags = NFSCLFLAGS_INITED;
 		clp->nfsc_clientidrev = 1;
 		clp->nfsc_cbident = nfscl_nextcbident();
@@ -758,11 +835,12 @@
 		nmp->nm_clp = clp;
 		clp->nfsc_nmp = nmp;
 		NFSUNLOCKCLSTATE();
-		nfscl_start_renewthread(clp);
+		if (start_renewthread != 0)
+			nfscl_start_renewthread(clp);
 	} else {
 		NFSUNLOCKCLSTATE();
 		if (newclp != NULL)
-			FREE((caddr_t)newclp, M_NFSCLCLIENT);
+			free(newclp, M_NFSCLCLIENT);
 	}
 	NFSLOCKCLSTATE();
 	while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
@@ -769,8 +847,18 @@
 	    (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0)
 		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
 		    NFSCLSTATEMUTEXPTR, mp);
-	if (!igotlock)
+	if (igotlock == 0) {
+		/*
+		 * Call nfsv4_lock() with "iwantlock == 0" so that it will
+		 * wait for a pending exclusive lock request.  This gives the
+		 * exclusive lock request priority over this shared lock
+		 * request.
+		 * An exclusive lock on nfsc_lock is used mainly for server
+		 * crash recoveries.
+		 */
+		nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
 		nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
+	}
 	if (igotlock == 0 && (mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
 		/*
 		 * Both nfsv4_lock() and nfsv4_getref() know to check
@@ -818,14 +906,15 @@
 			clidinusedelay = 120;
 		trystalecnt = 3;
 		do {
-			error = nfsrpc_setclient(VFSTONFS(vnode_mount(vp)),
-			    clp, cred, p);
+			error = nfsrpc_setclient(nmp, clp, 0, cred, p);
 			if (error == NFSERR_STALECLIENTID ||
 			    error == NFSERR_STALEDONTRECOVER ||
+			    error == NFSERR_BADSESSION ||
 			    error == NFSERR_CLIDINUSE) {
 				(void) nfs_catnap(PZERO, error, "nfs_setcl");
 			}
 		} while (((error == NFSERR_STALECLIENTID ||
+		     error == NFSERR_BADSESSION ||
 		     error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
 		    (error == NFSERR_CLIDINUSE && --clidinusedelay > 0));
 		if (error) {
@@ -942,7 +1031,7 @@
 		if (recovery)
 			clp = rclp;
 		else
-			error = nfscl_getcl(vp, cred, p, &clp);
+			error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
 	}
 	if (error) {
 		FREE((caddr_t)nlp, M_NFSCLLOCKOWNER);
@@ -958,7 +1047,10 @@
 	} else {
 		nfscl_filllockowner(id, own, flags);
 		ownp = own;
-		nfscl_filllockowner(p->td_proc, openown, F_POSIX);
+		if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
+			nfscl_filllockowner(NULL, openown, F_POSIX);
+		else
+			nfscl_filllockowner(p->td_proc, openown, F_POSIX);
 		openownp = openown;
 	}
 	if (!recovery) {
@@ -1277,7 +1369,7 @@
 		end = NFS64BITSSET;
 	}
 
-	error = nfscl_getcl(vp, cred, p, &clp);
+	error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
 	if (error)
 		return (1);
 	nfscl_filllockowner(id, own, flags);
@@ -1536,7 +1628,15 @@
 {
 	struct nfsclowner *owp, *nowp;
 	struct nfsclopen *op, *nop;
+	struct nfscllayout *lyp, *nlyp;
+	struct nfscldevinfo *dip, *ndip;
 
+	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
+		nfscl_freelayout(lyp);
+
+	LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
+		nfscl_freedevinfo(dip);
+
 	/* Now, all the OpenOwners, etc. */
 	LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
 		LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
@@ -1686,6 +1786,7 @@
 	struct nfsclowner *owp, *nowp;
 	struct nfsclopen *op;
 	struct nfscllockowner *lp, *nlp;
+	struct nfscldeleg *dp;
 
 	NFSPROCLISTLOCK();
 	NFSLOCKCLSTATE();
@@ -1699,6 +1800,20 @@
 		if (nfscl_procdoesntexist(owp->nfsow_owner))
 			nfscl_cleanup_common(clp, owp->nfsow_owner);
 	}
+
+	/*
+	 * For the single open_owner case, these lock owners need to be
+	 * checked to see if they still exist separately.
+	 * This is because nfscl_procdoesntexist() never returns true for
+	 * the single open_owner so that the above doesn't ever call
+	 * nfscl_cleanup_common().
+	 */
+	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
+		LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
+			if (nfscl_procdoesntexist(lp->nfsl_owner))
+				nfscl_cleanup_common(clp, lp->nfsl_owner);
+		}
+	}
 	NFSUNLOCKCLSTATE();
 	NFSPROCLISTUNLOCK();
 }
@@ -1825,11 +1940,15 @@
 		LIST_REMOVE(clp, nfsc_list);
 		nfscl_delegreturnall(clp, p);
 		cred = newnfs_getcred();
-		(void) nfsrpc_setclient(nmp, clp, cred, p);
+		if (NFSHASNFSV4N(nmp)) {
+			(void)nfsrpc_destroysession(nmp, clp, cred, p);
+			(void)nfsrpc_destroyclient(nmp, clp, cred, p);
+		} else
+			(void)nfsrpc_setclient(nmp, clp, 0, cred, p);
 		nfscl_cleanclient(clp);
 		nmp->nm_clp = NULL;
 		NFSFREECRED(cred);
-		FREE((caddr_t)clp, M_NFSCLCLIENT);
+		free(clp, M_NFSCLCLIENT);
 	} else
 		NFSUNLOCKCLSTATE();
 }
@@ -1836,8 +1955,9 @@
 
 /*
  * This function is called when a server replies with NFSERR_STALECLIENTID
- * or NFSERR_STALESTATEID. It traverses the clientid lists, doing Opens
- * and Locks with reclaim. If these fail, it deletes the corresponding state.
+ * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
+ * doing Opens and Locks with reclaim. If these fail, it deletes the
+ * corresponding state.
  */
 static void
 nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p)
@@ -1854,7 +1974,8 @@
 	struct nfsreq *rep;
 	u_int64_t len;
 	u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
-	int igotlock = 0, error, trycnt, firstlock, s;
+	int i, igotlock = 0, error, trycnt, firstlock;
+	struct nfscllayout *lyp, *nlyp;
 
 	/*
 	 * First, lock the client structure, so everyone else will
@@ -1871,16 +1992,27 @@
 	nmp = clp->nfsc_nmp;
 	if (nmp == NULL)
 		panic("nfscl recover");
+
+	/*
+	 * For now, just get rid of all layouts. There may be a need
+	 * to do LayoutCommit Ops with reclaim == true later.
+	 */
+	TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
+		nfscl_freelayout(lyp);
+	TAILQ_INIT(&clp->nfsc_layout);
+	for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
+		LIST_INIT(&clp->nfsc_layouthash[i]);
+
 	trycnt = 5;
 	do {
-		error = nfsrpc_setclient(nmp, clp, cred, p);
+		error = nfsrpc_setclient(nmp, clp, 1, cred, p);
 	} while ((error == NFSERR_STALECLIENTID ||
+	     error == NFSERR_BADSESSION ||
 	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
 	if (error) {
-		nfscl_cleanclient(clp);
 		NFSLOCKCLSTATE();
-		clp->nfsc_flags &= ~(NFSCLFLAGS_HASCLIENTID |
-		    NFSCLFLAGS_RECOVER | NFSCLFLAGS_RECVRINPROG);
+		clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
+		    NFSCLFLAGS_RECVRINPROG);
 		wakeup(&clp->nfsc_flags);
 		nfsv4_unlock(&clp->nfsc_lock, 0);
 		NFSUNLOCKCLSTATE();
@@ -1893,11 +2025,11 @@
 	 * Mark requests already queued on the server, so that they don't
 	 * initiate another recovery cycle. Any requests already in the
 	 * queue that handle state information will have the old stale
-	 * clientid/stateid and will get a NFSERR_STALESTATEID or
-	 * NFSERR_STALECLIENTID reply from the server. This will be
-	 * translated to NFSERR_STALEDONTRECOVER when R_DONTRECOVER is set.
+	 * clientid/stateid and will get a NFSERR_STALESTATEID,
+	 * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
+	 * This will be translated to NFSERR_STALEDONTRECOVER when
+	 * R_DONTRECOVER is set.
 	 */
-	s = splsoftclock();
 	NFSLOCKREQ();
 	TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
 		if (rep->r_nmp == nmp)
@@ -1904,7 +2036,6 @@
 			rep->r_flags |= R_DONTRECOVER;
 	}
 	NFSUNLOCKREQ();
-	splx(s);
 
 	/*
 	 * Now, mark all delegations "need reclaim".
@@ -1925,7 +2056,7 @@
 	    op = LIST_FIRST(&owp->nfsow_open);
 	    while (op != NULL) {
 		nop = LIST_NEXT(op, nfso_list);
-		if (error != NFSERR_NOGRACE) {
+		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
 		    /* Search for a delegation to reclaim with the open */
 		    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
 			if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
@@ -1994,11 +2125,10 @@
 				    len = NFS64BITSSET;
 				else
 				    len = lop->nfslo_end - lop->nfslo_first;
-				if (error != NFSERR_NOGRACE)
-				    error = nfscl_trylock(nmp, NULL,
-					op->nfso_fh, op->nfso_fhlen, lp,
-					firstlock, 1, lop->nfslo_first, len,
-					lop->nfslo_type, tcred, p);
+				error = nfscl_trylock(nmp, NULL,
+				    op->nfso_fh, op->nfso_fhlen, lp,
+				    firstlock, 1, lop->nfslo_first, len,
+				    lop->nfslo_type, tcred, p);
 				if (error != 0)
 				    nfscl_freelock(lop, 0);
 				else
@@ -2010,10 +2140,10 @@
 				nfscl_freelockowner(lp, 0);
 			    lp = nlp;
 			}
-		    } else {
-			nfscl_freeopen(op, 0);
 		    }
 		}
+		if (error != 0 && error != NFSERR_BADSESSION)
+		    nfscl_freeopen(op, 0);
 		op = nop;
 	    }
 	    owp = nowp;
@@ -2044,7 +2174,7 @@
 		    nfscl_lockinit(&nowp->nfsow_rwlock);
 		}
 		nop = NULL;
-		if (error != NFSERR_NOGRACE) {
+		if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
 		    MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) +
 			dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
 		    nop->nfso_own = nowp;
@@ -2136,6 +2266,10 @@
 		FREE((caddr_t)dp, M_NFSCLDELEG);
 	}
 
+	/* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
+	if (NFSHASNFSV4N(nmp))
+		(void)nfsrpc_reclaimcomplete(nmp, cred, p);
+
 	NFSLOCKCLSTATE();
 	clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
 	wakeup(&clp->nfsc_flags);
@@ -2190,17 +2324,13 @@
 	cred = newnfs_getcred();
 	trycnt = 5;
 	do {
-		error = nfsrpc_setclient(nmp, clp, cred, p);
+		error = nfsrpc_setclient(nmp, clp, 0, cred, p);
 	} while ((error == NFSERR_STALECLIENTID ||
+	     error == NFSERR_BADSESSION ||
 	     error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
 	if (error) {
-		/*
-		 * Clear out any state.
-		 */
-		nfscl_cleanclient(clp);
 		NFSLOCKCLSTATE();
-		clp->nfsc_flags &= ~(NFSCLFLAGS_HASCLIENTID |
-		    NFSCLFLAGS_RECOVER);
+		clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
 	} else {
 		/*
 		 * Expire the state for the client.
@@ -2398,6 +2528,11 @@
 	static time_t prevsec = 0;
 	struct nfscllockownerfh *lfhp, *nlfhp;
 	struct nfscllockownerfhhead lfh;
+	struct nfscllayout *lyp, *nlyp;
+	struct nfscldevinfo *dip, *ndip;
+	struct nfscllayouthead rlh;
+	struct nfsclrecalllayout *recallp;
+	struct nfsclds *dsp;
 
 	cred = newnfs_getcred();
 	NFSLOCKCLSTATE();
@@ -2414,8 +2549,11 @@
 			if (recover_done_time < NFSD_MONOSEC) {
 				recover_done_time = NFSD_MONOSEC +
 				    clp->nfsc_renew;
+				NFSCL_DEBUG(1, "Doing recovery..\n");
 				nfscl_recover(clp, cred, p);
 			} else {
+				NFSCL_DEBUG(1, "Clear Recovery dt=%u ms=%jd\n",
+				    recover_done_time, (intmax_t)NFSD_MONOSEC);
 				NFSLOCKCLSTATE();
 				clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
 				NFSUNLOCKCLSTATE();
@@ -2425,10 +2563,11 @@
 		    (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
 			clidrev = clp->nfsc_clientidrev;
-			error = nfsrpc_renew(clp, cred, p);
+			error = nfsrpc_renew(clp, NULL, cred, p);
 			if (error == NFSERR_CBPATHDOWN)
 			    cbpathdown = 1;
-			else if (error == NFSERR_STALECLIENTID) {
+			else if (error == NFSERR_STALECLIENTID ||
+			    error == NFSERR_BADSESSION) {
 			    NFSLOCKCLSTATE();
 			    clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
 			    NFSUNLOCKCLSTATE();
@@ -2436,6 +2575,28 @@
 			    (void) nfscl_hasexpired(clp, clidrev, p);
 		}
 
+checkdsrenew:
+		if (NFSHASNFSV4N(clp->nfsc_nmp)) {
+			/* Do renews for any DS sessions. */
+			NFSLOCKMNT(clp->nfsc_nmp);
+			/* Skip first entry, since the MDS is handled above. */
+			dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
+			if (dsp != NULL)
+				dsp = TAILQ_NEXT(dsp, nfsclds_list);
+			while (dsp != NULL) {
+				if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
+				    dsp->nfsclds_sess.nfsess_defunct == 0) {
+					dsp->nfsclds_expire = NFSD_MONOSEC +
+					    clp->nfsc_renew;
+					NFSUNLOCKMNT(clp->nfsc_nmp);
+					(void)nfsrpc_renew(clp, dsp, cred, p);
+					goto checkdsrenew;
+				}
+				dsp = TAILQ_NEXT(dsp, nfsclds_list);
+			}
+			NFSUNLOCKMNT(clp->nfsc_nmp);
+		}
+
 		TAILQ_INIT(&dh);
 		NFSLOCKCLSTATE();
 		if (cbpathdown)
@@ -2542,8 +2703,90 @@
 		}
 		if (igotlock)
 			nfsv4_unlock(&clp->nfsc_lock, 0);
+
+		/*
+		 * Do the recall on any layouts. To avoid trouble, always
+		 * come back up here after having slept.
+		 */
+		TAILQ_INIT(&rlh);
+tryagain2:
+		TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
+			if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
+				/*
+				 * Wait for outstanding I/O ops to be done.
+				 */
+				if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
+				    (lyp->nfsly_lock.nfslock_lock &
+				     NFSV4LOCK_LOCK) != 0) {
+					lyp->nfsly_lock.nfslock_lock |=
+					    NFSV4LOCK_WANTED;
+					(void)nfsmsleep(&lyp->nfsly_lock,
+					    NFSCLSTATEMUTEXPTR, PZERO, "nfslyp",
+					    NULL);
+					goto tryagain2;
+				}
+				/* Move the layout to the recall list. */
+				TAILQ_REMOVE(&clp->nfsc_layout, lyp,
+				    nfsly_list);
+				LIST_REMOVE(lyp, nfsly_hash);
+				TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
+
+				/* Handle any layout commits. */
+				if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
+				    (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
+					lyp->nfsly_flags &= ~NFSLY_WRITTEN;
+					NFSUNLOCKCLSTATE();
+					NFSCL_DEBUG(3, "do layoutcommit\n");
+					nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
+					    cred, p);
+					NFSLOCKCLSTATE();
+					goto tryagain2;
+				}
+			}
+		}
+
+		/* Now, look for stale layouts. */
+		lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
+		while (lyp != NULL) {
+			nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
+			if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
+			    (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
+			    lyp->nfsly_lock.nfslock_usecnt == 0 &&
+			    lyp->nfsly_lock.nfslock_lock == 0) {
+				NFSCL_DEBUG(4, "ret stale lay=%d\n",
+				    nfscl_layoutcnt);
+				recallp = malloc(sizeof(*recallp),
+				    M_NFSLAYRECALL, M_NOWAIT);
+				if (recallp == NULL)
+					break;
+				(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
+				    lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
+				    lyp->nfsly_stateid.seqid, recallp);
+			}
+			lyp = nlyp;
+		}
+
+		/*
+		 * Free up any unreferenced device info structures.
+		 */
+		LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
+			if (dip->nfsdi_layoutrefs == 0 &&
+			    dip->nfsdi_refcnt == 0) {
+				NFSCL_DEBUG(4, "freeing devinfo\n");
+				LIST_REMOVE(dip, nfsdi_list);
+				nfscl_freedevinfo(dip);
+			}
+		}
 		NFSUNLOCKCLSTATE();
 
+		/* Do layout return(s), as required. */
+		TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
+			TAILQ_REMOVE(&rlh, lyp, nfsly_list);
+			NFSCL_DEBUG(4, "ret layout\n");
+			nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
+			nfscl_freelayout(lyp);
+		}
+
 		/*
 		 * Delegreturn any delegations cleaned out or recalled.
 		 */
@@ -2599,8 +2842,8 @@
 }
 
 /*
- * Initiate state recovery. Called when NFSERR_STALECLIENTID or
- * NFSERR_STALESTATEID is received.
+ * Initiate state recovery. Called when NFSERR_STALECLIENTID,
+ * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
  */
 APPLESTATIC void
 nfscl_initiate_recovery(struct nfsclclient *clp)
@@ -2832,7 +3075,7 @@
 	struct nfsfh *nfhp;
 	int error, notdecr;
 
-	error = nfscl_getcl(vp, NULL, NULL, &clp);
+	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp);
 	if (error)
 		return (error);
 	*clpp = clp;
@@ -2904,14 +3147,16 @@
 	struct nfsclopen *op;
 	struct nfscldeleg *dp;
 	struct nfsfh *nfhp;
+	struct nfsclrecalllayout *recallp;
 	int error;
 
-	error = nfscl_getcl(vp, NULL, NULL, &clp);
+	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp);
 	if (error)
 		return (error);
 	*clpp = clp;
 
 	nfhp = VTONFS(vp)->n_fhp;
+	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
 	NFSLOCKCLSTATE();
 	/*
 	 * First get rid of the local Open structures, which should be no
@@ -2930,6 +3175,9 @@
 		}
 	}
 
+	/* Return any layouts marked return on close. */
+	nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
+
 	/* Now process the opens against the server. */
 lookformore:
 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
@@ -2951,6 +3199,11 @@
 		}
 	}
 	NFSUNLOCKCLSTATE();
+	/*
+	 * recallp has been set NULL by nfscl_retoncloselayout() if it was
+	 * used by the function, but calling free() with a NULL pointer is ok.
+	 */
+	free(recallp, M_NFSLAYRECALL);
 	return (0);
 }
 
@@ -2979,12 +3232,12 @@
 APPLESTATIC void
 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
 {
-	int i, op;
+	int clist, gotseq_ok, i, j, k, op, rcalls;
 	u_int32_t *tl;
 	struct nfsclclient *clp;
 	struct nfscldeleg *dp = NULL;
-	int numops, taglen = -1, error = 0, trunc, ret = 0;
-	u_int32_t minorvers, retops = 0, *retopsp = NULL, *repp, cbident;
+	int numops, taglen = -1, error = 0, trunc;
+	u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
 	u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
 	vnode_t vp = NULL;
 	struct nfsnode *np;
@@ -2993,7 +3246,17 @@
 	mount_t mp;
 	nfsattrbit_t attrbits, rattrbits;
 	nfsv4stateid_t stateid;
+	uint32_t seqid, slotid = 0, highslot, cachethis;
+	uint8_t sessionid[NFSX_V4SESSIONID];
+	struct mbuf *rep;
+	struct nfscllayout *lyp;
+	uint64_t filesid[2], len, off;
+	int changed, gotone, laytype, recalltype;
+	uint32_t iomode;
+	struct nfsclrecalllayout *recallp = NULL;
+	struct nfsclsession *tsep;
 
+	gotseq_ok = 0;
 	nfsrvd_rephead(nd);
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	taglen = fxdr_unsigned(int, *tl);
@@ -3019,7 +3282,7 @@
 	NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
 	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 	minorvers = fxdr_unsigned(u_int32_t, *tl++);
-	if (minorvers != NFSV4_MINORVERSION)
+	if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION)
 		nd->nd_repstat = NFSERR_MINORVERMISMATCH;
 	cbident = fxdr_unsigned(u_int32_t, *tl++);
 	if (nd->nd_repstat)
@@ -3034,73 +3297,85 @@
 		NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*repp++ = *tl;
 		op = fxdr_unsigned(int, *tl);
-		if (op < NFSV4OP_CBGETATTR || op > NFSV4OP_CBRECALL) {
+		if (op < NFSV4OP_CBGETATTR ||
+		   (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
+		   (op > NFSV4OP_CBNOTIFYDEVID &&
+		    minorvers == NFSV41_MINORVERSION)) {
 		    nd->nd_repstat = NFSERR_OPILLEGAL;
-		    *repp = nfscl_errmap(nd);
+		    *repp = nfscl_errmap(nd, minorvers);
 		    retops++;
 		    break;
 		}
 		nd->nd_procnum = op;
-		newnfsstats.cbrpccnt[nd->nd_procnum]++;
+		if (op < NFSV4OP_CBNOPS)
+			newnfsstats.cbrpccnt[nd->nd_procnum]++;
 		switch (op) {
 		case NFSV4OP_CBGETATTR:
-			clp = NULL;
+			NFSCL_DEBUG(4, "cbgetattr\n");
+			mp = NULL;
+			vp = NULL;
 			error = nfsm_getfh(nd, &nfhp);
 			if (!error)
 				error = nfsrv_getattrbits(nd, &attrbits,
 				    NULL, NULL);
+			if (error == 0 && i == 0 &&
+			    minorvers != NFSV4_MINORVERSION)
+				error = NFSERR_OPNOTINSESS;
 			if (!error) {
-				mp = nfscl_getmnt(cbident);
+				mp = nfscl_getmnt(minorvers, sessionid, cbident,
+				    &clp);
 				if (mp == NULL)
 					error = NFSERR_SERVERFAULT;
 			}
 			if (!error) {
-				dp = NULL;
+				error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
+				    nfhp->nfh_len, p, &np);
+				if (!error)
+					vp = NFSTOV(np);
+			}
+			if (!error) {
+				NFSZERO_ATTRBIT(&rattrbits);
 				NFSLOCKCLSTATE();
-				clp = nfscl_findcl(VFSTONFS(mp));
-				if (clp != NULL)
-					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
-					    nfhp->nfh_len);
+				dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
+				    nfhp->nfh_len);
+				if (dp != NULL) {
+					if (NFSISSET_ATTRBIT(&attrbits,
+					    NFSATTRBIT_SIZE)) {
+						if (vp != NULL)
+							va.va_size = np->n_size;
+						else
+							va.va_size =
+							    dp->nfsdl_size;
+						NFSSETBIT_ATTRBIT(&rattrbits,
+						    NFSATTRBIT_SIZE);
+					}
+					if (NFSISSET_ATTRBIT(&attrbits,
+					    NFSATTRBIT_CHANGE)) {
+						va.va_filerev =
+						    dp->nfsdl_change;
+						if (vp == NULL ||
+						    (np->n_flag & NDELEGMOD))
+							va.va_filerev++;
+						NFSSETBIT_ATTRBIT(&rattrbits,
+						    NFSATTRBIT_CHANGE);
+					}
+				} else
+					error = NFSERR_SERVERFAULT;
 				NFSUNLOCKCLSTATE();
-				if (dp == NULL)
-					error = NFSERR_SERVERFAULT;
 			}
-			if (!error) {
-				ret = nfscl_ngetreopen(mp, nfhp->nfh_fh,
-				    nfhp->nfh_len, p, &np);
-				if (!ret)
-					vp = NFSTOV(np);
-			}
+			if (vp != NULL)
+				vrele(vp);
+			if (mp != NULL)
+				vfs_unbusy(mp);
 			if (nfhp != NULL)
 				FREE((caddr_t)nfhp, M_NFSFH);
-			if (!error) {
-				NFSZERO_ATTRBIT(&rattrbits);
-				if (NFSISSET_ATTRBIT(&attrbits,
-				    NFSATTRBIT_SIZE)) {
-					if (!ret)
-						va.va_size = np->n_size;
-					else
-						va.va_size = dp->nfsdl_size;
-					NFSSETBIT_ATTRBIT(&rattrbits,
-					    NFSATTRBIT_SIZE);
-				}
-				if (NFSISSET_ATTRBIT(&attrbits,
-				    NFSATTRBIT_CHANGE)) {
-					va.va_filerev = dp->nfsdl_change;
-					if (ret || (np->n_flag & NDELEGMOD))
-						va.va_filerev++;
-					NFSSETBIT_ATTRBIT(&rattrbits,
-					    NFSATTRBIT_CHANGE);
-				}
+			if (!error)
 				(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
-				    NULL, 0, &rattrbits, NULL, NULL, 0, 0, 0, 0,
+				    NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
 				    (uint64_t)0);
-				if (!ret)
-					vrele(vp);
-			}
 			break;
 		case NFSV4OP_CBRECALL:
-			clp = NULL;
+			NFSCL_DEBUG(4, "cbrecall\n");
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
 			    NFSX_UNSIGNED);
 			stateid.seqid = *tl++;
@@ -3109,14 +3384,15 @@
 			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 			trunc = fxdr_unsigned(int, *tl);
 			error = nfsm_getfh(nd, &nfhp);
+			if (error == 0 && i == 0 &&
+			    minorvers != NFSV4_MINORVERSION)
+				error = NFSERR_OPNOTINSESS;
 			if (!error) {
-				mp = nfscl_getmnt(cbident);
-				if (mp == NULL)
-					error = NFSERR_SERVERFAULT;
-			}
-			if (!error) {
 				NFSLOCKCLSTATE();
-				clp = nfscl_findcl(VFSTONFS(mp));
+				if (minorvers == NFSV4_MINORVERSION)
+					clp = nfscl_getclnt(cbident);
+				else
+					clp = nfscl_getclntsess(sessionid);
 				if (clp != NULL) {
 					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
 					    nfhp->nfh_len);
@@ -3134,6 +3410,203 @@
 			if (nfhp != NULL)
 				FREE((caddr_t)nfhp, M_NFSFH);
 			break;
+		case NFSV4OP_CBLAYOUTRECALL:
+			NFSCL_DEBUG(4, "cblayrec\n");
+			nfhp = NULL;
+			NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+			laytype = fxdr_unsigned(int, *tl++);
+			iomode = fxdr_unsigned(uint32_t, *tl++);
+			if (newnfs_true == *tl++)
+				changed = 1;
+			else
+				changed = 0;
+			recalltype = fxdr_unsigned(int, *tl);
+			recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
+			    M_WAITOK);
+			if (laytype != NFSLAYOUT_NFSV4_1_FILES)
+				error = NFSERR_NOMATCHLAYOUT;
+			else if (recalltype == NFSLAYOUTRETURN_FILE) {
+				error = nfsm_getfh(nd, &nfhp);
+				NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
+				if (error != 0)
+					goto nfsmout;
+				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
+				    NFSX_STATEID);
+				off = fxdr_hyper(tl); tl += 2;
+				len = fxdr_hyper(tl); tl += 2;
+				stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
+				NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
+				if (minorvers == NFSV4_MINORVERSION)
+					error = NFSERR_NOTSUPP;
+				else if (i == 0)
+					error = NFSERR_OPNOTINSESS;
+				if (error == 0) {
+					NFSLOCKCLSTATE();
+					clp = nfscl_getclntsess(sessionid);
+					NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
+					if (clp != NULL) {
+						lyp = nfscl_findlayout(clp,
+						    nfhp->nfh_fh,
+						    nfhp->nfh_len);
+						NFSCL_DEBUG(4, "cblyp=%p\n",
+						    lyp);
+						if (lyp != NULL &&
+						    (lyp->nfsly_flags &
+						     NFSLY_FILES) != 0 &&
+						    !NFSBCMP(stateid.other,
+						    lyp->nfsly_stateid.other,
+						    NFSX_STATEIDOTHER)) {
+							error =
+							    nfscl_layoutrecall(
+							    recalltype,
+							    lyp, iomode, off,
+							    len, stateid.seqid,
+							    recallp);
+							recallp = NULL;
+							wakeup(clp);
+							NFSCL_DEBUG(4,
+							    "aft layrcal=%d\n",
+							    error);
+						} else
+							error =
+							  NFSERR_NOMATCHLAYOUT;
+					} else
+						error = NFSERR_NOMATCHLAYOUT;
+					NFSUNLOCKCLSTATE();
+				}
+				free(nfhp, M_NFSFH);
+			} else if (recalltype == NFSLAYOUTRETURN_FSID) {
+				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
+				filesid[0] = fxdr_hyper(tl); tl += 2;
+				filesid[1] = fxdr_hyper(tl); tl += 2;
+				gotone = 0;
+				NFSLOCKCLSTATE();
+				clp = nfscl_getclntsess(sessionid);
+				if (clp != NULL) {
+					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
+					    nfsly_list) {
+						if (lyp->nfsly_filesid[0] ==
+						    filesid[0] &&
+						    lyp->nfsly_filesid[1] ==
+						    filesid[1]) {
+							error =
+							    nfscl_layoutrecall(
+							    recalltype,
+							    lyp, iomode, 0,
+							    UINT64_MAX,
+							    lyp->nfsly_stateid.seqid,
+							    recallp);
+							recallp = NULL;
+							gotone = 1;
+						}
+					}
+					if (gotone != 0)
+						wakeup(clp);
+					else
+						error = NFSERR_NOMATCHLAYOUT;
+				} else
+					error = NFSERR_NOMATCHLAYOUT;
+				NFSUNLOCKCLSTATE();
+			} else if (recalltype == NFSLAYOUTRETURN_ALL) {
+				gotone = 0;
+				NFSLOCKCLSTATE();
+				clp = nfscl_getclntsess(sessionid);
+				if (clp != NULL) {
+					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
+					    nfsly_list) {
+						error = nfscl_layoutrecall(
+						    recalltype, lyp, iomode, 0,
+						    UINT64_MAX,
+						    lyp->nfsly_stateid.seqid,
+						    recallp);
+						recallp = NULL;
+						gotone = 1;
+					}
+					if (gotone != 0)
+						wakeup(clp);
+					else
+						error = NFSERR_NOMATCHLAYOUT;
+				} else
+					error = NFSERR_NOMATCHLAYOUT;
+				NFSUNLOCKCLSTATE();
+			} else
+				error = NFSERR_NOMATCHLAYOUT;
+			if (recallp != NULL) {
+				free(recallp, M_NFSLAYRECALL);
+				recallp = NULL;
+			}
+			break;
+		case NFSV4OP_CBSEQUENCE:
+			NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
+			    5 * NFSX_UNSIGNED);
+			bcopy(tl, sessionid, NFSX_V4SESSIONID);
+			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+			seqid = fxdr_unsigned(uint32_t, *tl++);
+			slotid = fxdr_unsigned(uint32_t, *tl++);
+			highslot = fxdr_unsigned(uint32_t, *tl++);
+			cachethis = *tl++;
+			/* Throw away the referring call stuff. */
+			clist = fxdr_unsigned(int, *tl);
+			for (j = 0; j < clist; j++) {
+				NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
+				    NFSX_UNSIGNED);
+				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+				rcalls = fxdr_unsigned(int, *tl);
+				for (k = 0; k < rcalls; k++) {
+					NFSM_DISSECT(tl, uint32_t *,
+					    2 * NFSX_UNSIGNED);
+				}
+			}
+			NFSLOCKCLSTATE();
+			if (i == 0) {
+				clp = nfscl_getclntsess(sessionid);
+				if (clp == NULL)
+					error = NFSERR_SERVERFAULT;
+			} else
+				error = NFSERR_SEQUENCEPOS;
+			if (error == 0) {
+				tsep = nfsmnt_mdssession(clp->nfsc_nmp);
+				error = nfsv4_seqsession(seqid, slotid,
+				    highslot, tsep->nfsess_cbslots, &rep,
+				    tsep->nfsess_backslots);
+			}
+			NFSUNLOCKCLSTATE();
+			if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
+				gotseq_ok = 1;
+				if (rep != NULL) {
+					/*
+					 * Handle a reply for a retried
+					 * callback.  The reply will be
+					 * re-inserted in the session cache
+					 * by the nfsv4_seqsess_cacherep() call
+					 * after out:
+					 */
+					KASSERT(error == NFSERR_REPLYFROMCACHE,
+					    ("cbsequence: non-NULL rep"));
+					NFSCL_DEBUG(4, "Got cbretry\n");
+					m_freem(nd->nd_mreq);
+					nd->nd_mreq = rep;
+					rep = NULL;
+					goto out;
+				}
+				NFSM_BUILD(tl, uint32_t *,
+				    NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
+				bcopy(sessionid, tl, NFSX_V4SESSIONID);
+				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+				*tl++ = txdr_unsigned(seqid);
+				*tl++ = txdr_unsigned(slotid);
+				*tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
+				*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
+			}
+			break;
+		default:
+			if (i == 0 && minorvers == NFSV41_MINORVERSION)
+				error = NFSERR_OPNOTINSESS;
+			else {
+				NFSCL_DEBUG(1, "unsupp callback %d\n", op);
+				error = NFSERR_NOTSUPP;
+			}
+			break;
 		};
 		if (error) {
 			if (error == EBADRPC || error == NFSERR_BADXDR) {
@@ -3145,12 +3618,14 @@
 		}
 		retops++;
 		if (nd->nd_repstat) {
-			*repp = nfscl_errmap(nd);
+			*repp = nfscl_errmap(nd, minorvers);
 			break;
 		} else
 			*repp = 0;	/* NFS4_OK */
 	}
 nfsmout:
+	if (recallp != NULL)
+		free(recallp, M_NFSLAYRECALL);
 	if (error) {
 		if (error == EBADRPC || error == NFSERR_BADXDR)
 			nd->nd_repstat = NFSERR_BADXDR;
@@ -3164,7 +3639,22 @@
 	} else {
 		*retopsp = txdr_unsigned(retops);
 	}
-	*nd->nd_errp = nfscl_errmap(nd);
+	*nd->nd_errp = nfscl_errmap(nd, minorvers);
+out:
+	if (gotseq_ok != 0) {
+		rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
+		NFSLOCKCLSTATE();
+		clp = nfscl_getclntsess(sessionid);
+		if (clp != NULL) {
+			tsep = nfsmnt_mdssession(clp->nfsc_nmp);
+			nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
+			    NFSERR_OK, &rep);
+			NFSUNLOCKCLSTATE();
+		} else {
+			NFSUNLOCKCLSTATE();
+			m_freem(rep);
+		}
+	}
 }
 
 /*
@@ -3204,17 +3694,26 @@
 }
 
 /*
- * Get the mount point related to a given cbident.
+ * Get the mount point related to a given cbident or session and busy it.
  */
 static mount_t
-nfscl_getmnt(u_int32_t cbident)
+nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
+    struct nfsclclient **clpp)
 {
 	struct nfsclclient *clp;
-	struct nfsmount *nmp;
+	mount_t mp;
+	int error;
+	struct nfsclsession *tsep;
 
+	*clpp = NULL;
 	NFSLOCKCLSTATE();
 	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
-		if (clp->nfsc_cbident == cbident)
+		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
+		if (minorvers == NFSV4_MINORVERSION) {
+			if (clp->nfsc_cbident == cbident)
+				break;
+		} else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
+		    NFSX_V4SESSIONID))
 			break;
 	}
 	if (clp == NULL) {
@@ -3221,12 +3720,50 @@
 		NFSUNLOCKCLSTATE();
 		return (NULL);
 	}
-	nmp = clp->nfsc_nmp;
+	mp = clp->nfsc_nmp->nm_mountp;
+	vfs_ref(mp);
 	NFSUNLOCKCLSTATE();
-	return (nmp->nm_mountp);
+	error = vfs_busy(mp, 0);
+	vfs_rel(mp);
+	if (error != 0)
+		return (NULL);
+	*clpp = clp;
+	return (mp);
 }
 
 /*
+ * Get the clientid pointer related to a given cbident.
+ */
+static struct nfsclclient *
+nfscl_getclnt(u_int32_t cbident)
+{
+	struct nfsclclient *clp;
+
+	LIST_FOREACH(clp, &nfsclhead, nfsc_list)
+		if (clp->nfsc_cbident == cbident)
+			break;
+	return (clp);
+}
+
+/*
+ * Get the clientid pointer related to a given sessionid.
+ */
+static struct nfsclclient *
+nfscl_getclntsess(uint8_t *sessionid)
+{
+	struct nfsclclient *clp;
+	struct nfsclsession *tsep;
+
+	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
+		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
+		if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
+		    NFSX_V4SESSIONID))
+			break;
+	}
+	return (clp);
+}
+
+/*
  * Search for a lock conflict locally on the client. A conflict occurs if
  * - not same owner and overlapping byte range and at least one of them is
  *   a write lock or this is an unlock.
@@ -3383,8 +3920,7 @@
 	if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
 		np->n_flag |= NDELEGRECALL;
 		NFSUNLOCKNODE(np);
-		ret = ncl_flush(vp, MNT_WAIT, cred, p, 1,
-		    called_from_renewthread);
+		ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
 		NFSLOCKNODE(np);
 		np->n_flag &= ~NDELEGRECALL;
 	}
@@ -3420,7 +3956,8 @@
 					ret = nfscl_moveopen(vp, clp, nmp, lop,
 					    owp, dp, cred, p);
 					if (ret == NFSERR_STALECLIENTID ||
-					    ret == NFSERR_STALEDONTRECOVER) {
+					    ret == NFSERR_STALEDONTRECOVER ||
+					    ret == NFSERR_BADSESSION) {
 						if (gotvp)
 							vrele(vp);
 						return (ret);
@@ -3444,7 +3981,7 @@
 				    M_WAITOK);
 				nfscl_newopen(clp, NULL, &owp, &nowp, &op, 
 				    NULL, lowp->nfsow_owner, dp->nfsdl_fh,
-				    dp->nfsdl_fhlen, NULL);
+				    dp->nfsdl_fhlen, NULL, NULL);
 				newnfs_copycred(&dp->nfsdl_cred, cred);
 				ret = nfscl_moveopen(vp, clp, nmp, lop,
 				    owp, dp, cred, p);
@@ -3451,7 +3988,8 @@
 				if (ret) {
 					nfscl_freeopenowner(owp, 0);
 					if (ret == NFSERR_STALECLIENTID ||
-					    ret == NFSERR_STALEDONTRECOVER) {
+					    ret == NFSERR_STALEDONTRECOVER ||
+					    ret == NFSERR_BADSESSION) {
 						if (gotvp)
 							vrele(vp);
 						return (ret);
@@ -3475,7 +4013,8 @@
 			ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
 			if (ret == NFSERR_STALESTATEID ||
 			    ret == NFSERR_STALEDONTRECOVER ||
-			    ret == NFSERR_STALECLIENTID) {
+			    ret == NFSERR_STALECLIENTID ||
+			    ret == NFSERR_BADSESSION) {
 				if (gotvp)
 					vrele(vp);
 				return (ret);
@@ -3524,7 +4063,7 @@
 	    lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
 	newone = 0;
 	nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
-	    lop->nfso_fh, lop->nfso_fhlen, &newone);
+	    lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
 	ndp = dp;
 	error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
 	    lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
@@ -3533,8 +4072,6 @@
 		if (newone)
 			nfscl_freeopen(op, 0);
 	} else {
-		if (newone)
-			newnfs_copyincred(cred, &op->nfso_cred);
 		op->nfso_mode |= lop->nfso_mode;
 		op->nfso_opencnt += lop->nfso_opencnt;
 		nfscl_freeopen(lop, 1);
@@ -4210,7 +4747,7 @@
 }
 
 static int
-nfscl_errmap(struct nfsrv_descript *nd)
+nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
 {
 	short *defaulterrp, *errp;
 
@@ -4223,7 +4760,15 @@
 	if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
 	    nd->nd_repstat == NFSERR_OPILLEGAL)
 		return (txdr_unsigned(nd->nd_repstat));
-	errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
+	if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
+	    minorvers > NFSV4_MINORVERSION) {
+		/* NFSv4.n error. */
+		return (txdr_unsigned(nd->nd_repstat));
+	}
+	if (nd->nd_procnum < NFSV4OP_CBNOPS)
+		errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
+	else
+		return (txdr_unsigned(nd->nd_repstat));
 	while (*++errp)
 		if (*errp == (short)nd->nd_repstat)
 			return (txdr_unsigned(nd->nd_repstat));
@@ -4230,3 +4775,553 @@
 	return (txdr_unsigned(*defaulterrp));
 }
 
+/*
+ * Called to find/add a layout to a client.
+ * This function returns the layout with a refcnt (shared lock) upon
+ * success (returns 0) or with no lock/refcnt on the layout when an
+ * error is returned.
+ * If a layout is passed in via lypp, it is locked (exclusively locked).
+ */
+APPLESTATIC int
+nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
+    nfsv4stateid_t *stateidp, int retonclose,
+    struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfsclclient *clp;
+	struct nfscllayout *lyp, *tlyp;
+	struct nfsclflayout *flp;
+	struct nfsnode *np = VTONFS(vp);
+	mount_t mp;
+	int layout_passed_in;
+
+	mp = nmp->nm_mountp;
+	layout_passed_in = 1;
+	tlyp = NULL;
+	lyp = *lypp;
+	if (lyp == NULL) {
+		layout_passed_in = 0;
+		tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
+		    M_WAITOK | M_ZERO);
+	}
+
+	NFSLOCKCLSTATE();
+	clp = nmp->nm_clp;
+	if (clp == NULL) {
+		if (layout_passed_in != 0)
+			nfsv4_unlock(&lyp->nfsly_lock, 0);
+		NFSUNLOCKCLSTATE();
+		if (tlyp != NULL)
+			free(tlyp, M_NFSLAYOUT);
+		return (EPERM);
+	}
+	if (lyp == NULL) {
+		/*
+		 * Although no lyp was passed in, another thread might have
+		 * allocated one. If one is found, just increment it's ref
+		 * count and return it.
+		 */
+		lyp = nfscl_findlayout(clp, fhp, fhlen);
+		if (lyp == NULL) {
+			lyp = tlyp;
+			tlyp = NULL;
+			lyp->nfsly_stateid.seqid = stateidp->seqid;
+			lyp->nfsly_stateid.other[0] = stateidp->other[0];
+			lyp->nfsly_stateid.other[1] = stateidp->other[1];
+			lyp->nfsly_stateid.other[2] = stateidp->other[2];
+			lyp->nfsly_lastbyte = 0;
+			LIST_INIT(&lyp->nfsly_flayread);
+			LIST_INIT(&lyp->nfsly_flayrw);
+			LIST_INIT(&lyp->nfsly_recall);
+			lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
+			lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
+			lyp->nfsly_clp = clp;
+			lyp->nfsly_flags = (retonclose != 0) ?
+			    (NFSLY_FILES | NFSLY_RETONCLOSE) : NFSLY_FILES;
+			lyp->nfsly_fhlen = fhlen;
+			NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
+			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
+			LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
+			    nfsly_hash);
+			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
+			nfscl_layoutcnt++;
+		} else {
+			if (retonclose != 0)
+				lyp->nfsly_flags |= NFSLY_RETONCLOSE;
+			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
+			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
+			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
+		}
+		nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
+		if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
+			NFSUNLOCKCLSTATE();
+			if (tlyp != NULL)
+				free(tlyp, M_NFSLAYOUT);
+			return (EPERM);
+		}
+		*lypp = lyp;
+	} else
+		lyp->nfsly_stateid.seqid = stateidp->seqid;
+
+	/* Merge the new list of File Layouts into the list. */
+	flp = LIST_FIRST(fhlp);
+	if (flp != NULL) {
+		if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
+			nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
+		else
+			nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
+	}
+	if (layout_passed_in != 0)
+		nfsv4_unlock(&lyp->nfsly_lock, 1);
+	NFSUNLOCKCLSTATE();
+	if (tlyp != NULL)
+		free(tlyp, M_NFSLAYOUT);
+	return (0);
+}
+
+/*
+ * Search for a layout by MDS file handle.
+ * If one is found, it is returned with a refcnt (shared lock) iff
+ * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
+ * returned NULL.
+ */
+struct nfscllayout *
+nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
+    uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
+{
+	struct nfscllayout *lyp;
+	mount_t mp;
+	int error, igotlock;
+
+	mp = clp->nfsc_nmp->nm_mountp;
+	*recalledp = 0;
+	*retflpp = NULL;
+	NFSLOCKCLSTATE();
+	lyp = nfscl_findlayout(clp, fhp, fhlen);
+	if (lyp != NULL) {
+		if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
+			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
+			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
+			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
+			error = nfscl_findlayoutforio(lyp, off,
+			    NFSV4OPEN_ACCESSREAD, retflpp);
+			if (error == 0)
+				nfsv4_getref(&lyp->nfsly_lock, NULL,
+				    NFSCLSTATEMUTEXPTR, mp);
+			else {
+				do {
+					igotlock = nfsv4_lock(&lyp->nfsly_lock,
+					    1, NULL, NFSCLSTATEMUTEXPTR, mp);
+				} while (igotlock == 0 &&
+				    (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0);
+				*retflpp = NULL;
+			}
+			if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
+				lyp = NULL;
+				*recalledp = 1;
+			}
+		} else {
+			lyp = NULL;
+			*recalledp = 1;
+		}
+	}
+	NFSUNLOCKCLSTATE();
+	return (lyp);
+}
+
+/*
+ * Search for a layout by MDS file handle. If one is found, mark in to be
+ * recalled, if it already marked "return on close".
+ */
+static void
+nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
+    int fhlen, struct nfsclrecalllayout **recallpp)
+{
+	struct nfscllayout *lyp;
+	uint32_t iomode;
+
+	if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vnode_mount(vp))) ||
+	    nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
+	    (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
+		return;
+	lyp = nfscl_findlayout(clp, fhp, fhlen);
+	if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
+	    NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
+		iomode = 0;
+		if (!LIST_EMPTY(&lyp->nfsly_flayread))
+			iomode |= NFSLAYOUTIOMODE_READ;
+		if (!LIST_EMPTY(&lyp->nfsly_flayrw))
+			iomode |= NFSLAYOUTIOMODE_RW;
+		(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
+		    0, UINT64_MAX, lyp->nfsly_stateid.seqid, *recallpp);
+		NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
+		*recallpp = NULL;
+	}
+}
+
+/*
+ * Dereference a layout.
+ */
+void
+nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
+{
+
+	NFSLOCKCLSTATE();
+	if (exclocked != 0)
+		nfsv4_unlock(&lyp->nfsly_lock, 0);
+	else
+		nfsv4_relref(&lyp->nfsly_lock);
+	NFSUNLOCKCLSTATE();
+}
+
+/*
+ * Search for a devinfo by deviceid. If one is found, return it after
+ * acquiring a reference count on it.
+ */
+struct nfscldevinfo *
+nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
+    struct nfscldevinfo *dip)
+{
+
+	NFSLOCKCLSTATE();
+	if (dip == NULL)
+		dip = nfscl_finddevinfo(clp, deviceid);
+	if (dip != NULL)
+		dip->nfsdi_refcnt++;
+	NFSUNLOCKCLSTATE();
+	return (dip);
+}
+
+/*
+ * Dereference a devinfo structure.
+ */
+static void
+nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
+{
+
+	dip->nfsdi_refcnt--;
+	if (dip->nfsdi_refcnt == 0)
+		wakeup(&dip->nfsdi_refcnt);
+}
+
+/*
+ * Dereference a devinfo structure.
+ */
+void
+nfscl_reldevinfo(struct nfscldevinfo *dip)
+{
+
+	NFSLOCKCLSTATE();
+	nfscl_reldevinfo_locked(dip);
+	NFSUNLOCKCLSTATE();
+}
+
+/*
+ * Find a layout for this file handle. Return NULL upon failure.
+ */
+static struct nfscllayout *
+nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
+{
+	struct nfscllayout *lyp;
+
+	LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
+		if (lyp->nfsly_fhlen == fhlen &&
+		    !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
+			break;
+	return (lyp);
+}
+
+/*
+ * Find a devinfo for this deviceid. Return NULL upon failure.
+ */
+static struct nfscldevinfo *
+nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
+{
+	struct nfscldevinfo *dip;
+
+	LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
+		if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
+		    == 0)
+			break;
+	return (dip);
+}
+
+/*
+ * Merge the new file layout list into the main one, maintaining it in
+ * increasing offset order.
+ */
+static void
+nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
+    struct nfsclflayouthead *newfhlp)
+{
+	struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
+
+	flp = LIST_FIRST(fhlp);
+	prevflp = NULL;
+	LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
+		while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
+			prevflp = flp;
+			flp = LIST_NEXT(flp, nfsfl_list);
+		}
+		if (prevflp == NULL)
+			LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
+		else
+			LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
+		prevflp = nflp;
+	}
+}
+
+/*
+ * Add this nfscldevinfo to the client, if it doesn't already exist.
+ * This function consumes the structure pointed at by dip, if not NULL.
+ */
+APPLESTATIC int
+nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip,
+    struct nfsclflayout *flp)
+{
+	struct nfsclclient *clp;
+	struct nfscldevinfo *tdip;
+
+	NFSLOCKCLSTATE();
+	clp = nmp->nm_clp;
+	if (clp == NULL) {
+		NFSUNLOCKCLSTATE();
+		if (dip != NULL)
+			free(dip, M_NFSDEVINFO);
+		return (ENODEV);
+	}
+	tdip = nfscl_finddevinfo(clp, flp->nfsfl_dev);
+	if (tdip != NULL) {
+		tdip->nfsdi_layoutrefs++;
+		flp->nfsfl_devp = tdip;
+		nfscl_reldevinfo_locked(tdip);
+		NFSUNLOCKCLSTATE();
+		if (dip != NULL)
+			free(dip, M_NFSDEVINFO);
+		return (0);
+	}
+	if (dip != NULL) {
+		LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
+		dip->nfsdi_layoutrefs = 1;
+		flp->nfsfl_devp = dip;
+	}
+	NFSUNLOCKCLSTATE();
+	if (dip == NULL)
+		return (ENODEV);
+	return (0);
+}
+
+/*
+ * Free up a layout structure and associated file layout structure(s).
+ */
+APPLESTATIC void
+nfscl_freelayout(struct nfscllayout *layp)
+{
+	struct nfsclflayout *flp, *nflp;
+	struct nfsclrecalllayout *rp, *nrp;
+
+	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
+		LIST_REMOVE(flp, nfsfl_list);
+		nfscl_freeflayout(flp);
+	}
+	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
+		LIST_REMOVE(flp, nfsfl_list);
+		nfscl_freeflayout(flp);
+	}
+	LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
+		LIST_REMOVE(rp, nfsrecly_list);
+		free(rp, M_NFSLAYRECALL);
+	}
+	nfscl_layoutcnt--;
+	free(layp, M_NFSLAYOUT);
+}
+
+/*
+ * Free up a file layout structure.
+ */
+APPLESTATIC void
+nfscl_freeflayout(struct nfsclflayout *flp)
+{
+	int i;
+
+	for (i = 0; i < flp->nfsfl_fhcnt; i++)
+		free(flp->nfsfl_fh[i], M_NFSFH);
+	if (flp->nfsfl_devp != NULL)
+		flp->nfsfl_devp->nfsdi_layoutrefs--;
+	free(flp, M_NFSFLAYOUT);
+}
+
+/*
+ * Free up a file layout devinfo structure.
+ */
+APPLESTATIC void
+nfscl_freedevinfo(struct nfscldevinfo *dip)
+{
+
+	free(dip, M_NFSDEVINFO);
+}
+
+/*
+ * Mark any layouts that match as recalled.
+ */
+static int
+nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
+    uint64_t off, uint64_t len, uint32_t stateseqid,
+    struct nfsclrecalllayout *recallp)
+{
+	struct nfsclrecalllayout *rp, *orp;
+
+	recallp->nfsrecly_recalltype = recalltype;
+	recallp->nfsrecly_iomode = iomode;
+	recallp->nfsrecly_stateseqid = stateseqid;
+	recallp->nfsrecly_off = off;
+	recallp->nfsrecly_len = len;
+	/*
+	 * Order the list as file returns first, followed by fsid and any
+	 * returns, both in increasing stateseqid order.
+	 * Note that the seqids wrap around, so 1 is after 0xffffffff.
+	 * (I'm not sure this is correct because I find RFC5661 confusing
+	 *  on this, but hopefully it will work ok.)
+	 */
+	orp = NULL;
+	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
+		orp = rp;
+		if ((recalltype == NFSLAYOUTRETURN_FILE &&
+		     (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
+		      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
+		    (recalltype != NFSLAYOUTRETURN_FILE &&
+		     rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
+		     nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
+			LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
+			break;
+		}
+	}
+	if (rp == NULL) {
+		if (orp == NULL)
+			LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
+			    nfsrecly_list);
+		else
+			LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
+	}
+	lyp->nfsly_flags |= NFSLY_RECALL;
+	return (0);
+}
+
+/*
+ * Compare the two seqids for ordering. The trick is that the seqids can
+ * wrap around from 0xffffffff->0, so check for the cases where one
+ * has wrapped around.
+ * Return 1 if seqid1 comes before seqid2, 0 otherwise.
+ */
+static int
+nfscl_seq(uint32_t seqid1, uint32_t seqid2)
+{
+
+	if (seqid2 > seqid1 && (seqid2 - seqid1) >= 0x7fffffff)
+		/* seqid2 has wrapped around. */
+		return (0);
+	if (seqid1 > seqid2 && (seqid1 - seqid2) >= 0x7fffffff)
+		/* seqid1 has wrapped around. */
+		return (1);
+	if (seqid1 <= seqid2)
+		return (1);
+	return (0);
+}
+
+/*
+ * Do a layout return for each of the recalls.
+ */
+static void
+nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfsclrecalllayout *rp;
+	nfsv4stateid_t stateid;
+
+	NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
+	stateid.seqid = lyp->nfsly_stateid.seqid;
+	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
+		(void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
+		    lyp->nfsly_fhlen, 0, NFSLAYOUT_NFSV4_1_FILES,
+		    rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
+		    rp->nfsrecly_off, rp->nfsrecly_len,
+		    &stateid, 0, NULL, cred, p, NULL);
+	}
+}
+
+/*
+ * Do the layout commit for a file layout.
+ */
+static void
+nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfsclflayout *flp;
+	uint64_t len;
+	int error;
+
+	LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
+		if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
+			len = flp->nfsfl_end - flp->nfsfl_off;
+			error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
+			    lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
+			    lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
+			    NFSLAYOUT_NFSV4_1_FILES, 0, NULL, cred, p, NULL);
+			NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
+			if (error == NFSERR_NOTSUPP) {
+				/* If not supported, don't bother doing it. */
+				NFSLOCKMNT(nmp);
+				nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
+				NFSUNLOCKMNT(nmp);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Commit all layouts for a file (vnode).
+ */
+int
+nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
+{
+	struct nfsclclient *clp;
+	struct nfscllayout *lyp;
+	struct nfsnode *np = VTONFS(vp);
+	mount_t mp;
+	struct nfsmount *nmp;
+
+	mp = vnode_mount(vp);
+	nmp = VFSTONFS(mp);
+	if (NFSHASNOLAYOUTCOMMIT(nmp))
+		return (0);
+	NFSLOCKCLSTATE();
+	clp = nmp->nm_clp;
+	if (clp == NULL) {
+		NFSUNLOCKCLSTATE();
+		return (EPERM);
+	}
+	lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
+	if (lyp == NULL) {
+		NFSUNLOCKCLSTATE();
+		return (EPERM);
+	}
+	nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
+	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
+		NFSUNLOCKCLSTATE();
+		return (EPERM);
+	}
+tryagain:
+	if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
+		lyp->nfsly_flags &= ~NFSLY_WRITTEN;
+		NFSUNLOCKCLSTATE();
+		NFSCL_DEBUG(4, "do layoutcommit2\n");
+		nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
+		NFSLOCKCLSTATE();
+		goto tryagain;
+	}
+	nfsv4_relref(&lyp->nfsly_lock);
+	NFSUNLOCKCLSTATE();
+	return (0);
+}
+

Modified: trunk/sys/fs/nfsclient/nfs_clsubs.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clsubs.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clsubs.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clsubs.c 306663 2016-10-03 23:17:57Z rmacklem $");
 
 #include "opt_kdtrace.h"
 
@@ -163,18 +164,6 @@
   	}
 }
 
-void
-ncl_printf(const char *fmt, ...)
-{
-	va_list ap;
-
-	mtx_lock(&Giant);
-	va_start(ap, fmt);
-	vprintf(fmt, ap);
-	va_end(ap);
-	mtx_unlock(&Giant);
-}
-
 #ifdef NFS_ACDEBUG
 #include <sys/sysctl.h>
 SYSCTL_DECL(_vfs_nfs);
@@ -199,9 +188,6 @@
 	vap = &np->n_vattr.na_vattr;
 	nmp = VFSTONFS(vp->v_mount);
 	mustflush = nfscl_mustflush(vp);	/* must be before mtx_lock() */
-#ifdef NFS_ACDEBUG
-	mtx_lock(&Giant);	/* ncl_printf() */
-#endif
 	mtx_lock(&np->n_mtx);
 	/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
 	timeo = (time_second - np->n_mtime.tv_sec) / 10;
@@ -208,7 +194,7 @@
 
 #ifdef NFS_ACDEBUG
 	if (nfs_acdebug>1)
-		ncl_printf("nfs_getattrcache: initial timeo = %d\n", timeo);
+		printf("ncl_getattrcache: initial timeo = %d\n", timeo);
 #endif
 
 	if (vap->va_type == VDIR) {
@@ -225,13 +211,13 @@
 
 #ifdef NFS_ACDEBUG
 	if (nfs_acdebug > 2)
-		ncl_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
-			   nmp->nm_acregmin, nmp->nm_acregmax,
-			   nmp->nm_acdirmin, nmp->nm_acdirmax);
+		printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
+		    nmp->nm_acregmin, nmp->nm_acregmax,
+		    nmp->nm_acdirmin, nmp->nm_acdirmax);
 
 	if (nfs_acdebug)
-		ncl_printf("nfs_getattrcache: age = %d; final timeo = %d\n",
-			   (time_second - np->n_attrstamp), timeo);
+		printf("ncl_getattrcache: age = %d; final timeo = %d\n",
+		    (time_second - np->n_attrstamp), timeo);
 #endif
 
 	if ((time_second - np->n_attrstamp) >= timeo &&
@@ -238,9 +224,6 @@
 	    (mustflush != 0 || np->n_attrstamp == 0)) {
 		newnfsstats.attrcache_misses++;
 		mtx_unlock(&np->n_mtx);
-#ifdef NFS_ACDEBUG
-		mtx_unlock(&Giant);	/* ncl_printf() */
-#endif
 		KDTRACE_NFS_ATTRCACHE_GET_MISS(vp);
 		return( ENOENT);
 	}
@@ -268,9 +251,6 @@
 			vaper->va_mtime = np->n_mtim;
 	}
 	mtx_unlock(&np->n_mtx);
-#ifdef NFS_ACDEBUG
-	mtx_unlock(&Giant);	/* ncl_printf() */
-#endif
 	KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap);
 	return (0);
 }

Modified: trunk/sys/fs/nfsclient/nfs_clvfsops.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clvfsops.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clvfsops.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvfsops.c 317975 2017-05-08 19:57:43Z rmacklem $");
 
 
 #include "opt_bootp.h"
@@ -80,9 +81,11 @@
 extern struct timeval nfsboottime;
 extern struct nfsstats	newnfsstats;
 extern int nfsrv_useacl;
+extern int nfscl_debuglevel;
 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
 extern struct mtx ncl_iod_mutex;
+NFSCLSTATEMUTEX;
 
 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS request header");
 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct");
@@ -107,7 +110,7 @@
 static int	mountnfs(struct nfs_args *, struct mount *,
 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
 		    u_char *, int, struct vnode **, struct ucred *,
-		    struct thread *, int, int);
+		    struct thread *, int, int, int);
 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
 		    struct sockaddr_storage *, int *, off_t *,
 		    struct timeval *);
@@ -118,6 +121,7 @@
 static vfs_statfs_t nfs_statfs;
 static vfs_sync_t nfs_sync;
 static vfs_sysctl_t nfs_sysctl;
+static vfs_purge_t nfs_purge;
 
 /*
  * nfs vfs operations.
@@ -132,6 +136,7 @@
 	.vfs_uninit =		ncl_uninit,
 	.vfs_unmount =		nfs_unmount,
 	.vfs_sysctl =		nfs_sysctl,
+	.vfs_purge =		nfs_purge,
 };
 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
 
@@ -192,8 +197,8 @@
 	}
 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
 		nmp->nm_rsize = maxio;
-	if (nmp->nm_rsize > MAXBSIZE)
-		nmp->nm_rsize = MAXBSIZE;
+	if (nmp->nm_rsize > NFS_MAXBSIZE)
+		nmp->nm_rsize = NFS_MAXBSIZE;
 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
 		nmp->nm_readdirsize = maxio;
 	if (nmp->nm_readdirsize > nmp->nm_rsize)
@@ -200,17 +205,19 @@
 		nmp->nm_readdirsize = nmp->nm_rsize;
 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
 		nmp->nm_wsize = maxio;
-	if (nmp->nm_wsize > MAXBSIZE)
-		nmp->nm_wsize = MAXBSIZE;
+	if (nmp->nm_wsize > NFS_MAXBSIZE)
+		nmp->nm_wsize = NFS_MAXBSIZE;
 
 	/*
 	 * Calculate the size used for io buffers.  Use the larger
 	 * of the two sizes to minimise nfs requests but make sure
 	 * that it is at least one VM page to avoid wasting buffer
-	 * space.
+	 * space.  It must also be at least NFS_DIRBLKSIZ, since
+	 * that is the buffer size used for directories.
 	 */
 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
 	iosize = imax(iosize, PAGE_SIZE);
+	iosize = imax(iosize, NFS_DIRBLKSIZ);
 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
 	return (iosize);
 }
@@ -299,9 +306,11 @@
 	if (!error)
 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
 		    &attrflag, NULL);
+	if (error != 0)
+		NFSCL_DEBUG(2, "statfs=%d\n", error);
 	if (attrflag == 0) {
 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
-		    td->td_ucred, td, &nfsva, NULL);
+		    td->td_ucred, td, &nfsva, NULL, NULL);
 		if (ret) {
 			/*
 			 * Just set default values to get things going.
@@ -524,7 +533,7 @@
 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, 
-	    NFS_DEFAULT_NEGNAMETIMEO)) != 0) {
+	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
 		return (error);
 	}
@@ -588,10 +597,10 @@
 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
 	}
 
-	/* Clear NFSMNT_RESVPORT for NFSv4, since it is not required. */
-	if ((argp->flags & NFSMNT_NFSV4) != 0) {
-		argp->flags &= ~NFSMNT_RESVPORT;
-		nmp->nm_flag &= ~NFSMNT_RESVPORT;
+	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
+	if (nmp->nm_minorvers == 0) {
+		argp->flags &= ~NFSMNT_ONEOPENOWN;
+		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
 	}
 
 	/* Re-bind if rsrvd port requested and wasn't on one */
@@ -621,17 +630,27 @@
 
 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
 		nmp->nm_wsize = argp->wsize;
-		/* Round down to multiple of blocksize */
-		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
-		if (nmp->nm_wsize <= 0)
+		/*
+		 * Clip at the power of 2 below the size. There is an
+		 * issue (not isolated) that causes intermittent page
+		 * faults if this is not done.
+		 */
+		if (nmp->nm_wsize > NFS_FABLKSIZE)
+			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
+		else
 			nmp->nm_wsize = NFS_FABLKSIZE;
 	}
 
 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
 		nmp->nm_rsize = argp->rsize;
-		/* Round down to multiple of blocksize */
-		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
-		if (nmp->nm_rsize <= 0)
+		/*
+		 * Clip at the power of 2 below the size. There is an
+		 * issue (not isolated) that causes intermittent page
+		 * faults if this is not done.
+		 */
+		if (nmp->nm_rsize > NFS_FABLKSIZE)
+			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
+		else
 			nmp->nm_rsize = NFS_FABLKSIZE;
 	}
 
@@ -712,14 +731,15 @@
 }
 
 static const char *nfs_opts[] = { "from", "nfs_args",
-    "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
+    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
     "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
     "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
     "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
-    "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport",
-    "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec",
-    "principal", "nfsv4", "gssname", "allgssname", "dirpath",
-    "nametimeo", "negnametimeo", "nocto", "wcommitsize",
+    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
+    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
+    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
+    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
+    "pnfs", "wcommitsize", "oneopenown",
     NULL };
 
 /*
@@ -763,9 +783,10 @@
 	struct thread *td;
 	char hst[MNAMELEN];
 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
-	char *opt, *name, *secname;
+	char *cp, *opt, *name, *secname;
 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
+	int minvers = 0;
 	int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen;
 	size_t hstlen;
 
@@ -803,10 +824,16 @@
 	}
 
 	/* Handle the new style options. */
+	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
+		args.acdirmin = args.acdirmax =
+		    args.acregmin = args.acregmax = 0;
+		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
+		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
+	}
 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
 		args.flags |= NFSMNT_NOCONN;
 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
-		args.flags |= NFSMNT_NOCONN;
+		args.flags &= ~NFSMNT_NOCONN;
 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
 		args.flags |= NFSMNT_NOLOCKD;
 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
@@ -839,6 +866,12 @@
 		args.flags |= NFSMNT_ALLGSSNAME;
 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
 		args.flags |= NFSMNT_NOCTO;
+	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
+		args.flags |= NFSMNT_NONCONTIGWR;
+	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
+		args.flags |= NFSMNT_PNFS;
+	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
+		args.flags |= NFSMNT_ONEOPENOWN;
 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
 		if (opt == NULL) { 
 			vfs_mount_error(mp, "illegal readdirsize");
@@ -914,6 +947,18 @@
 		}
 		args.flags |= NFSMNT_RETRANS;
 	}
+	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
+		ret = sscanf(opt, "%d", &args.acregmin);
+		if (ret != 1 || args.acregmin < 0) {
+			vfs_mount_error(mp, "illegal actimeo: %s",
+			    opt);
+			error = EINVAL;
+			goto out;
+		}
+		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
+		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
+		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
+	}
 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
 		ret = sscanf(opt, "%d", &args.acregmin);
 		if (ret != 1 || args.acregmin < 0) {
@@ -963,6 +1008,16 @@
 		}
 		args.flags |= NFSMNT_WCOMMITSIZE;
 	}
+	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
+		ret = sscanf(opt, "%d", &args.timeo);
+		if (ret != 1 || args.timeo <= 0) {
+			vfs_mount_error(mp, "illegal timeo: %s",
+			    opt);
+			error = EINVAL;
+			goto out;
+		}
+		args.flags |= NFSMNT_TIMEO;
+	}
 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
 		ret = sscanf(opt, "%d", &args.timeo);
 		if (ret != 1 || args.timeo <= 0) {
@@ -991,6 +1046,16 @@
 			goto out;
 		}
 	}
+	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
+	    0) {
+		ret = sscanf(opt, "%d", &minvers);
+		if (ret != 1 || minvers < 0 || minvers > 1 ||
+		    (args.flags & NFSMNT_NFSV4) == 0) {
+			vfs_mount_error(mp, "illegal minorversion: %s", opt);
+			error = EINVAL;
+			goto out;
+		}
+	}
 	if (vfs_getopt(mp->mnt_optnew, "sec",
 		(void **) &secname, NULL) == 0)
 		nfs_sec_name(secname, &args.flags);
@@ -1017,8 +1082,8 @@
 
 		/*
 		 * When doing an update, we can't change version,
-		 * security, switch lockd strategies or change cookie
-		 * translation
+		 * security, switch lockd strategies, change cookie
+		 * translation or switch oneopenown.
 		 */
 		args.flags = (args.flags &
 		    ~(NFSMNT_NFSV3 |
@@ -1026,6 +1091,7 @@
 		      NFSMNT_KERB |
 		      NFSMNT_INTEGRITY |
 		      NFSMNT_PRIVACY |
+		      NFSMNT_ONEOPENOWN |
 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
 		    (nmp->nm_flag &
 			(NFSMNT_NFSV3 |
@@ -1033,6 +1099,7 @@
 			 NFSMNT_KERB |
 			 NFSMNT_INTEGRITY |
 			 NFSMNT_PRIVACY |
+			 NFSMNT_ONEOPENOWN |
 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
 		goto out;
@@ -1093,14 +1160,23 @@
 			error = EINVAL;
 			goto out;
 		}
-		bcopy(args.hostname, hst, MNAMELEN);
-		hst[MNAMELEN - 1] = '\0';
+		if (len >= MNAMELEN) {
+			vfs_mount_error(mp, "Hostname too long");
+			error = EINVAL;
+			goto out;
+		}
+		bcopy(args.hostname, hst, len);
+		hst[len] = '\0';
 	}
 
 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
-	else
+	else {
 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
+		cp = strchr(srvkrbname, ':');
+		if (cp != NULL)
+			*cp = '\0';
+	}
 	srvkrbnamelen = strlen(srvkrbname);
 
 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
@@ -1135,12 +1211,14 @@
 	args.fh = nfh;
 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
-	    nametimeo, negnametimeo);
+	    nametimeo, negnametimeo, minvers);
 out:
 	if (!error) {
 		MNT_ILOCK(mp);
-		mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
-		    MNTK_NO_IOPF;
+		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
+		    MNTK_USES_BCACHE;
+		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
+			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
 		MNT_IUNLOCK(mp);
 	}
 	return (error);
@@ -1180,14 +1258,20 @@
 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
-    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo)
+    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
+    int minvers)
 {
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	int error, trycnt, ret;
 	struct nfsvattr nfsva;
+	struct nfsclclient *clp;
+	struct nfsclds *dsp, *tdsp;
+	uint32_t lease;
 	static u_int64_t clval = 0;
 
+	NFSCL_DEBUG(3, "in mnt\n");
+	clp = NULL;
 	if (mp->mnt_flag & MNT_UPDATE) {
 		nmp = VFSTONFS(mp);
 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
@@ -1198,6 +1282,7 @@
 		    krbnamelen + dirlen + srvkrbnamelen + 2,
 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
 		TAILQ_INIT(&nmp->nm_bufq);
+		TAILQ_INIT(&nmp->nm_sess);
 		if (clval == 0)
 			clval = (u_int64_t)nfsboottime.tv_sec;
 		nmp->nm_clval = clval++;
@@ -1259,10 +1344,17 @@
 	nmp->nm_timeo = NFS_TIMEO;
 	nmp->nm_retry = NFS_RETRANS;
 	nmp->nm_readahead = NFS_DEFRAHEAD;
-	if (desiredvnodes >= 11000)
-		nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
+
+	/* This is empirical approximation of sqrt(hibufspace) * 256. */
+	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
+	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
+		nmp->nm_wcommitsize *= 2;
+	nmp->nm_wcommitsize *= 256;
+
+	if ((argp->flags & NFSMNT_NFSV4) != 0)
+		nmp->nm_minorvers = minvers;
 	else
-		nmp->nm_wcommitsize = hibufspace / 10;
+		nmp->nm_minorvers = 0;
 
 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
 
@@ -1273,8 +1365,6 @@
 	 *
 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
 	 * that we can handle until we find out otherwise.
-	 * XXX Our "safe" limit on the client is what we can store in our
-	 * buffer cache using signed(!) block numbers.
 	 */
 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
 		nmp->nm_maxfilesize = 0xffffffffLL;
@@ -1312,17 +1402,18 @@
 
 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
 		goto bad;
+	/* For NFSv4.1, get the clientid now. */
+	if (nmp->nm_minorvers > 0) {
+		NFSCL_DEBUG(3, "at getcl\n");
+		error = nfscl_getcl(mp, cred, td, 0, &clp);
+		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
+		if (error != 0)
+			goto bad;
+	}
 
-	/*
-	 * A reference count is needed on the nfsnode representing the
-	 * remote root.  If this object is not persistent, then backward
-	 * traversals of the mount point (i.e. "..") will not work if
-	 * the nfsnode gets flushed out of the cache. Ufs does not have
-	 * this problem, because one can identify root inodes by their
-	 * number == ROOTINO (2).
-	 */
 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
 	    nmp->nm_dirpathlen > 0) {
+		NFSCL_DEBUG(3, "in dirp\n");
 		/*
 		 * If the fhsize on the mount point == 0 for V4, the mount
 		 * path needs to be looked up.
@@ -1331,6 +1422,7 @@
 		do {
 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
 			    cred, td);
+			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
 			if (error)
 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
 		} while (error && --trycnt > 0);
@@ -1339,6 +1431,15 @@
 			goto bad;
 		}
 	}
+
+	/*
+	 * A reference count is needed on the nfsnode representing the
+	 * remote root.  If this object is not persistent, then backward
+	 * traversals of the mount point (i.e. "..") will not work if
+	 * the nfsnode gets flushed out of the cache. Ufs does not have
+	 * this problem, because one can identify root inodes by their
+	 * number == ROOTINO (2).
+	 */
 	if (nmp->nm_fhsize > 0) {
 		/*
 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
@@ -1358,7 +1459,7 @@
 		 * (*vpp)->v_type with the correct value.
 		 */
 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
-		    cred, td, &nfsva, NULL);
+		    cred, td, &nfsva, NULL, &lease);
 		if (ret) {
 			/*
 			 * Just set default values to get things going.
@@ -1373,8 +1474,25 @@
 			nfsva.na_vattr.va_gen = 1;
 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
 			nfsva.na_vattr.va_size = 512 * 1024;
+			lease = 60;
 		}
 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
+		if (nmp->nm_minorvers > 0) {
+			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
+			NFSLOCKCLSTATE();
+			clp->nfsc_renew = NFSCL_RENEW(lease);
+			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
+			clp->nfsc_clientidrev++;
+			if (clp->nfsc_clientidrev == 0)
+				clp->nfsc_clientidrev++;
+			NFSUNLOCKCLSTATE();
+			/*
+			 * Mount will succeed, so the renew thread can be
+			 * started now.
+			 */
+			nfscl_start_renewthread(clp);
+			nfscl_clientrelease(clp);
+		}
 		if (argp->flags & NFSMNT_NFSV3)
 			ncl_fsinfo(nmp, *vpp, cred, td);
 	
@@ -1396,10 +1514,26 @@
 	error = EIO;
 
 bad:
+	if (clp != NULL)
+		nfscl_clientrelease(clp);
 	newnfs_disconnect(&nmp->nm_sockreq);
 	crfree(nmp->nm_sockreq.nr_cred);
+	if (nmp->nm_sockreq.nr_auth != NULL)
+		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
 	mtx_destroy(&nmp->nm_mtx);
+	if (nmp->nm_clp != NULL) {
+		NFSLOCKCLSTATE();
+		LIST_REMOVE(nmp->nm_clp, nfsc_list);
+		NFSUNLOCKCLSTATE();
+		free(nmp->nm_clp, M_NFSCLCLIENT);
+	}
+	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
+		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
+		    dsp->nfsclds_sockp != NULL)
+			newnfs_disconnect(dsp->nfsclds_sockp);
+		nfscl_freenfsclds(dsp);
+	}
 	FREE(nmp, M_NEWNFSMNT);
 	FREE(nam, M_SONAME);
 	return (error);
@@ -1414,6 +1548,7 @@
 	struct thread *td;
 	struct nfsmount *nmp;
 	int error, flags = 0, i, trycnt = 0;
+	struct nfsclds *dsp, *tdsp;
 
 	td = curthread;
 
@@ -1459,9 +1594,16 @@
 	newnfs_disconnect(&nmp->nm_sockreq);
 	crfree(nmp->nm_sockreq.nr_cred);
 	FREE(nmp->nm_nam, M_SONAME);
-
+	if (nmp->nm_sockreq.nr_auth != NULL)
+		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
 	mtx_destroy(&nmp->nm_mtx);
+	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
+		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
+		    dsp->nfsclds_sockp != NULL)
+			newnfs_disconnect(dsp->nfsclds_sockp);
+		nfscl_freenfsclds(dsp);
+	}
 	FREE(nmp, M_NEWNFSMNT);
 out:
 	return (error);
@@ -1614,6 +1756,19 @@
 }
 
 /*
+ * Purge any RPCs in progress, so that they will all return errors.
+ * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
+ * forced dismount.
+ */
+static void
+nfs_purge(struct mount *mp)
+{
+	struct nfsmount *nmp = VFSTONFS(mp);
+
+	newnfs_nmcancelreqs(nmp);
+}
+
+/*
  * Extract the information needed by the nlm from the nfs vnode.
  */
 static void
@@ -1689,6 +1844,14 @@
 	blen = buflen;
 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
 	    &blen);
+	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
+		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
+		    &blen);
+		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
+		    &buf, &blen);
+		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
+		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
+	}
 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
 	    &blen);
 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
@@ -1709,6 +1872,8 @@
 	    &blen);
 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
 	    &blen);
+	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
+	    ",noncontigwr", &buf, &blen);
 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
 	    0, ",lockd", &buf, &blen);
 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==

Modified: trunk/sys/fs/nfsclient/nfs_clvnops.c
===================================================================
--- trunk/sys/fs/nfsclient/nfs_clvnops.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_clvnops.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -33,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsclient/nfs_clvnops.c 321031 2017-07-15 19:24:54Z rmacklem $");
 
 /*
  * vnode op calls for Sun NFS version 2, 3 and 4
@@ -103,6 +104,7 @@
 
 extern struct nfsstats newnfsstats;
 extern int nfsrv_useacl;
+extern int nfscl_debuglevel;
 MALLOC_DECLARE(M_NEWNFSREQ);
 
 /*
@@ -138,7 +140,6 @@
 static vop_symlink_t	nfs_symlink;
 static vop_readdir_t	nfs_readdir;
 static vop_strategy_t	nfs_strategy;
-static vop_lock1_t	nfs_lock1;
 static	int	nfs_lookitup(struct vnode *, char *, int,
 		    struct ucred *, struct thread *, struct nfsnode **);
 static	int	nfs_sillyrename(struct vnode *, struct vnode *,
@@ -150,6 +151,7 @@
 static vop_advlockasync_t nfs_advlockasync;
 static vop_getacl_t nfs_getacl;
 static vop_setacl_t nfs_setacl;
+static vop_set_text_t nfs_set_text;
 
 /*
  * Global vfs data structures for nfs
@@ -167,7 +169,6 @@
 	.vop_putpages =		ncl_putpages,
 	.vop_inactive =		ncl_inactive,
 	.vop_link =		nfs_link,
-	.vop_lock1 = 		nfs_lock1,
 	.vop_lookup =		nfs_lookup,
 	.vop_mkdir =		nfs_mkdir,
 	.vop_mknod =		nfs_mknod,
@@ -187,6 +188,7 @@
 	.vop_write =		ncl_write,
 	.vop_getacl =		nfs_getacl,
 	.vop_setacl =		nfs_setacl,
+	.vop_set_text =		nfs_set_text,
 };
 
 struct vop_vector newnfs_fifoops = {
@@ -606,6 +608,10 @@
 		np->n_directio_opens++;
 	}
 
+	/* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
+	if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
+		np->n_flag |= NWRITEOPENED;
+
 	/*
 	 * If this is an open for writing, capture a reference to the
 	 * credentials, so they can be used by ncl_putpages(). Using
@@ -619,6 +625,7 @@
 	} else
 		cred = NULL;
 	mtx_unlock(&np->n_mtx);
+
 	if (cred != NULL)
 		crfree(cred);
 	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
@@ -691,9 +698,9 @@
 	     * mmap'ed writes or via write().
 	     */
 	    if (nfs_clean_pages_on_close && vp->v_object) {
-		VM_OBJECT_LOCK(vp->v_object);
+		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
-		VM_OBJECT_UNLOCK(vp->v_object);
+		VM_OBJECT_WUNLOCK(vp->v_object);
 	    }
 	    mtx_lock(&np->n_mtx);
 	    if (np->n_flag & NMODIFIED) {
@@ -715,12 +722,12 @@
 		     * traditional vnode locking implemented for Vnode Ops.
 		     */
 		    int cm = newnfs_commit_on_close ? 1 : 0;
-		    error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0);
+		    error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0);
 		    /* np->n_flag &= ~NMODIFIED; */
 		} else if (NFS_ISV4(vp)) { 
 			if (nfscl_mustflush(vp) != 0) {
 				int cm = newnfs_commit_on_close ? 1 : 0;
-				error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td,
+				error = ncl_flush(vp, MNT_WAIT, ap->a_td,
 				    cm, 0);
 				/*
 				 * as above w.r.t races when clearing
@@ -762,7 +769,9 @@
 		/*
 		 * Get attributes so "change" is up to date.
 		 */
-		if (error == 0 && nfscl_mustflush(vp) != 0) {
+		if (error == 0 && nfscl_mustflush(vp) != 0 &&
+		    vp->v_type == VREG &&
+		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
 			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
 			    NULL);
 			if (!ret) {
@@ -1062,7 +1071,7 @@
 
 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
 		return (error);
-	error = cache_lookup_times(dvp, vpp, cnp, &nctime, &ncticks);
+	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
 	if (error > 0 && error != ENOENT)
 		return (error);
 	if (error == -1) {
@@ -1177,8 +1186,7 @@
 			return (EJUSTRETURN);
 		}
 
-		if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE &&
-		    dattrflag) {
+		if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
 			/*
 			 * Cache the modification time of the parent
 			 * directory from the post-op attributes in
@@ -1362,9 +1370,18 @@
 {
 	int error, ret, attrflag;
 	struct nfsvattr nfsva;
+	struct nfsmount *nmp;
 
-	error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag,
-	    NULL);
+	nmp = VFSTONFS(vnode_mount(vp));
+	error = EIO;
+	attrflag = 0;
+	if (NFSHASPNFS(nmp))
+		error = nfscl_doiods(vp, uiop, NULL, NULL,
+		    NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td);
+	NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
+	if (error != 0)
+		error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
+		    &attrflag, NULL);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 		if (ret && !error)
@@ -1383,10 +1400,20 @@
     int *iomode, int *must_commit, int called_from_strategy)
 {
 	struct nfsvattr nfsva;
-	int error = 0, attrflag, ret;
+	int error, attrflag, ret;
+	struct nfsmount *nmp;
 
-	error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
-	    uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy);
+	nmp = VFSTONFS(vnode_mount(vp));
+	error = EIO;
+	attrflag = 0;
+	if (NFSHASPNFS(nmp))
+		error = nfscl_doiods(vp, uiop, iomode, must_commit,
+		    NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td);
+	NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
+	if (error != 0)
+		error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
+		    uiop->uio_td, &nfsva, &attrflag, NULL,
+		    called_from_strategy);
 	if (attrflag) {
 		if (VTONFS(vp)->n_flag & ND_NFSV4)
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
@@ -1581,20 +1608,6 @@
 		}
 	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
 		if (nfscl_checksattr(vap, &nfsva)) {
-			/*
-			 * We are normally called with only a partially
-			 * initialized VAP. Since the NFSv3 spec says that
-			 * the server may use the file attributes to
-			 * store the verifier, the spec requires us to do a
-			 * SETATTR RPC. FreeBSD servers store the verifier in
-			 * atime, but we can't really assume that all servers
-			 * will so we ensure that our SETATTR sets both atime
-			 * and mtime.
-			 */
-			if (vap->va_mtime.tv_sec == VNOVAL)
-				vfs_timestamp(&vap->va_mtime);
-			if (vap->va_atime.tv_sec == VNOVAL)
-				vap->va_atime = vap->va_mtime;
 			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
 			    cnp->cn_thread, &nfsva, &attrflag, NULL);
 			if (error && (vap->va_uid != (uid_t)VNOVAL ||
@@ -1776,7 +1789,7 @@
 	}
 
 	if (fvp == tvp) {
-		ncl_printf("nfs_rename: fvp == tvp (can't happen)\n");
+		printf("nfs_rename: fvp == tvp (can't happen)\n");
 		error = 0;
 		goto out;
 	}
@@ -1950,10 +1963,6 @@
 	struct nfsvattr nfsva, dnfsva;
 	int error = 0, attrflag, dattrflag;
 
-	if (vp->v_mount != tdvp->v_mount) {
-		return (EXDEV);
-	}
-
 	/*
 	 * Push all writes to the server, so that the attribute cache
 	 * doesn't get "out of sync" with the server.
@@ -2203,7 +2212,7 @@
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct uio *uio = ap->a_uio;
-	ssize_t tresid;
+	ssize_t tresid, left;
 	int error = 0;
 	struct vattr vattr;
 	
@@ -2232,6 +2241,17 @@
 	}
 
 	/*
+	 * NFS always guarantees that directory entries don't straddle
+	 * DIRBLKSIZ boundaries.  As such, we need to limit the size
+	 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
+	 * directory entry.
+	 */
+	left = uio->uio_resid % DIRBLKSIZ;
+	if (left == uio->uio_resid)
+		return (EINVAL);
+	uio->uio_resid -= left;
+
+	/*
 	 * Call ncl_bioread() to do the real work.
 	 */
 	tresid = uio->uio_resid;
@@ -2242,6 +2262,9 @@
 		if (ap->a_eofflag != NULL)
 			*ap->a_eofflag = 1;
 	}
+	
+	/* Add the partial DIRBLKSIZ (left) back in. */
+	uio->uio_resid += left;
 	return (error);
 }
 
@@ -2294,7 +2317,7 @@
 			dnp->n_direofoffset = uiop->uio_offset;
 		else {
 			if (uiop->uio_resid > 0)
-				ncl_printf("EEK! readdirrpc resid > 0\n");
+				printf("EEK! readdirrpc resid > 0\n");
 			ncl_dircookie_lock(dnp);
 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 			*cookiep = cookie;
@@ -2353,7 +2376,7 @@
 			dnp->n_direofoffset = uiop->uio_offset;
 		else {
 			if (uiop->uio_resid > 0)
-				ncl_printf("EEK! readdirplusrpc resid > 0\n");
+				printf("EEK! readdirplusrpc resid > 0\n");
 			ncl_dircookie_lock(dnp);
 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 			*cookiep = cookie;
@@ -2540,30 +2563,39 @@
 {
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct nfsnode *np;
+	struct uio uio;
 	int error, attrflag;
-	u_char verf[NFSX_VERF];
 
-	mtx_lock(&nmp->nm_mtx);
-	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
-		mtx_unlock(&nmp->nm_mtx);
-		return (0);
+	np = VTONFS(vp);
+	error = EIO;
+	attrflag = 0;
+	if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) {
+		uio.uio_offset = offset;
+		uio.uio_resid = cnt;
+		error = nfscl_doiods(vp, &uio, NULL, NULL,
+		    NFSV4OPEN_ACCESSWRITE, 1, cred, td);
+		if (error != 0) {
+			mtx_lock(&np->n_mtx);
+			np->n_flag &= ~NDSCOMMIT;
+			mtx_unlock(&np->n_mtx);
+		}
 	}
-	mtx_unlock(&nmp->nm_mtx);
-	error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva,
-	    &attrflag, NULL);
-	if (!error) {
+	if (error != 0) {
 		mtx_lock(&nmp->nm_mtx);
-		if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) {
-			NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF);
-			error = NFSERR_STALEWRITEVERF;
+		if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
+			mtx_unlock(&nmp->nm_mtx);
+			return (0);
 		}
 		mtx_unlock(&nmp->nm_mtx);
-		if (!error && attrflag)
-			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
-			    0, 1);
-	} else if (NFS_ISV4(vp)) {
+		error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva,
+		    &attrflag, NULL);
+	}
+	if (attrflag != 0)
+		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
+		    0, 1);
+	if (error != 0 && NFS_ISV4(vp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
-	}
 	return (error);
 }
 
@@ -2616,7 +2648,7 @@
 		 */
 		return (0);
 	}
-	return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
+	return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0));
 }
 
 /*
@@ -2628,7 +2660,7 @@
  * waiting for a buffer write to complete.
  */
 int
-ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
+ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
     int commit, int called_from_renewthread)
 {
 	struct nfsnode *np = VTONFS(vp);
@@ -2651,7 +2683,7 @@
 	if (called_from_renewthread != 0)
 		slptimeo = hz;
 	if (nmp->nm_flag & NFSMNT_INT)
-		slpflag = NFS_PCATCH;
+		slpflag = PCATCH;
 	if (!commit)
 		passone = 0;
 	bo = &vp->v_bufobj;
@@ -2836,7 +2868,7 @@
 
 			error = BUF_TIMELOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
+			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
 			if (error == 0) {
 				BUF_UNLOCK(bp);
 				goto loop;
@@ -2857,7 +2889,7 @@
 				error = EINTR;
 				goto done;
 			}
-			if (slpflag & PCATCH) {
+			if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			}
@@ -2903,7 +2935,7 @@
 			    error = newnfs_sigintr(nmp, td);
 			    if (error)
 				goto done;
-			    if (slpflag & PCATCH) {
+			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			    }
@@ -2935,7 +2967,16 @@
 		mtx_unlock(&np->n_mtx);
 	} else
 		BO_UNLOCK(bo);
-	mtx_lock(&np->n_mtx);
+	if (NFSHASPNFS(nmp)) {
+		nfscl_layoutcommit(vp, td);
+		/*
+		 * Invalidate the attribute cache, since writes to a DS
+		 * won't update the size attribute.
+		 */
+		mtx_lock(&np->n_mtx);
+		np->n_attrstamp = 0;
+	} else
+		mtx_lock(&np->n_mtx);
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
@@ -2949,14 +2990,17 @@
 		free(bvec, M_TEMP);
 	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
 	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
-	     np->n_directio_asyncwr != 0) && trycnt++ < 5) {
-		/* try, try again... */
-		passone = 1;
-		wcred = NULL;
-		bvec = NULL;
-		bvecsize = 0;
-printf("try%d\n", trycnt);
-		goto again;
+	    np->n_directio_asyncwr != 0)) {
+		if (trycnt++ < 5) {
+			/* try, try again... */
+			passone = 1;
+			wcred = NULL;
+			bvec = NULL;
+			bvecsize = 0;
+			goto again;
+		}
+		vn_printf(vp, "ncl_flush failed");
+		error = called_from_renewthread != 0 ? EIO : EBUSY;
 	}
 	return (error);
 }
@@ -2997,7 +3041,7 @@
 		if (ap->a_op == F_UNLCK &&
 		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
 		    ap->a_flags))
-			(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
+			(void) ncl_flush(vp, MNT_WAIT, td, 1, 0);
 
 		/*
 		 * Loop around doing the lock op, while a blocking lock
@@ -3054,6 +3098,10 @@
 					np->n_change = va.va_filerev;
 				}
 			}
+			/* Mark that a file lock has been acquired. */
+			mtx_lock(&np->n_mtx);
+			np->n_flag |= NHASBEENLOCKED;
+			mtx_unlock(&np->n_mtx);
 		}
 		NFSVOPUNLOCK(vp, 0);
 		return (0);
@@ -3073,6 +3121,16 @@
 				error = ENOLCK;
 			}
 		}
+		if (error == 0 && ap->a_op == F_SETLK) {
+			error = NFSVOPLOCK(vp, LK_SHARED);
+			if (error == 0) {
+				/* Mark that a file lock has been acquired. */
+				mtx_lock(&np->n_mtx);
+				np->n_flag |= NHASBEENLOCKED;
+				mtx_unlock(&np->n_mtx);
+				NFSVOPUNLOCK(vp, 0);
+			}
+		}
 	}
 	return (error);
 }
@@ -3112,8 +3170,8 @@
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 
-	ncl_printf("\tfileid %ld fsid 0x%x",
-	   np->n_vattr.na_fileid, np->n_vattr.na_fsid);
+	printf("\tfileid %ld fsid 0x%x", np->n_vattr.na_fileid,
+	    np->n_vattr.na_fsid);
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
@@ -3318,38 +3376,7 @@
 	.bop_bdflush	=	bufbdflush,
 };
 
-/*
- * Cloned from vop_stdlock(), and then the ugly hack added.
- */
 static int
-nfs_lock1(struct vop_lock1_args *ap)
-{
-	struct vnode *vp = ap->a_vp;
-	int error = 0;
-
-	/*
-	 * Since vfs_hash_get() calls vget() and it will no longer work
-	 * for FreeBSD8 with flags == 0, I can only think of this horrible
-	 * hack to work around it. I call vfs_hash_get() with LK_EXCLOTHER
-	 * and then handle it here. All I want for this case is a v_usecount
-	 * on the vnode to use for recovery, while another thread might
-	 * hold a lock on the vnode. I have the other threads blocked, so
-	 * there isn't any race problem.
-	 */
-	if ((ap->a_flags & LK_TYPE_MASK) == LK_EXCLOTHER) {
-		if ((ap->a_flags & LK_INTERLOCK) == 0)
-			panic("ncllock1");
-		if ((vp->v_iflag & VI_DOOMED))
-			error = ENOENT;
-		VI_UNLOCK(vp);
-		return (error);
-	}
-	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
-	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
-	    ap->a_line));
-}
-
-static int
 nfs_getacl(struct vop_getacl_args *ap)
 {
 	int error;
@@ -3381,6 +3408,36 @@
 	return (error);
 }
 
+static int
+nfs_set_text(struct vop_set_text_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct nfsnode *np;
+
+	/*
+	 * If the text file has been mmap'd, flush any dirty pages to the
+	 * buffer cache and then...
+	 * Make sure all writes are pushed to the NFS server.  If this is not
+	 * done, the modify time of the file can change while the text
+	 * file is being executed.  This will cause the process that is
+	 * executing the text file to be terminated.
+	 */
+	if (vp->v_object != NULL) {
+		VM_OBJECT_WLOCK(vp->v_object);
+		vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
+		VM_OBJECT_WUNLOCK(vp->v_object);
+	}
+
+	/* And, finally, make sure that n_mtime is up to date. */
+	np = VTONFS(vp);
+	mtx_lock(&np->n_mtx);
+	np->n_mtime = np->n_vattr.na_mtime;
+	mtx_unlock(&np->n_mtx);
+
+	vp->v_vflag |= VV_TEXT;
+	return (0);
+}
+
 /*
  * Return POSIX pathconf information applicable to nfs filesystems.
  */
@@ -3393,12 +3450,15 @@
 	struct thread *td = curthread;
 	int attrflag, error;
 
-	if (NFS_ISV4(vp) || (NFS_ISV3(vp) && (ap->a_name == _PC_LINK_MAX ||
+	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
 	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
-	    ap->a_name == _PC_NO_TRUNC))) {
+	    ap->a_name == _PC_NO_TRUNC)) ||
+	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
 		/*
 		 * Since only the above 4 a_names are returned by the NFSv3
 		 * Pathconf RPC, there is no point in doing it for others.
+		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
+		 * be used for _PC_NFS4_ACL as well.
 		 */
 		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
 		    &attrflag, NULL);

Modified: trunk/sys/fs/nfsclient/nfs_kdtrace.h
===================================================================
--- trunk/sys/fs/nfsclient/nfs_kdtrace.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfs_kdtrace.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Robert N. M. Watson
  * All rights reserved.
@@ -26,7 +27,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfsclient/nfs_kdtrace.h 223280 2011-06-18 23:02:53Z rmacklem $
  */
 
 #ifndef _NFSCL_NFS_KDTRACE_H_

Modified: trunk/sys/fs/nfsclient/nfsmount.h
===================================================================
--- trunk/sys/fs/nfsclient/nfsmount.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfsmount.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfsclient/nfsmount.h 317404 2017-04-25 11:36:25Z rmacklem $
  */
 
 #ifndef _NFSCLIENT_NFSMOUNT_H_
@@ -70,10 +71,12 @@
 	int	nm_negnametimeo;	/* timeout for -ve entries (sec) */
 
 	/* Newnfs additions */
+	TAILQ_HEAD(, nfsclds) nm_sess;	/* Session(s) for NFSv4.1. */
 	struct	nfsclclient *nm_clp;
 	uid_t	nm_uid;			/* Uid for SetClientID etc. */
 	u_int64_t nm_clval;		/* identifies which clientid */
 	u_int64_t nm_fsid[2];		/* NFSv4 fsid */
+	int	nm_minorvers;		/* Minor version # for NFSv4 */
 	u_int16_t nm_krbnamelen;	/* Krb5 host principal, if any */
 	u_int16_t nm_dirpathlen;	/* and mount dirpath, for V4 */
 	u_int16_t nm_srvkrbnamelen;	/* and the server's target name */
@@ -107,6 +110,25 @@
  */
 #define	VFSTONFS(mp)	((struct nfsmount *)((mp)->mnt_data))
 
+/*
+ * Get a pointer to the MDS session, which is always the first element
+ * in the list.
+ * This macro can only be safely used when the NFSLOCKMNT() lock is held.
+ * The inline function can be used when the lock isn't held.
+ */
+#define	NFSMNT_MDSSESSION(m)	(&(TAILQ_FIRST(&((m)->nm_sess))->nfsclds_sess))
+
+static __inline struct nfsclsession *
+nfsmnt_mdssession(struct nfsmount *nmp)
+{
+	struct nfsclsession *tsep;
+
+	mtx_lock(&nmp->nm_mtx);
+	tsep = NFSMNT_MDSSESSION(nmp);
+	mtx_unlock(&nmp->nm_mtx);
+	return (tsep);
+}
+
 #ifndef NFS_DEFAULT_NAMETIMEO
 #define NFS_DEFAULT_NAMETIMEO		60
 #endif

Modified: trunk/sys/fs/nfsclient/nfsnode.h
===================================================================
--- trunk/sys/fs/nfsclient/nfsnode.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nfsnode.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfsclient/nfsnode.h 321031 2017-07-15 19:24:54Z rmacklem $
  */
 
 #ifndef _NFSCLIENT_NFSNODE_H_
@@ -99,9 +100,6 @@
 	time_t			n_attrstamp;	/* Attr. cache timestamp */
 	struct nfs_accesscache	n_accesscache[NFS_ACCESSCACHESIZE];
 	struct timespec		n_mtime;	/* Prev modify time. */
-	struct timespec		n_unused0;
-	struct timespec		n_unused1;
-	int			n_unused2;
 	struct nfsfh		*n_fhp;		/* NFS File Handle */
 	struct vnode		*n_vnode;	/* associated vnode */
 	struct vnode		*n_dvp;		/* parent vnode */
@@ -158,6 +156,10 @@
 #define	NREMOVEWANT	0x00004000  /* Want notification that remove is done */
 #define	NLOCK		0x00008000  /* Sleep lock the node */
 #define	NLOCKWANT	0x00010000  /* Want the sleep lock */
+#define	NNOLAYOUT	0x00020000  /* Can't get a layout for this file */
+#define	NWRITEOPENED	0x00040000  /* Has been opened for writing */
+#define	NHASBEENLOCKED	0x00080000  /* Has been file locked. */
+#define	NDSCOMMIT	0x00100000  /* Commit is done via the DS. */
 
 /*
  * Convert between nfsnode pointers and vnode pointers
@@ -185,7 +187,6 @@
 void	ncl_invaldir(struct vnode *);
 int	ncl_upgrade_vnlock(struct vnode *);
 void	ncl_downgrade_vnlock(struct vnode *, int);
-void	ncl_printf(const char *, ...);
 void	ncl_dircookie_lock(struct nfsnode *);
 void	ncl_dircookie_unlock(struct nfsnode *);
 

Modified: trunk/sys/fs/nfsclient/nlminfo.h
===================================================================
--- trunk/sys/fs/nfsclient/nlminfo.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsclient/nlminfo.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
  * Redistribution and use in source and binary forms, with or without
@@ -24,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nfsclient/nlminfo.h 191783 2009-05-04 15:23:58Z rmacklem $
  */
 
 /*

Modified: trunk/sys/fs/nfsserver/nfs_fha_new.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_fha_new.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_fha_new.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_fha_new.c 322138 2017-08-07 07:40:00Z mav $");
 
 #include <fs/nfs/nfsport.h>
 
@@ -42,7 +42,7 @@
 static void fhanew_uninit(void *foo);
 rpcproc_t fhanew_get_procnum(rpcproc_t procnum);
 int fhanew_realign(struct mbuf **mb, int malloc_flags);
-int fhanew_get_fh(fhandle_t *fh, int v3, struct mbuf **md, caddr_t *dpos);
+int fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md, caddr_t *dpos);
 int fhanew_is_read(rpcproc_t procnum);
 int fhanew_is_write(rpcproc_t procnum);
 int fhanew_get_offset(struct mbuf **md, caddr_t *dpos, int v3,
@@ -94,7 +94,7 @@
 	sysctl_ctx_init(&softc->sysctl_ctx);
 	softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx,
 	    SYSCTL_STATIC_CHILDREN(_vfs_nfsd), OID_AUTO, "fha", CTLFLAG_RD,
-	    0, "fha node");
+	    0, "NFS File Handle Affinity (FHA)");
 	if (softc->sysctl_tree == NULL) {
 		printf("%s: unable to allocate sysctl tree\n", __func__);
 		return;
@@ -129,11 +129,13 @@
 }
 
 int
-fhanew_get_fh(fhandle_t *fh, int v3, struct mbuf **md, caddr_t *dpos)
+fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md, caddr_t *dpos)
 {
 	struct nfsrv_descript lnd, *nd;
 	uint32_t *tl;
-	int error, len;
+	uint8_t *buf;
+	uint64_t t;
+	int error, len, i;
 
 	error = 0;
 	len = 0;
@@ -152,11 +154,13 @@
 		len = NFSX_V2FH;
 	}
 
+	t = 0;
 	if (len != 0) {
-		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, len);
-		bcopy(tl, fh, len);
-	} else
-		bzero(fh, sizeof(*fh));
+		NFSM_DISSECT_NONBLOCK(buf, uint8_t *, len);
+		for (i = 0; i < len; i++)
+			t ^= ((uint64_t)buf[i] << (i & 7) * 8);
+	}
+	*fh = t;
 
 nfsmout:
 	*md = nd->nd_md;

Modified: trunk/sys/fs/nfsserver/nfs_fha_new.h
===================================================================
--- trunk/sys/fs/nfsserver/nfs_fha_new.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_fha_new.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -25,7 +25,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-/* $FreeBSD$ */
+/* $FreeBSD: stable/10/sys/fs/nfsserver/nfs_fha_new.h 249592 2013-04-17 21:00:22Z ken $ */
 
 #ifndef	_NFS_FHA_NEW_H
 #define	_NFS_FHA_NEW_H 1
@@ -38,42 +38,3 @@
 #endif /* _KERNEL */
 
 #endif /* _NFS_FHA_NEW_H */
-/*-
- * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
- * Copyright (c) 2013 Spectra Logic Corporation
- *
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/* $FreeBSD$ */
-
-#ifndef	_NFS_FHA_NEW_H
-#define	_NFS_FHA_NEW_H 1
-
-#ifdef	_KERNEL
-
-#define	FHANEW_SERVER_NAME	"nfsd"
-
-SVCTHREAD *fhanew_assign(SVCTHREAD *this_thread, struct svc_req *req);
-#endif /* _KERNEL */
-
-#endif /* _NFS_FHA_NEW_H */

Modified: trunk/sys/fs/nfsserver/nfs_nfsdcache.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_nfsdcache.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_nfsdcache.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdcache.c 269398 2014-08-01 21:10:41Z rmacklem $");
 
 /*
  * Here is the basic algorithm:
@@ -160,15 +161,52 @@
 #include <fs/nfs/nfsport.h>
 
 extern struct nfsstats newnfsstats;
-NFSCACHEMUTEX;
+extern struct mtx nfsrc_udpmtx;
+extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
+extern struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
 int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
 #endif	/* !APPLEKEXT */
 
-static int nfsrc_tcpnonidempotent = 1;
-static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0;
+SYSCTL_DECL(_vfs_nfsd);
+
+static u_int	nfsrc_tcphighwater = 0;
+static int
+sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
+{
+	int error, newhighwater;
+
+	newhighwater = nfsrc_tcphighwater;
+	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (newhighwater < 0)
+		return (EINVAL);
+	if (newhighwater >= nfsrc_floodlevel)
+		nfsrc_floodlevel = newhighwater + newhighwater / 5;
+	nfsrc_tcphighwater = newhighwater;
+	return (0);
+}
+SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
+    sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
+    "High water mark for TCP cache entries");
+
+static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
+    &nfsrc_udphighwater, 0,
+    "High water mark for UDP cache entries");
+static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
+    &nfsrc_tcptimeout, 0,
+    "Timeout for TCP entries in the DRC");
+static u_int nfsrc_tcpnonidempotent = 1;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
+    &nfsrc_tcpnonidempotent, 0,
+    "Enable the DRC for NFS over TCP");
+
+static int nfsrc_udpcachesize = 0;
 static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
-static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE],
-    nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
+static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
+
 /*
  * and the reverse mapping from generic to Version 2 procedure numbers
  */
@@ -197,10 +235,12 @@
 	NFSV2PROC_NOOP,
 };
 
+#define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
 #define	NFSRCUDPHASH(xid) \
-	(&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
+	(&nfsrvudphashtbl[nfsrc_hash(xid)])
 #define	NFSRCHASH(xid) \
-	(&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
+	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
+#define	NFSRCAHASH(xid) (&nfsrcahash_table[nfsrc_hash(xid)])
 #define	TRUE	1
 #define	FALSE	0
 #define	NFSRVCACHE_CHECKLEN	100
@@ -244,13 +284,22 @@
 static void nfsrc_unlock(struct nfsrvcache *rp);
 static void nfsrc_wanted(struct nfsrvcache *rp);
 static void nfsrc_freecache(struct nfsrvcache *rp);
-static void nfsrc_trimcache(u_int64_t, struct socket *);
-static int nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t,
-    struct socket *);
 static int nfsrc_getlenandcksum(mbuf_t m1, u_int16_t *cksum);
 static void nfsrc_marksametcpconn(u_int64_t);
 
 /*
+ * Return the correct mutex for this cache entry.
+ */
+static __inline struct mtx *
+nfsrc_cachemutex(struct nfsrvcache *rp)
+{
+
+	if ((rp->rc_flag & RC_UDP) != 0)
+		return (&nfsrc_udpmtx);
+	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
+}
+
+/*
  * Initialize the server request cache list
  */
 APPLESTATIC void
@@ -264,7 +313,8 @@
 	inited = 1;
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 		LIST_INIT(&nfsrvudphashtbl[i]);
-		LIST_INIT(&nfsrvhashtbl[i]);
+		LIST_INIT(&nfsrchash_table[i].tbl);
+		LIST_INIT(&nfsrcahash_table[i].tbl);
 	}
 	TAILQ_INIT(&nfsrvudplru);
 	nfsrc_tcpsavedreplies = 0;
@@ -276,10 +326,9 @@
 /*
  * Get a cache entry for this request. Basically just malloc a new one
  * and then call nfsrc_getudp() or nfsrc_gettcp() to do the rest.
- * Call nfsrc_trimcache() to clean up the cache before returning.
  */
 APPLESTATIC int
-nfsrvd_getcache(struct nfsrv_descript *nd, struct socket *so)
+nfsrvd_getcache(struct nfsrv_descript *nd)
 {
 	struct nfsrvcache *newrp;
 	int ret;
@@ -307,7 +356,6 @@
 	} else {
 		ret = nfsrc_gettcp(nd, newrp);
 	}
-	nfsrc_trimcache(nd->nd_sockref, so);
 	NFSEXITCODE2(0, nd);
 	return (ret);
 }
@@ -325,10 +373,12 @@
 	struct sockaddr_in6 *saddr6;
 	struct nfsrvhashhead *hp;
 	int ret = 0;
+	struct mtx *mutex;
 
+	mutex = nfsrc_cachemutex(newrp);
 	hp = NFSRCUDPHASH(newrp->rc_xid);
 loop:
-	NFSLOCKCACHE();
+	mtx_lock(mutex);
 	LIST_FOREACH(rp, hp, rc_hash) {
 	    if (newrp->rc_xid == rp->rc_xid &&
 		newrp->rc_proc == rp->rc_proc &&
@@ -336,8 +386,8 @@
 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
 			if ((rp->rc_flag & RC_LOCKED) != 0) {
 				rp->rc_flag |= RC_WANTED;
-				(void)mtx_sleep(rp, NFSCACHEMUTEXPTR,
-				    (PZERO - 1) | PDROP, "nfsrc", 10 * hz);
+				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+				    "nfsrc", 10 * hz);
 				goto loop;
 			}
 			if (rp->rc_flag == 0)
@@ -347,7 +397,7 @@
 			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
 			if (rp->rc_flag & RC_INPROG) {
 				newnfsstats.srvcache_inproghits++;
-				NFSUNLOCKCACHE();
+				mtx_unlock(mutex);
 				ret = RC_DROPIT;
 			} else if (rp->rc_flag & RC_REPSTATUS) {
 				/*
@@ -354,7 +404,7 @@
 				 * V2 only.
 				 */
 				newnfsstats.srvcache_nonidemdonehits++;
-				NFSUNLOCKCACHE();
+				mtx_unlock(mutex);
 				nfsrvd_rephead(nd);
 				*(nd->nd_errp) = rp->rc_status;
 				ret = RC_REPLY;
@@ -362,9 +412,9 @@
 					NFSRVCACHE_UDPTIMEOUT;
 			} else if (rp->rc_flag & RC_REPMBUF) {
 				newnfsstats.srvcache_nonidemdonehits++;
-				NFSUNLOCKCACHE();
+				mtx_unlock(mutex);
 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
-					M_COPYALL, M_WAIT);
+					M_COPYALL, M_WAITOK);
 				ret = RC_REPLY;
 				rp->rc_timestamp = NFSD_MONOSEC +
 					NFSRVCACHE_UDPTIMEOUT;
@@ -377,7 +427,7 @@
 		}
 	}
 	newnfsstats.srvcache_misses++;
-	newnfsstats.srvcache_size++;
+	atomic_add_int(&newnfsstats.srvcache_size, 1);
 	nfsrc_udpcachesize++;
 
 	newrp->rc_flag |= RC_INPROG;
@@ -392,7 +442,7 @@
 	}
 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
 	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 	nd->nd_rp = newrp;
 	ret = RC_DOIT;
 
@@ -405,17 +455,19 @@
  * Update a request cache entry after the rpc has been done
  */
 APPLESTATIC struct nfsrvcache *
-nfsrvd_updatecache(struct nfsrv_descript *nd, struct socket *so)
+nfsrvd_updatecache(struct nfsrv_descript *nd)
 {
 	struct nfsrvcache *rp;
 	struct nfsrvcache *retrp = NULL;
 	mbuf_t m;
+	struct mtx *mutex;
 
 	rp = nd->nd_rp;
 	if (!rp)
 		panic("nfsrvd_updatecache null rp");
 	nd->nd_rp = NULL;
-	NFSLOCKCACHE();
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	nfsrc_lock(rp);
 	if (!(rp->rc_flag & RC_INPROG))
 		panic("nfsrvd_updatecache not inprog");
@@ -430,7 +482,7 @@
 	 */
 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
 		newnfsstats.srvcache_nonidemdonehits++;
-		NFSUNLOCKCACHE();
+		mtx_unlock(mutex);
 		nd->nd_repstat = 0;
 		if (nd->nd_mreq)
 			mbuf_freem(nd->nd_mreq);
@@ -437,8 +489,8 @@
 		if (!(rp->rc_flag & RC_REPMBUF))
 			panic("reply from cache");
 		nd->nd_mreq = m_copym(rp->rc_reply, 0,
-		    M_COPYALL, M_WAIT);
-		rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT;
+		    M_COPYALL, M_WAITOK);
+		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 		nfsrc_unlock(rp);
 		goto out;
 	}
@@ -463,21 +515,21 @@
 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
 			rp->rc_status = nd->nd_repstat;
 			rp->rc_flag |= RC_REPSTATUS;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 		} else {
 			if (!(rp->rc_flag & RC_UDP)) {
-			    nfsrc_tcpsavedreplies++;
+			    atomic_add_int(&nfsrc_tcpsavedreplies, 1);
 			    if (nfsrc_tcpsavedreplies >
 				newnfsstats.srvcache_tcppeak)
 				newnfsstats.srvcache_tcppeak =
 				    nfsrc_tcpsavedreplies;
 			}
-			NFSUNLOCKCACHE();
-			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAIT);
-			NFSLOCKCACHE();
+			mtx_unlock(mutex);
+			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
+			mtx_lock(mutex);
 			rp->rc_reply = m;
 			rp->rc_flag |= RC_REPMBUF;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 		}
 		if (rp->rc_flag & RC_UDP) {
 			rp->rc_timestamp = NFSD_MONOSEC +
@@ -484,8 +536,7 @@
 			    NFSRVCACHE_UDPTIMEOUT;
 			nfsrc_unlock(rp);
 		} else {
-			rp->rc_timestamp = NFSD_MONOSEC +
-			    NFSRVCACHE_TCPTIMEOUT;
+			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 			if (rp->rc_refcnt > 0)
 				nfsrc_unlock(rp);
 			else
@@ -493,11 +544,10 @@
 		}
 	} else {
 		nfsrc_freecache(rp);
-		NFSUNLOCKCACHE();
+		mtx_unlock(mutex);
 	}
 
 out:
-	nfsrc_trimcache(nd->nd_sockref, so);
 	NFSEXITCODE2(0, nd);
 	return (retrp);
 }
@@ -509,39 +559,37 @@
 APPLESTATIC void
 nfsrvd_delcache(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
+	mutex = nfsrc_cachemutex(rp);
 	if (!(rp->rc_flag & RC_INPROG))
 		panic("nfsrvd_delcache not in prog");
-	NFSLOCKCACHE();
+	mtx_lock(mutex);
 	rp->rc_flag &= ~RC_INPROG;
 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
 		nfsrc_freecache(rp);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*
  * Called after nfsrvd_updatecache() once the reply is sent, to update
- * the entry for nfsrc_activesocket() and unlock it. The argument is
+ * the entry's sequence number and unlock it. The argument is
  * the pointer returned by nfsrvd_updatecache().
  */
 APPLESTATIC void
-nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
+nfsrvd_sentcache(struct nfsrvcache *rp, int have_seq, uint32_t seq)
 {
-	tcp_seq tmp_seq;
+	struct nfsrchash_bucket *hbp;
 
-	if (!(rp->rc_flag & RC_LOCKED))
-		panic("nfsrvd_sentcache not locked");
-	if (!err) {
-		if ((so->so_proto->pr_domain->dom_family != AF_INET &&
-		     so->so_proto->pr_domain->dom_family != AF_INET6) ||
-		     so->so_proto->pr_protocol != IPPROTO_TCP)
-			panic("nfs sent cache");
-		if (nfsrv_getsockseqnum(so, &tmp_seq)) {
-			NFSLOCKCACHE();
-			rp->rc_tcpseq = tmp_seq;
-			rp->rc_flag |= RC_TCPSEQ;
-			NFSUNLOCKCACHE();
-		}
+	KASSERT(rp->rc_flag & RC_LOCKED, ("nfsrvd_sentcache not locked"));
+	if (have_seq) {
+		hbp = NFSRCAHASH(rp->rc_sockref);
+		mtx_lock(&hbp->mtx);
+		rp->rc_tcpseq = seq;
+		if (rp->rc_acked != RC_NO_ACK)
+			LIST_INSERT_HEAD(&hbp->tbl, rp, rc_ahash);
+		rp->rc_acked = RC_NO_ACK;
+		mtx_unlock(&hbp->mtx);
 	}
 	nfsrc_unlock(rp);
 }
@@ -559,11 +607,13 @@
 	struct nfsrvcache *hitrp;
 	struct nfsrvhashhead *hp, nfsrc_templist;
 	int hit, ret = 0;
+	struct mtx *mutex;
 
+	mutex = nfsrc_cachemutex(newrp);
 	hp = NFSRCHASH(newrp->rc_xid);
 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
 tryagain:
-	NFSLOCKCACHE();
+	mtx_lock(mutex);
 	hit = 1;
 	LIST_INIT(&nfsrc_templist);
 	/*
@@ -621,8 +671,8 @@
 		rp = hitrp;
 		if ((rp->rc_flag & RC_LOCKED) != 0) {
 			rp->rc_flag |= RC_WANTED;
-			(void)mtx_sleep(rp, NFSCACHEMUTEXPTR,
-			    (PZERO - 1) | PDROP, "nfsrc", 10 * hz);
+			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+			    "nfsrc", 10 * hz);
 			goto tryagain;
 		}
 		if (rp->rc_flag == 0)
@@ -630,7 +680,7 @@
 		rp->rc_flag |= RC_LOCKED;
 		if (rp->rc_flag & RC_INPROG) {
 			newnfsstats.srvcache_inproghits++;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_DROPIT;
@@ -639,24 +689,22 @@
 			 * V2 only.
 			 */
 			newnfsstats.srvcache_nonidemdonehits++;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_REPLY;
 			nfsrvd_rephead(nd);
 			*(nd->nd_errp) = rp->rc_status;
-			rp->rc_timestamp = NFSD_MONOSEC +
-				NFSRVCACHE_TCPTIMEOUT;
+			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 		} else if (rp->rc_flag & RC_REPMBUF) {
 			newnfsstats.srvcache_nonidemdonehits++;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_REPLY;
 			nd->nd_mreq = m_copym(rp->rc_reply, 0,
-				M_COPYALL, M_WAIT);
-			rp->rc_timestamp = NFSD_MONOSEC +
-				NFSRVCACHE_TCPTIMEOUT;
+				M_COPYALL, M_WAITOK);
+			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 		} else {
 			panic("nfs tcp cache1");
 		}
@@ -665,7 +713,7 @@
 		goto out;
 	}
 	newnfsstats.srvcache_misses++;
-	newnfsstats.srvcache_size++;
+	atomic_add_int(&newnfsstats.srvcache_size, 1);
 
 	/*
 	 * For TCP, multiple entries for a key are allowed, so don't
@@ -674,7 +722,7 @@
 	newrp->rc_cachetime = NFSD_MONOSEC;
 	newrp->rc_flag |= RC_INPROG;
 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 	nd->nd_rp = newrp;
 	ret = RC_DOIT;
 
@@ -685,16 +733,17 @@
 
 /*
  * Lock a cache entry.
- * Also puts a mutex lock on the cache list.
  */
 static void
 nfsrc_lock(struct nfsrvcache *rp)
 {
-	NFSCACHELOCKREQUIRED();
+	struct mtx *mutex;
+
+	mutex = nfsrc_cachemutex(rp);
+	mtx_assert(mutex, MA_OWNED);
 	while ((rp->rc_flag & RC_LOCKED) != 0) {
 		rp->rc_flag |= RC_WANTED;
-		(void)mtx_sleep(rp, NFSCACHEMUTEXPTR, PZERO - 1,
-		    "nfsrc", 0);
+		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
 	}
 	rp->rc_flag |= RC_LOCKED;
 }
@@ -705,11 +754,13 @@
 static void
 nfsrc_unlock(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
-	NFSLOCKCACHE();
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	rp->rc_flag &= ~RC_LOCKED;
 	nfsrc_wanted(rp);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*
@@ -731,21 +782,27 @@
 static void
 nfsrc_freecache(struct nfsrvcache *rp)
 {
+	struct nfsrchash_bucket *hbp;
 
-	NFSCACHELOCKREQUIRED();
 	LIST_REMOVE(rp, rc_hash);
 	if (rp->rc_flag & RC_UDP) {
 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
 		nfsrc_udpcachesize--;
+	} else if (rp->rc_acked != RC_NO_SEQ) {
+		hbp = NFSRCAHASH(rp->rc_sockref);
+		mtx_lock(&hbp->mtx);
+		if (rp->rc_acked == RC_NO_ACK)
+			LIST_REMOVE(rp, rc_ahash);
+		mtx_unlock(&hbp->mtx);
 	}
 	nfsrc_wanted(rp);
 	if (rp->rc_flag & RC_REPMBUF) {
 		mbuf_freem(rp->rc_reply);
 		if (!(rp->rc_flag & RC_UDP))
-			nfsrc_tcpsavedreplies--;
+			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
 	}
 	FREE((caddr_t)rp, M_NFSRVCACHE);
-	newnfsstats.srvcache_size--;
+	atomic_add_int(&newnfsstats.srvcache_size, -1);
 }
 
 /*
@@ -757,12 +814,13 @@
 	struct nfsrvcache *rp, *nextrp;
 	int i;
 
-	NFSLOCKCACHE();
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
+		mtx_lock(&nfsrchash_table[i].mtx);
+		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
 			nfsrc_freecache(rp);
-		}
+		mtx_unlock(&nfsrchash_table[i].mtx);
 	}
+	mtx_lock(&nfsrc_udpmtx);
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
 			nfsrc_freecache(rp);
@@ -769,39 +827,147 @@
 		}
 	}
 	newnfsstats.srvcache_size = 0;
+	mtx_unlock(&nfsrc_udpmtx);
 	nfsrc_tcpsavedreplies = 0;
-	NFSUNLOCKCACHE();
 }
 
+#define HISTSIZE	16
 /*
  * The basic rule is to get rid of entries that are expired.
  */
-static void
-nfsrc_trimcache(u_int64_t sockref, struct socket *so)
+void
+nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final)
 {
+	struct nfsrchash_bucket *hbp;
 	struct nfsrvcache *rp, *nextrp;
-	int i;
+	int force, lastslot, i, j, k, tto, time_histo[HISTSIZE];
+	time_t thisstamp;
+	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
+	static int onethread = 0, oneslot = 0;
 
-	NFSLOCKCACHE();
-	TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
-		if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
-		     && rp->rc_refcnt == 0
-		     && ((rp->rc_flag & RC_REFCNT) ||
-			 NFSD_MONOSEC > rp->rc_timestamp ||
-			 nfsrc_udpcachesize > nfsrc_udphighwater))
-			nfsrc_freecache(rp);
+	if (sockref != 0) {
+		hbp = NFSRCAHASH(sockref);
+		mtx_lock(&hbp->mtx);
+		LIST_FOREACH_SAFE(rp, &hbp->tbl, rc_ahash, nextrp) {
+			if (sockref == rp->rc_sockref) {
+				if (SEQ_GEQ(snd_una, rp->rc_tcpseq)) {
+					rp->rc_acked = RC_ACK;
+					LIST_REMOVE(rp, rc_ahash);
+				} else if (final) {
+					rp->rc_acked = RC_NACK;
+					LIST_REMOVE(rp, rc_ahash);
+				}
+			}
+		}
+		mtx_unlock(&hbp->mtx);
 	}
-	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
+
+	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
+		return;
+	if (NFSD_MONOSEC != udp_lasttrim ||
+	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
+	    nfsrc_udphighwater / 2)) {
+		mtx_lock(&nfsrc_udpmtx);
+		udp_lasttrim = NFSD_MONOSEC;
+		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
 			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
 			     && rp->rc_refcnt == 0
 			     && ((rp->rc_flag & RC_REFCNT) ||
-				 NFSD_MONOSEC > rp->rc_timestamp ||
-				 nfsrc_activesocket(rp, sockref, so)))
+				 udp_lasttrim > rp->rc_timestamp ||
+				 nfsrc_udpcachesize > nfsrc_udphighwater))
 				nfsrc_freecache(rp);
 		}
+		mtx_unlock(&nfsrc_udpmtx);
 	}
-	NFSUNLOCKCACHE();
+	if (NFSD_MONOSEC != tcp_lasttrim ||
+	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
+		force = nfsrc_tcphighwater / 4;
+		if (force > 0 &&
+		    nfsrc_tcpsavedreplies + force >= nfsrc_tcphighwater) {
+			for (i = 0; i < HISTSIZE; i++)
+				time_histo[i] = 0;
+			i = 0;
+			lastslot = NFSRVCACHE_HASHSIZE - 1;
+		} else {
+			force = 0;
+			if (NFSD_MONOSEC != tcp_lasttrim) {
+				i = 0;
+				lastslot = NFSRVCACHE_HASHSIZE - 1;
+			} else {
+				lastslot = i = oneslot;
+				if (++oneslot >= NFSRVCACHE_HASHSIZE)
+					oneslot = 0;
+			}
+		}
+		tto = nfsrc_tcptimeout;
+		tcp_lasttrim = NFSD_MONOSEC;
+		for (; i <= lastslot; i++) {
+			mtx_lock(&nfsrchash_table[i].mtx);
+			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
+			    nextrp) {
+				if (!(rp->rc_flag &
+				     (RC_INPROG|RC_LOCKED|RC_WANTED))
+				     && rp->rc_refcnt == 0) {
+					if ((rp->rc_flag & RC_REFCNT) ||
+					    tcp_lasttrim > rp->rc_timestamp ||
+					    rp->rc_acked == RC_ACK) {
+						nfsrc_freecache(rp);
+						continue;
+					}
+
+					if (force == 0)
+						continue;
+					/*
+					 * The timestamps range from roughly the
+					 * present (tcp_lasttrim) to the present
+					 * + nfsrc_tcptimeout. Generate a simple
+					 * histogram of where the timeouts fall.
+					 */
+					j = rp->rc_timestamp - tcp_lasttrim;
+					if (j >= tto)
+						j = HISTSIZE - 1;
+					else if (j < 0)
+						j = 0;
+					else
+						j = j * HISTSIZE / tto;
+					time_histo[j]++;
+				}
+			}
+			mtx_unlock(&nfsrchash_table[i].mtx);
+		}
+		if (force) {
+			/*
+			 * Trim some more with a smaller timeout of as little
+			 * as 20% of nfsrc_tcptimeout to try and get below
+			 * 80% of the nfsrc_tcphighwater.
+			 */
+			k = 0;
+			for (i = 0; i < (HISTSIZE - 2); i++) {
+				k += time_histo[i];
+				if (k > force)
+					break;
+			}
+			k = tto * (i + 1) / HISTSIZE;
+			if (k < 1)
+				k = 1;
+			thisstamp = tcp_lasttrim + k;
+			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+				mtx_lock(&nfsrchash_table[i].mtx);
+				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
+				    rc_hash, nextrp) {
+					if (!(rp->rc_flag &
+					     (RC_INPROG|RC_LOCKED|RC_WANTED))
+					     && rp->rc_refcnt == 0
+					     && ((rp->rc_flag & RC_REFCNT) ||
+						 thisstamp > rp->rc_timestamp ||
+						 rp->rc_acked == RC_ACK))
+						nfsrc_freecache(rp);
+				}
+				mtx_unlock(&nfsrchash_table[i].mtx);
+			}
+		}
+	}
+	atomic_store_rel_int(&onethread, 0);
 }
 
 /*
@@ -810,12 +976,17 @@
 APPLESTATIC void
 nfsrvd_refcache(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
-	NFSLOCKCACHE();
+	if (rp == NULL)
+		/* For NFSv4.1, there is no cache entry. */
+		return;
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	if (rp->rc_refcnt < 0)
 		panic("nfs cache refcnt");
 	rp->rc_refcnt++;
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*
@@ -824,39 +995,19 @@
 APPLESTATIC void
 nfsrvd_derefcache(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
-	NFSLOCKCACHE();
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	if (rp->rc_refcnt <= 0)
 		panic("nfs cache derefcnt");
 	rp->rc_refcnt--;
 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
 		nfsrc_freecache(rp);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*
- * Check to see if the socket is active.
- * Return 1 if the reply has been received/acknowledged by the client,
- * 0 otherwise.
- * XXX - Uses tcp internals.
- */
-static int
-nfsrc_activesocket(struct nfsrvcache *rp, u_int64_t cur_sockref,
-    struct socket *cur_so)
-{
-	int ret = 0;
-
-	if (!(rp->rc_flag & RC_TCPSEQ))
-		return (ret);
-	/*
-	 * If the sockref is the same, it is the same TCP connection.
-	 */
-	if (cur_sockref == rp->rc_sockref)
-		ret = nfsrv_checksockseqnum(cur_so, rp->rc_tcpseq);
-	return (ret);
-}
-
-/*
  * Calculate the length of the mbuf list and a checksum on the first up to
  * NFSRVCACHE_CHECKLEN bytes.
  */

Modified: trunk/sys/fs/nfsserver/nfs_nfsdkrpc.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_nfsdkrpc.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_nfsdkrpc.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,8 +33,11 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdkrpc.c 314034 2017-02-21 09:29:46Z avg $");
 
+#include <sys/param.h>
+#include <sys/systm.h>
+
 #include "opt_inet6.h"
 #include "opt_kgssapi.h"
 
@@ -85,20 +89,20 @@
 SVCPOOL		*nfsrvd_pool;
 
 static int	nfs_privport = 0;
-SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_RW,
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_RWTUN,
     &nfs_privport, 0,
     "Only allow clients using a privileged port for NFSv2 and 3");
 
 static int	nfs_minvers = NFS_VER2;
-SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_nfsvers, CTLFLAG_RW,
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_nfsvers, CTLFLAG_RWTUN,
     &nfs_minvers, 0, "The lowest version of NFS handled by the server");
 
 static int	nfs_maxvers = NFS_VER4;
-SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_nfsvers, CTLFLAG_RW,
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_nfsvers, CTLFLAG_RWTUN,
     &nfs_maxvers, 0, "The highest version of NFS handled by the server");
 
-static int nfs_proc(struct nfsrv_descript *, u_int32_t, struct socket *,
-    u_int64_t, struct nfsrvcache **);
+static int nfs_proc(struct nfsrv_descript *, u_int32_t, SVCXPRT *xprt,
+    struct nfsrvcache **);
 
 extern u_long sb_max_adj;
 extern int newnfs_numnfsd;
@@ -117,7 +121,8 @@
 
 	memset(&nd, 0, sizeof(nd));
 	if (rqst->rq_vers == NFS_VER2) {
-		if (rqst->rq_proc > NFSV2PROC_STATFS) {
+		if (rqst->rq_proc > NFSV2PROC_STATFS ||
+		    newnfs_nfsv3_procid[rqst->rq_proc] == NFSPROC_NOOP) {
 			svcerr_noproc(rqst);
 			svc_freereq(rqst);
 			goto out;
@@ -230,10 +235,16 @@
 		 * Get a refcnt (shared lock) on nfsd_suspend_lock.
 		 * NFSSVC_SUSPENDNFSD will take an exclusive lock on
 		 * nfsd_suspend_lock to suspend these threads.
+		 * The call to nfsv4_lock() that preceeds nfsv4_getref()
+		 * ensures that the acquisition of the exclusive lock
+		 * takes priority over acquisition of the shared lock by
+		 * waiting for any exclusive lock request to complete.
 		 * This must be done here, before the check of
 		 * nfsv4root exports by nfsvno_v4rootexport().
 		 */
 		NFSLOCKV4ROOTMUTEX();
+		nfsv4_lock(&nfsd_suspend_lock, 0, NULL, NFSV4ROOTLOCKMUTEXPTR,
+		    NULL);
 		nfsv4_getref(&nfsd_suspend_lock, NULL, NFSV4ROOTLOCKMUTEXPTR,
 		    NULL);
 		NFSUNLOCKV4ROOTMUTEX();
@@ -251,8 +262,7 @@
 			}
 		}
 
-		cacherep = nfs_proc(&nd, rqst->rq_xid, xprt->xp_socket,
-		    xprt->xp_sockref, &rp);
+		cacherep = nfs_proc(&nd, rqst->rq_xid, xprt, &rp);
 		NFSLOCKV4ROOTMUTEX();
 		nfsv4_relref(&nfsd_suspend_lock);
 		NFSUNLOCKV4ROOTMUTEX();
@@ -287,11 +297,15 @@
 	} else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) {
 		svcerr_systemerr(rqst);
 	}
-	if (rp != NULL)
-		nfsrvd_sentcache(rp, xprt->xp_socket, 0);
+	if (rp != NULL) {
+		nfsrvd_sentcache(rp, (rqst->rq_reply_seq != 0 ||
+		    SVC_ACK(xprt, NULL)), rqst->rq_reply_seq);
+	}
 	svc_freereq(rqst);
 
 out:
+	if (softdep_ast_cleanup != NULL)
+		softdep_ast_cleanup();
 	NFSEXITCODE(0);
 }
 
@@ -300,11 +314,15 @@
  * Return the appropriate cache response.
  */
 static int
-nfs_proc(struct nfsrv_descript *nd, u_int32_t xid, struct socket *so,
-    u_int64_t sockref, struct nfsrvcache **rpp)
+nfs_proc(struct nfsrv_descript *nd, u_int32_t xid, SVCXPRT *xprt,
+    struct nfsrvcache **rpp)
 {
 	struct thread *td = curthread;
-	int cacherep = RC_DOIT, isdgram;
+	int cacherep = RC_DOIT, isdgram, taglen = -1;
+	struct mbuf *m;
+	u_char tag[NFSV4_SMALLSTR + 1], *tagstr = NULL;
+	u_int32_t minorvers = 0;
+	uint32_t ack;
 
 	*rpp = NULL;
 	if (nd->nd_nam2 == NULL) {
@@ -336,8 +354,19 @@
 			nd->nd_flag |= ND_SAMETCPCONN;
 		nd->nd_retxid = xid;
 		nd->nd_tcpconntime = NFSD_MONOSEC;
-		nd->nd_sockref = sockref;
-		cacherep = nfsrvd_getcache(nd, so);
+		nd->nd_sockref = xprt->xp_sockref;
+		if ((nd->nd_flag & ND_NFSV4) != 0)
+			nfsd_getminorvers(nd, tag, &tagstr, &taglen,
+			    &minorvers);
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			/* NFSv4.1 caches replies in the session slots. */
+			cacherep = RC_DOIT;
+		else {
+			cacherep = nfsrvd_getcache(nd);
+			ack = 0;
+			SVC_ACK(xprt, &ack);
+			nfsrc_trimcache(xprt->xp_sockref, ack, 0);
+		}
 	}
 
 	/*
@@ -347,18 +376,48 @@
 	 * RC_DROPIT - just throw the request away
 	 */
 	if (cacherep == RC_DOIT) {
-		nfsrvd_dorpc(nd, isdgram, td);
-		if (nd->nd_repstat == NFSERR_DONTREPLY)
-			cacherep = RC_DROPIT;
-		else
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			nd->nd_xprt = xprt;
+		nfsrvd_dorpc(nd, isdgram, tagstr, taglen, minorvers, td);
+		if ((nd->nd_flag & ND_NFSV41) != 0) {
+			if (nd->nd_repstat != NFSERR_REPLYFROMCACHE &&
+			    (nd->nd_flag & ND_SAVEREPLY) != 0) {
+				/* Cache a copy of the reply. */
+				m = m_copym(nd->nd_mreq, 0, M_COPYALL,
+				    M_WAITOK);
+			} else
+				m = NULL;
+			if ((nd->nd_flag & ND_HASSEQUENCE) != 0)
+				nfsrv_cache_session(nd->nd_sessionid,
+				    nd->nd_slotid, nd->nd_repstat, &m);
+			if (nd->nd_repstat == NFSERR_REPLYFROMCACHE)
+				nd->nd_repstat = 0;
 			cacherep = RC_REPLY;
-		*rpp = nfsrvd_updatecache(nd, so);
+		} else {
+			if (nd->nd_repstat == NFSERR_DONTREPLY)
+				cacherep = RC_DROPIT;
+			else
+				cacherep = RC_REPLY;
+			*rpp = nfsrvd_updatecache(nd);
+		}
 	}
+	if (tagstr != NULL && taglen > NFSV4_SMALLSTR)
+		free(tagstr, M_TEMP);
 
 	NFSEXITCODE2(0, nd);
 	return (cacherep);
 }
 
+static void
+nfssvc_loss(SVCXPRT *xprt)
+{
+	uint32_t ack;
+
+	ack = 0;
+	SVC_ACK(xprt, &ack);
+	nfsrc_trimcache(xprt->xp_sockref, ack, 1);
+}
+
 /*
  * Adds a socket to the list for servicing by nfsds.
  */
@@ -399,6 +458,8 @@
 		if (nfs_maxvers >= NFS_VER4)
 			svc_reg(xprt, NFS_PROG, NFS_VER4, nfssvc_program,
 			    NULL);
+		if (so->so_type == SOCK_STREAM)
+			svc_loss_reg(xprt, nfssvc_loss);
 		SVC_RELEASE(xprt);
 	}
 
@@ -415,6 +476,7 @@
 nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
 {
 	char principal[MAXHOSTNAMELEN + 5];
+	struct proc *p;
 	int error = 0;
 	bool_t ret2, ret3, ret4;
 
@@ -432,6 +494,10 @@
 	 */
 	NFSD_LOCK();
 	if (newnfs_numnfsd == 0) {
+		p = td->td_proc;
+		PROC_LOCK(p);
+		p->p_flag2 |= P2_AST_SU;
+		PROC_UNLOCK(p);
 		newnfs_numnfsd++;
 
 		NFSD_UNLOCK();
@@ -463,6 +529,9 @@
 		NFSD_LOCK();
 		newnfs_numnfsd--;
 		nfsrvd_init(1);
+		PROC_LOCK(p);
+		p->p_flag2 &= ~P2_AST_SU;
+		PROC_UNLOCK(p);
 	}
 	NFSD_UNLOCK();
 
@@ -485,18 +554,17 @@
 	if (terminating) {
 		nfsd_master_proc = NULL;
 		NFSD_UNLOCK();
-		svcpool_destroy(nfsrvd_pool);
-		nfsrvd_pool = NULL;
+		nfsrv_freeallbackchannel_xprts();
+		svcpool_close(nfsrvd_pool);
 		NFSD_LOCK();
+	} else {
+		NFSD_UNLOCK();
+		nfsrvd_pool = svcpool_create("nfsd",
+		    SYSCTL_STATIC_CHILDREN(_vfs_nfsd));
+		nfsrvd_pool->sp_rcache = NULL;
+		nfsrvd_pool->sp_assign = fhanew_assign;
+		nfsrvd_pool->sp_done = fha_nd_complete;
+		NFSD_LOCK();
 	}
-
-	NFSD_UNLOCK();
-
-	nfsrvd_pool = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsd));
-	nfsrvd_pool->sp_rcache = NULL;
-	nfsrvd_pool->sp_assign = fhanew_assign;
-	nfsrvd_pool->sp_done = fha_nd_complete;
-
-	NFSD_LOCK();
 }
 

Modified: trunk/sys/fs/nfsserver/nfs_nfsdport.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_nfsdport.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_nfsdport.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,9 +33,9 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdport.c 317914 2017-05-07 19:57:45Z rmacklem $");
 
-#include <sys/capability.h>
+#include <sys/capsicum.h>
 
 /*
  * Functions that perform the vfs operations required by the routines in
@@ -58,13 +59,20 @@
 extern void (*nfsd_call_servertimer)(void);
 extern SVCPOOL	*nfsrvd_pool;
 extern struct nfsv4lock nfsd_suspend_lock;
+extern struct nfsclienthashhead *nfsclienthash;
+extern struct nfslockhashhead *nfslockhash;
+extern struct nfssessionhash *nfssessionhash;
+extern int nfsrv_sessionhashsize;
 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
 NFSDLOCKMUTEX;
-struct mtx nfs_cache_mutex;
+struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
+struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
+struct mtx nfsrc_udpmtx;
 struct mtx nfs_v4root_mutex;
 struct nfsrvfh nfs_rootfh, nfs_pubfh;
 int nfs_pubfhset = 0, nfs_rootfhset = 0;
 struct proc *nfsd_master_proc = NULL;
+int nfsd_debuglevel = 0;
 static pid_t nfsd_master_pid = (pid_t)-1;
 static char nfsd_master_comm[MAXCOMLEN + 1];
 static struct timeval nfsd_master_start;
@@ -78,6 +86,7 @@
 static int nfs_commit_miss;
 extern int nfsrv_issuedelegs;
 extern int nfsrv_dolocallocks;
+extern int nfsd_enable_stringtouid;
 
 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "New NFS server");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
@@ -90,6 +99,10 @@
     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
+    0, "Debug level for new nfs server");
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
+    &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
 
 #define	MAX_REORDERED_RPC	16
 #define	NUM_HEURISTIC		1031
@@ -390,7 +403,7 @@
 
 	/*
 	 * Initialize for scan, set ni_startdir and bump ref on dp again
-	 * becuase lookup() will dereference ni_startdir.
+	 * because lookup() will dereference ni_startdir.
 	 */
 
 	cnp->cn_thread = p;
@@ -506,11 +519,10 @@
 
 out:
 	if (error) {
-		uma_zfree(namei_zone, cnp->cn_pnbuf);
+		nfsvno_relpathbuf(ndp);
 		ndp->ni_vp = NULL;
 		ndp->ni_dvp = NULL;
 		ndp->ni_startdir = NULL;
-		cnp->cn_flags &= ~HASBUF;
 	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
 		ndp->ni_dvp = NULL;
 	}
@@ -566,7 +578,7 @@
 	i = 0;
 	while (len < NFS_MAXPATHLEN) {
 		NFSMGET(mp);
-		MCLGET(mp, M_WAIT);
+		MCLGET(mp, M_WAITOK);
 		mp->m_len = NFSMSIZ(mp);
 		if (len == 0) {
 			mp3 = mp2 = mp;
@@ -636,7 +648,7 @@
 	i = 0;
 	while (left > 0) {
 		NFSMGET(m);
-		MCLGET(m, M_WAIT);
+		MCLGET(m, M_WAITOK);
 		m->m_len = 0;
 		siz = min(M_TRAILINGSPACE(m), left);
 		left -= siz;
@@ -672,6 +684,7 @@
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_td = NULL;
 	nh = nfsrv_sequential_heuristic(uiop, vp);
 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
@@ -782,6 +795,11 @@
 					nvap->na_atime.tv_nsec = cverf[1];
 					error = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, nd->nd_cred);
+					if (error != 0) {
+						vput(ndp->ni_vp);
+						ndp->ni_vp = NULL;
+						error = NFSERR_NOTSUPP;
+					}
 				}
 			}
 		/*
@@ -1261,16 +1279,19 @@
 	 * file is done.  At this time VOP_FSYNC does not accept offset and
 	 * byte count parameters so call VOP_FSYNC the whole file for now.
 	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
+	 * File systems that do not use the buffer cache (as indicated
+	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
 	 */
-	if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
+	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
+	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
 		/*
 		 * Give up and do the whole thing
 		 */
 		if (vp->v_object &&
 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
-			VM_OBJECT_LOCK(vp->v_object);
+			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
-			VM_OBJECT_UNLOCK(vp->v_object);
+			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 		error = VOP_FSYNC(vp, MNT_WAIT, td);
 	} else {
@@ -1299,10 +1320,10 @@
 
 		if (vp->v_object &&
 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
-			VM_OBJECT_LOCK(vp->v_object);
+			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, off, off + cnt,
 			    OBJPC_SYNC);
-			VM_OBJECT_UNLOCK(vp->v_object);
+			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 
 		bo = &vp->v_bufobj;
@@ -1322,7 +1343,7 @@
 			 */
 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
-				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
+				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
 					BO_LOCK(bo);
 					continue; /* retry */
 				}
@@ -1407,6 +1428,13 @@
 					nvap->na_atime.tv_nsec = cverf[1];
 					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, cred);
+					if (nd->nd_repstat != 0) {
+						vput(ndp->ni_vp);
+						ndp->ni_vp = NULL;
+						nd->nd_repstat = NFSERR_NOTSUPP;
+					} else
+						NFSSETBIT_ATTRBIT(attrbitp,
+						    NFSATTRBIT_TIMEACCESS);
 				} else {
 					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
 					    aclp, p, attrbitp, exp);
@@ -1469,8 +1497,9 @@
  * Updates the file rev and sets the mtime and ctime
  * to the current clock time, returning the va_filerev and va_Xtime
  * values.
+ * Return ESTALE to indicate the vnode is VI_DOOMED.
  */
-void
+int
 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
     struct ucred *cred, struct thread *p)
 {
@@ -1478,8 +1507,14 @@
 
 	VATTR_NULL(&va);
 	vfs_timestamp(&va.va_mtime);
+	if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
+		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
+		if ((vp->v_iflag & VI_DOOMED) != 0)
+			return (ESTALE);
+	}
 	(void) VOP_SETATTR(vp, &va, cred);
 	(void) nfsvno_getattr(vp, nvap, cred, p, 1);
+	return (0);
 }
 
 /*
@@ -1539,7 +1574,7 @@
 	u_long *cookies = NULL, *cookiep;
 	struct uio io;
 	struct iovec iv;
-	int not_zfs;
+	int is_ufs;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
@@ -1594,7 +1629,7 @@
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
-	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
+	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
 	eofflag = 0;
@@ -1674,12 +1709,10 @@
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
-	 * Since the offset cookies don't monotonically increase for ZFS,
-	 * this is not done when ZFS is the file system.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
-	     (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
+	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
@@ -1792,7 +1825,7 @@
 	struct uio io;
 	struct iovec iv;
 	struct componentname cn;
-	int at_root, needs_unbusy, not_zfs, supports_nfsv4acls;
+	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
 	struct mount *mp, *new_mp;
 	uint64_t mounted_on_fileno;
 
@@ -1872,7 +1905,8 @@
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
-	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
+	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
+	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;
 
 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 again:
@@ -1945,12 +1979,10 @@
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
-	 * Since the offset cookies don't monotonically increase for ZFS,
-	 * this is not done when ZFS is the file system.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
-	   (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff) ||
+	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
 	   ((nd->nd_flag & ND_NFSV4) &&
 	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
@@ -1984,6 +2016,19 @@
 	}
 
 	/*
+	 * For now ZFS requires VOP_LOOKUP as a workaround.  Until ino_t is changed
+	 * to 64 bit type a ZFS filesystem with over 1 billion files in it
+	 * will suffer from 64bit -> 32bit truncation.
+	 */
+	if (is_zfs == 1)
+		usevget = 0;
+
+	cn.cn_nameiop = LOOKUP;
+	cn.cn_lkflags = LK_SHARED | LK_RETRY;
+	cn.cn_cred = nd->nd_cred;
+	cn.cn_thread = p;
+
+	/*
 	 * Save this position, in case there is an error before one entry
 	 * is created.
 	 */
@@ -2050,21 +2095,11 @@
 					else
 						r = EOPNOTSUPP;
 					if (r == EOPNOTSUPP) {
-						if (usevget) {
-							usevget = 0;
-							cn.cn_nameiop = LOOKUP;
-							cn.cn_lkflags =
-							    LK_SHARED |
-							    LK_RETRY;
-							cn.cn_cred =
-							    nd->nd_cred;
-							cn.cn_thread = p;
-						}
+						usevget = 0;
 						cn.cn_nameptr = dp->d_name;
 						cn.cn_namelen = nlen;
 						cn.cn_flags = ISLASTCN |
-						    NOFOLLOW | LOCKLEAF |
-						    MPSAFE;
+						    NOFOLLOW | LOCKLEAF;
 						if (nlen == 2 &&
 						    dp->d_name[0] == '.' &&
 						    dp->d_name[1] == '.')
@@ -2121,6 +2156,22 @@
 					if (!r)
 					    r = nfsvno_getattr(nvp, nvap,
 						nd->nd_cred, p, 1);
+					if (r == 0 && is_zfs == 1 &&
+					    nfsrv_enable_crossmntpt != 0 &&
+					    (nd->nd_flag & ND_NFSV4) != 0 &&
+					    nvp->v_type == VDIR &&
+					    vp->v_mount != nvp->v_mount) {
+					    /*
+					     * For a ZFS snapshot, there is a
+					     * pseudo mount that does not set
+					     * v_mountedhere, so it needs to
+					     * be detected via a different
+					     * mount structure.
+					     */
+					    at_root = 1;
+					    if (new_mp == mp)
+						new_mp = nvp->v_mount;
+					}
 				    }
 				} else {
 				    nvp = NULL;
@@ -2215,9 +2266,11 @@
 	if (dirlen > cnt || nd->nd_repstat) {
 		if (!nd->nd_repstat && entrycnt == 0)
 			nd->nd_repstat = NFSERR_TOOSMALL;
-		if (nd->nd_repstat)
+		if (nd->nd_repstat) {
 			newnfs_trimtrailing(nd, mb0, bpos0);
-		else
+			if (nd->nd_flag & ND_NFSV3)
+				nfsrv_postopattr(nd, getret, &at);
+		} else
 			newnfs_trimtrailing(nd, mb1, bpos1);
 		eofflag = 0;
 	} else if (cpos < cend)
@@ -2587,14 +2640,24 @@
 	 *  Fsinfo RPC. If set for anything else, this code might need
 	 *  to change.)
 	 */
-	if (NFSVNO_EXPORTED(exp) &&
-	    ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
-	     NFSVNO_EXPORTANON(exp) ||
-	     (nd->nd_flag & ND_AUTHNONE))) {
-		nd->nd_cred->cr_uid = credanon->cr_uid;
-		nd->nd_cred->cr_gid = credanon->cr_gid;
-		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
-		    credanon->cr_groups);
+	if (NFSVNO_EXPORTED(exp)) {
+		if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
+		     NFSVNO_EXPORTANON(exp) ||
+		     (nd->nd_flag & ND_AUTHNONE) != 0) {
+			nd->nd_cred->cr_uid = credanon->cr_uid;
+			nd->nd_cred->cr_gid = credanon->cr_gid;
+			crsetgroups(nd->nd_cred, credanon->cr_ngroups,
+			    credanon->cr_groups);
+		} else if ((nd->nd_flag & ND_GSS) == 0) {
+			/*
+			 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
+			 * if there is a replacement credential with a group
+			 * list set up by "nfsuserd -manage-gids".
+			 * If there is no replacement, nfsrv_getgrpscred()
+			 * simply returns its argument.
+			 */
+			nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
+		}
 	}
 
 out:
@@ -2640,10 +2703,7 @@
 
 	*credp = NULL;
 	exp->nes_numsecflavor = 0;
-	if (VFS_NEEDSGIANT(mp))
-		error = ESTALE;
-	else
-		error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, vpp);
+	error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
 	if (error != 0)
 		/* Make sure the server replies ESTALE to the client. */
 		error = ESTALE;
@@ -2664,14 +2724,6 @@
 				exp->nes_secflavors[i] = secflavors[i];
 		}
 	}
-	if (error == 0 && lktype == LK_SHARED)
-		/*
-		 * It would be much better to pass lktype to VFS_FHTOVP(),
-		 * but this will have to do until VFS_FHTOVP() has a lock
-		 * type argument like VFS_VGET().
-		 */
-		NFSVOPLOCK(*vpp, LK_DOWNGRADE | LK_RETRY);
-
 	NFSEXITCODE(error);
 	return (error);
 }
@@ -2784,7 +2836,7 @@
 /*
  * glue for fp.
  */
-int
+static int
 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
 {
 	struct filedesc *fdp;
@@ -2792,8 +2844,8 @@
 	int error = 0;
 
 	fdp = p->td_proc->p_fd;
-	if (fd >= fdp->fd_nfiles ||
-	    (fp = fdp->fd_ofiles[fd]) == NULL) {
+	if (fd < 0 || fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
 		error = EBADF;
 		goto out;
 	}
@@ -2828,7 +2880,7 @@
 		/*
 		 * If fspec != NULL, this is the v4root path.
 		 */
-		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
+		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
 		    nfsexargp->fspec, p);
 		if ((error = namei(&nd)) != 0)
 			goto out;
@@ -2849,40 +2901,6 @@
 }
 
 /*
- * Get the tcp socket sequence numbers we need.
- * (Maybe this should be moved to the tcp sources?)
- */
-int
-nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
-{
-	struct inpcb *inp;
-	struct tcpcb *tp;
-	int error = 0;
-
-	inp = sotoinpcb(so);
-	KASSERT(inp != NULL, ("nfsrv_getsocksndseq: inp == NULL"));
-	INP_RLOCK(inp);
-	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
-		INP_RUNLOCK(inp);
-		error = EPIPE;
-		goto out;
-	}
-	tp = intotcpcb(inp);
-	if (tp->t_state != TCPS_ESTABLISHED) {
-		INP_RUNLOCK(inp);
-		error = EPIPE;
-		goto out;
-	}
-	*maxp = tp->snd_max;
-	*unap = tp->snd_una;
-	INP_RUNLOCK(inp);
-
-out:
-	NFSEXITCODE(error);
-	return (error);
-}
-
-/*
  * This function needs to test to see if the system is near its limit
  * for memory allocation via malloc() or mget() and return True iff
  * either of these resources are near their limit.
@@ -2959,13 +2977,8 @@
 
 	if (nfsrv_dolocallocks == 0)
 		goto out;
+	ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
 
-	/* Check for VI_DOOMED here, so that VOP_ADVLOCK() isn't performed. */
-	if ((vp->v_iflag & VI_DOOMED) != 0) {
-		error = EPERM;
-		goto out;
-	}
-
 	fl.l_whence = SEEK_SET;
 	fl.l_type = ftype;
 	fl.l_start = (off_t)first;
@@ -2988,7 +3001,6 @@
 	fl.l_pid = (pid_t)0;
 	fl.l_sysid = (int)nfsv4_sysid;
 
-	NFSVOPUNLOCK(vp, 0);
 	if (ftype == F_UNLCK)
 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
 		    (F_POSIX | F_REMOTE));
@@ -2995,7 +3007,6 @@
 	else
 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
 		    (F_POSIX | F_REMOTE));
-	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 
 out:
 	NFSEXITCODE(error);
@@ -3047,6 +3058,7 @@
 	struct file *fp;
 	struct nfsd_addsock_args sockarg;
 	struct nfsd_nfsd_args nfsdarg;
+	cap_rights_t rights;
 	int error;
 
 	if (uap->flag & NFSSVC_NFSDADDSOCK) {
@@ -3058,7 +3070,9 @@
 		 * pretend that we need them all. It is better to be too
 		 * careful than too reckless.
 		 */
-		if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0)
+		error = fget(td, sockarg.sock,
+		    cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
+		if (error != 0)
 			goto out;
 		if (fp->f_type != DTYPE_SOCKET) {
 			fdrop(fp, td);
@@ -3258,6 +3272,18 @@
 }
 
 /*
+ * Calculate a hash value for the sessionid.
+ */
+uint32_t
+nfsrv_hashsessionid(uint8_t *sessionid)
+{
+	uint32_t hashval;
+
+	hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
+	return (hashval);
+}
+
+/*
  * Signal the userland master nfsd to backup the stable restart file.
  */
 void
@@ -3291,7 +3317,7 @@
 static int
 nfsd_modevent(module_t mod, int type, void *data)
 {
-	int error = 0;
+	int error = 0, i;
 	static int loaded = 0;
 
 	switch (type) {
@@ -3299,10 +3325,15 @@
 		if (loaded)
 			goto out;
 		newnfs_portinit();
-		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
-		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
-		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
-		    MTX_DEF);
+		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+			mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL,
+			    MTX_DEF);
+			mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL,
+			    MTX_DEF);
+		}
+		mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
+		mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
+		mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
 		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
 		nfsrvd_initcache();
 		nfsd_init();
@@ -3343,10 +3374,19 @@
 			svcpool_destroy(nfsrvd_pool);
 
 		/* and get rid of the locks */
-		mtx_destroy(&nfs_cache_mutex);
+		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+			mtx_destroy(&nfsrchash_table[i].mtx);
+			mtx_destroy(&nfsrcahash_table[i].mtx);
+		}
+		mtx_destroy(&nfsrc_udpmtx);
 		mtx_destroy(&nfs_v4root_mutex);
 		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
+		for (i = 0; i < nfsrv_sessionhashsize; i++)
+			mtx_destroy(&nfssessionhash[i].mtx);
 		lockdestroy(&nfsv4root_mnt.mnt_explock);
+		free(nfsclienthash, M_NFSDCLIENT);
+		free(nfslockhash, M_NFSDLOCKFILE);
+		free(nfssessionhash, M_NFSDSESSION);
 		loaded = 0;
 		break;
 	default:

Modified: trunk/sys/fs/nfsserver/nfs_nfsdserv.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_nfsdserv.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_nfsdserv.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdserv.c 325448 2017-11-05 20:28:28Z rmacklem $");
 
 /*
  * nfs version 2, 3 and 4 server calls to vnode ops
@@ -53,6 +54,7 @@
 extern struct timeval nfsboottime;
 extern int nfs_rootfhset;
 extern int nfsrv_enable_crossmntpt;
+extern int nfsrv_statehashsize;
 #endif	/* !APPLEKEXT */
 
 static int	nfs_async = 0;
@@ -666,10 +668,14 @@
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 		clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
-		if (nd->nd_flag & ND_IMPLIEDCLID) {
-			if (nd->nd_clientid.qval != clientid.qval)
-				printf("EEK! multiple clids\n");
+		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				clientid.qval = nd->nd_clientid.qval;
+			else if (nd->nd_clientid.qval != clientid.qval)
+				printf("EEK1 multiple clids\n");
 		} else {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
@@ -818,10 +824,14 @@
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 		clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
-		if (nd->nd_flag & ND_IMPLIEDCLID) {
-			if (nd->nd_clientid.qval != clientid.qval)
-				printf("EEK! multiple clids\n");
+		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				clientid.qval = nd->nd_clientid.qval;
+			else if (nd->nd_clientid.qval != clientid.qval)
+				printf("EEK2 multiple clids\n");
 		} else {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
@@ -862,7 +872,7 @@
 			i = mbuf_len(mp);
 	}
 
-	if (retlen > NFS_MAXDATA || retlen < 0)
+	if (retlen > NFS_SRVMAXIO || retlen < 0)
 		nd->nd_repstat = EIO;
 	if (vnode_vtype(vp) != VREG && !nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
@@ -901,7 +911,7 @@
 		    nd->nd_md, nd->nd_dpos, nd->nd_cred, p);
 		error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1);
 		if (error)
-			panic("nfsrv_write mbuf");
+			goto nfsmout;
 	}
 	if (nd->nd_flag & ND_NFSV4)
 		aftat_ret = 0;
@@ -975,7 +985,7 @@
 		goto out;
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
-	    LOCKPARENT | LOCKLEAF | SAVESTART);
+	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
@@ -1186,7 +1196,7 @@
 			goto out;
 		}
 	}
-	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags);
+	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
@@ -1620,13 +1630,6 @@
 			nd->nd_repstat = NFSERR_INVAL;
 		if (tovp)
 			vrele(tovp);
-	} else if (vnode_vtype(vp) == VLNK) {
-		if (nd->nd_flag & ND_NFSV2)
-			nd->nd_repstat = NFSERR_INVAL;
-		else
-			nd->nd_repstat = NFSERR_NOTSUPP;
-		if (tovp)
-			vrele(tovp);
 	}
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
@@ -1646,7 +1649,7 @@
 		}
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
-	    LOCKPARENT | SAVENAME);
+	    LOCKPARENT | SAVENAME | NOCACHE);
 	if (!nd->nd_repstat) {
 		nfsvno_setpathbuf(&named, &bufp, &hashp);
 		error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
@@ -1723,7 +1726,7 @@
 		*vpp = NULL;
 	NFSVNO_ATTRINIT(&nva);
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
-	    LOCKPARENT | SAVESTART);
+	    LOCKPARENT | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (!error && !nd->nd_repstat)
@@ -1841,7 +1844,7 @@
 		goto out;
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
-	    LOCKPARENT | SAVENAME);
+	    LOCKPARENT | SAVENAME | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
@@ -2211,10 +2214,14 @@
 		stp->ls_opentolockseq = fxdr_unsigned(int, *tl++);
 		clientid.lval[0] = *tl++;
 		clientid.lval[1] = *tl++;
-		if (nd->nd_flag & ND_IMPLIEDCLID) {
-			if (nd->nd_clientid.qval != clientid.qval)
-				printf("EEK! multiple clids\n");
+		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				clientid.qval = nd->nd_clientid.qval;
+			else if (nd->nd_clientid.qval != clientid.qval)
+				printf("EEK3 multiple clids\n");
 		} else {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
@@ -2234,10 +2241,14 @@
 		stp->ls_seq = fxdr_unsigned(int, *tl);
 		clientid.lval[0] = stp->ls_stateid.other[0];
 		clientid.lval[1] = stp->ls_stateid.other[1];
-		if (nd->nd_flag & ND_IMPLIEDCLID) {
-			if (nd->nd_clientid.qval != clientid.qval)
-				printf("EEK! multiple clids\n");
+		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				clientid.qval = nd->nd_clientid.qval;
+			else if (nd->nd_clientid.qval != clientid.qval)
+				printf("EEK4 multiple clids\n");
 		} else {
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
@@ -2383,10 +2394,14 @@
 	tl += 2;
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK5 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
@@ -2402,8 +2417,6 @@
 	if (!nd->nd_repstat)
 	  nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid,
 	    &stateid, exp, nd, p);
-	if (stp)
-		FREE((caddr_t)stp, M_NFSDSTATE);
 	if (nd->nd_repstat) {
 	    if (nd->nd_repstat == NFSERR_DENIED) {
 		NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
@@ -2425,6 +2438,8 @@
 	    }
 	}
 	vput(vp);
+	if (stp)
+		FREE((caddr_t)stp, M_NFSDSTATE);
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
@@ -2494,10 +2509,14 @@
 	}
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK6 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
@@ -2538,7 +2557,7 @@
     struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
-	int i;
+	int i, retext;
 	struct nfsstate *stp = NULL;
 	int error = 0, create, claim, exclusive_flag = 0;
 	u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask;
@@ -2575,6 +2594,39 @@
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++);
 	i = fxdr_unsigned(int, *tl++);
+	retext = 0;
+	if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG |
+	    NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) {
+		retext = 1;
+		/* For now, ignore these. */
+		i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG);
+		switch (i & NFSV4OPEN_WANTDELEGMASK) {
+		case NFSV4OPEN_WANTANYDELEG:
+			stp->ls_flags |= (NFSLCK_WANTRDELEG |
+			    NFSLCK_WANTWDELEG);
+			i &= ~NFSV4OPEN_WANTDELEGMASK;
+			break;
+		case NFSV4OPEN_WANTREADDELEG:
+			stp->ls_flags |= NFSLCK_WANTRDELEG;
+			i &= ~NFSV4OPEN_WANTDELEGMASK;
+			break;
+		case NFSV4OPEN_WANTWRITEDELEG:
+			stp->ls_flags |= NFSLCK_WANTWDELEG;
+			i &= ~NFSV4OPEN_WANTDELEGMASK;
+			break;
+		case NFSV4OPEN_WANTNODELEG:
+			stp->ls_flags |= NFSLCK_WANTNODELEG;
+			i &= ~NFSV4OPEN_WANTDELEGMASK;
+			break;
+		case NFSV4OPEN_WANTCANCEL:
+			printf("NFSv4: ignore Open WantCancel\n");
+			i &= ~NFSV4OPEN_WANTDELEGMASK;
+			break;
+		default:
+			/* nd_repstat will be set to NFSERR_INVAL below. */
+			break;
+		};
+	}
 	switch (i) {
 	case NFSV4OPEN_ACCESSREAD:
 		stp->ls_flags |= NFSLCK_READACCESS;
@@ -2606,10 +2658,14 @@
 	};
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK7 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
@@ -2649,6 +2705,28 @@
 			cverf[0] = *tl++;
 			cverf[1] = *tl;
 			break;
+		case NFSCREATE_EXCLUSIVE41:
+			NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
+			cverf[0] = *tl++;
+			cverf[1] = *tl;
+			error = nfsv4_sattr(nd, &nva, &attrbits, aclp, p);
+			if (error != 0)
+				goto nfsmout;
+			if (NFSISSET_ATTRBIT(&attrbits,
+			    NFSATTRBIT_TIMEACCESSSET))
+				nd->nd_repstat = NFSERR_INVAL;
+			/*
+			 * If the na_gid being set is the same as that of
+			 * the directory it is going in, clear it, since
+			 * that is what will be set by default. This allows
+			 * a user that isn't in that group to do the create.
+			 */
+			if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) &&
+			    nva.na_gid == dirfor.na_gid)
+				NFSVNO_UNSET(&nva, gid);
+			if (nd->nd_repstat == 0)
+				nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
+			break;
 		default:
 			nd->nd_repstat = NFSERR_BADXDR;
 			goto nfsmout;
@@ -2685,7 +2763,7 @@
 		}
 		if (create == NFSV4OPEN_CREATE)
 		    NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
-			LOCKPARENT | LOCKLEAF | SAVESTART);
+			LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 		else
 		    NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 			LOCKLEAF | SAVESTART);
@@ -2730,27 +2808,38 @@
 			exclusive_flag = 1;
 			if (!named.ni_vp)
 				nva.na_mode = 0;
+			break;
+		    case NFSCREATE_EXCLUSIVE41:
+			exclusive_flag = 1;
+			break;
 		    };
 		}
 		nfsvno_open(nd, &named, clientid, &stateid, stp,
 		    &exclusive_flag, &nva, cverf, create, aclp, &attrbits,
 		    nd->nd_cred, p, exp, &vp);
-	} else if (claim == NFSV4OPEN_CLAIMPREVIOUS) {
-		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-		i = fxdr_unsigned(int, *tl);
-		switch (i) {
-		case NFSV4OPEN_DELEGATEREAD:
-			stp->ls_flags |= NFSLCK_DELEGREAD;
-			break;
-		case NFSV4OPEN_DELEGATEWRITE:
-			stp->ls_flags |= NFSLCK_DELEGWRITE;
-		case NFSV4OPEN_DELEGATENONE:
-			break;
-		default:
-			nd->nd_repstat = NFSERR_BADXDR;
-			goto nfsmout;
-		};
-		stp->ls_flags |= NFSLCK_RECLAIM;
+	} else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim ==
+	    NFSV4OPEN_CLAIMFH) {
+		if (claim == NFSV4OPEN_CLAIMPREVIOUS) {
+			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
+			i = fxdr_unsigned(int, *tl);
+			switch (i) {
+			case NFSV4OPEN_DELEGATEREAD:
+				stp->ls_flags |= NFSLCK_DELEGREAD;
+				break;
+			case NFSV4OPEN_DELEGATEWRITE:
+				stp->ls_flags |= NFSLCK_DELEGWRITE;
+			case NFSV4OPEN_DELEGATENONE:
+				break;
+			default:
+				nd->nd_repstat = NFSERR_BADXDR;
+				goto nfsmout;
+			};
+			stp->ls_flags |= NFSLCK_RECLAIM;
+		} else {
+			/* CLAIM_NULL_FH */
+			if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE)
+				nd->nd_repstat = NFSERR_INVAL;
+		}
 		vp = dp;
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if ((vp->v_iflag & VI_DOOMED) == 0)
@@ -2839,7 +2928,21 @@
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD);
 		else if (rflags & NFSV4OPEN_WRITEDELEGATE)
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE);
-		else
+		else if (retext != 0) {
+			*tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT);
+			if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) {
+				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+				*tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION);
+				*tl = newnfs_false;
+			} else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) {
+				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+				*tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE);
+				*tl = newnfs_false;
+			} else {
+				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+				*tl = txdr_unsigned(NFSV4OPEN_NOTWANTED);
+			}
+		} else
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE);
 		if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED);
@@ -2915,10 +3018,14 @@
 	stp->ls_flags = NFSLCK_CLOSE;
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK8 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
@@ -2955,14 +3062,18 @@
 	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK9 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
-	nd->nd_repstat = nfsrv_delegupdate(clientid, NULL, NULL,
+	nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL,
 	    NFSV4OP_DELEGPURGE, nd->nd_cred, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
@@ -2986,14 +3097,18 @@
 	NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER);
 	clientid.lval[0] = stateid.other[0];
 	clientid.lval[1] = stateid.other[1];
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK10 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
-	nd->nd_repstat = nfsrv_delegupdate(clientid, &stateid, vp,
+	nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp,
 	    NFSV4OP_DELEGRETURN, nd->nd_cred, p);
 nfsmout:
 	vput(vp);
@@ -3031,6 +3146,10 @@
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 
+	if ((nd->nd_flag & ND_NFSV41) != 0) {
+		nd->nd_repstat = NFSERR_NOTSUPP;
+		goto nfsmout;
+	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
 	stp->ls_ownerlen = 0;
 	stp->ls_op = nd->nd_rp;
@@ -3043,10 +3162,14 @@
 	stp->ls_flags = NFSLCK_CONFIRM;
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK11 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
@@ -3119,10 +3242,14 @@
 
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK12 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
@@ -3151,6 +3278,10 @@
 	int error = 0;
 	nfsquad_t clientid;
 
+	if ((nd->nd_flag & ND_NFSV41) != 0) {
+		nd->nd_repstat = NFSERR_NOTSUPP;
+		goto nfsmout;
+	}
 	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
@@ -3158,15 +3289,19 @@
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK13 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW),
-	    NULL, (nfsquad_t)((u_quad_t)0), nd, p);
+	    NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
@@ -3290,6 +3425,10 @@
 	u_char *verf, *ucp, *ucp2, addrbuf[24];
 	nfsquad_t clientid, confirm;
 
+	if ((nd->nd_flag & ND_NFSV41) != 0) {
+		nd->nd_repstat = NFSERR_NOTSUPP;
+		goto nfsmout;
+	}
 	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto out;
@@ -3305,9 +3444,10 @@
 	idlen = i;
 	if (nd->nd_flag & ND_GSS)
 		i += nd->nd_princlen;
-	MALLOC(clp, struct nfsclient *, sizeof (struct nfsclient) + i,
-	    M_NFSDCLIENT, M_WAITOK);
-	NFSBZERO((caddr_t)clp, sizeof (struct nfsclient) + i);
+	clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK |
+	    M_ZERO);
+	clp->lc_stateid = malloc(sizeof(struct nfsstatehead) *
+	    nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK);
 	NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx);
 	NFSSOCKADDRALLOC(clp->lc_req.nr_nam);
 	NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in));
@@ -3367,7 +3507,8 @@
 	if (clp) {
 		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
-		free((caddr_t)clp, M_NFSDCLIENT);
+		free(clp->lc_stateid, M_NFSDCLIENT);
+		free(clp, M_NFSDCLIENT);
 	}
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER);
@@ -3384,7 +3525,8 @@
 	if (clp) {
 		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
-		free((caddr_t)clp, M_NFSDCLIENT);
+		free(clp->lc_stateid, M_NFSDCLIENT);
+		free(clp, M_NFSDCLIENT);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
@@ -3402,6 +3544,10 @@
 	int error = 0;
 	nfsquad_t clientid, confirm;
 
+	if ((nd->nd_flag & ND_NFSV41) != 0) {
+		nd->nd_repstat = NFSERR_NOTSUPP;
+		goto nfsmout;
+	}
 	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
@@ -3417,7 +3563,7 @@
 	 * returns the appropriate NFSERR status.
 	 */
 	nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW),
-	    NULL, confirm, nd, p);
+	    NULL, NULL, confirm, 0, nd, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
@@ -3492,6 +3638,10 @@
 	int error = 0, len;
 	nfsquad_t clientid;
 
+	if ((nd->nd_flag & ND_NFSV41) != 0) {
+		nd->nd_repstat = NFSERR_NOTSUPP;
+		goto nfsmout;
+	}
 	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
@@ -3510,10 +3660,14 @@
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
-	if (nd->nd_flag & ND_IMPLIEDCLID) {
-		if (nd->nd_clientid.qval != clientid.qval)
-			printf("EEK! multiple clids\n");
+	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			clientid.qval = nd->nd_clientid.qval;
+		else if (nd->nd_clientid.qval != clientid.qval)
+			printf("EEK14 multiple clids\n");
 	} else {
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
@@ -3531,3 +3685,397 @@
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
+
+/*
+ * nfsv4 exchange_id service
+ */
+APPLESTATIC int
+nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+	uint32_t *tl;
+	int error = 0, i, idlen;
+	struct nfsclient *clp = NULL;
+	nfsquad_t clientid, confirm;
+	uint8_t *verf;
+	uint32_t sp4type, v41flags;
+	uint64_t owner_minor;
+	struct timespec verstime;
+	struct sockaddr_in *sad, *rad;
+
+	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
+		nd->nd_repstat = NFSERR_WRONGSEC;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED);
+	verf = (uint8_t *)tl;
+	tl += (NFSX_VERF / NFSX_UNSIGNED);
+	i = fxdr_unsigned(int, *tl);
+	if (i > NFSV4_OPAQUELIMIT || i <= 0) {
+		nd->nd_repstat = NFSERR_BADXDR;
+		goto nfsmout;
+	}
+	idlen = i;
+	if (nd->nd_flag & ND_GSS)
+		i += nd->nd_princlen;
+	clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK |
+	    M_ZERO);
+	clp->lc_stateid = malloc(sizeof(struct nfsstatehead) *
+	    nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK);
+	NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx);
+	NFSSOCKADDRALLOC(clp->lc_req.nr_nam);
+	NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in));
+	sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
+	rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
+	rad->sin_family = AF_INET;
+	rad->sin_addr.s_addr = 0;
+	rad->sin_port = 0;
+	if (sad->sin_family == AF_INET)
+		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
+	clp->lc_req.nr_cred = NULL;
+	NFSBCOPY(verf, clp->lc_verf, NFSX_VERF);
+	clp->lc_idlen = idlen;
+	error = nfsrv_mtostr(nd, clp->lc_id, idlen);
+	if (error != 0)
+		goto nfsmout;
+	if ((nd->nd_flag & ND_GSS) != 0) {
+		clp->lc_flags = LCL_GSS | LCL_NFSV41;
+		if ((nd->nd_flag & ND_GSSINTEGRITY) != 0)
+			clp->lc_flags |= LCL_GSSINTEGRITY;
+		else if ((nd->nd_flag & ND_GSSPRIVACY) != 0)
+			clp->lc_flags |= LCL_GSSPRIVACY;
+	} else
+		clp->lc_flags = LCL_NFSV41;
+	if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) {
+		clp->lc_flags |= LCL_NAME;
+		clp->lc_namelen = nd->nd_princlen;
+		clp->lc_name = &clp->lc_id[idlen];
+		NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen);
+	} else {
+		clp->lc_uid = nd->nd_cred->cr_uid;
+		clp->lc_gid = nd->nd_cred->cr_gid;
+	}
+	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
+	v41flags = fxdr_unsigned(uint32_t, *tl++);
+	if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR |
+	    NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS |
+	    NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) {
+		nd->nd_repstat = NFSERR_INVAL;
+		goto nfsmout;
+	}
+	if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0)
+		confirm.lval[1] = 1;
+	else
+		confirm.lval[1] = 0;
+	v41flags = NFSV4EXCH_USENONPNFS;
+	sp4type = fxdr_unsigned(uint32_t, *tl);
+	if (sp4type != NFSV4EXCH_SP4NONE) {
+		nd->nd_repstat = NFSERR_NOTSUPP;
+		goto nfsmout;
+	}
+
+	/*
+	 * nfsrv_setclient() does the actual work of adding it to the
+	 * client list. If there is no error, the structure has been
+	 * linked into the client list and clp should no longer be used
+	 * here. When an error is returned, it has not been linked in,
+	 * so it should be free'd.
+	 */
+	nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p);
+	if (clp != NULL) {
+		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
+		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
+		free(clp->lc_stateid, M_NFSDCLIENT);
+		free(clp, M_NFSDCLIENT);
+	}
+	if (nd->nd_repstat == 0) {
+		if (confirm.lval[1] != 0)
+			v41flags |= NFSV4EXCH_CONFIRMEDR;
+		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED);
+		*tl++ = clientid.lval[0];			/* ClientID */
+		*tl++ = clientid.lval[1];
+		*tl++ = txdr_unsigned(confirm.lval[0]);		/* SequenceID */
+		*tl++ = txdr_unsigned(v41flags);		/* Exch flags */
+		*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);	/* No SSV */
+		owner_minor = 0;				/* Owner */
+		txdr_hyper(owner_minor, tl);			/* Minor */
+		(void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid,
+		    strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Major */
+		NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(NFSX_UNSIGNED);
+		*tl++ = time_uptime;		/* Make scope a unique value. */
+		*tl = txdr_unsigned(1);
+		(void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
+		(void)nfsm_strtom(nd, version, strlen(version));
+		NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
+		verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
+		verstime.tv_nsec = 0;
+		txdr_nfsv4time(&verstime, tl);
+	}
+	NFSEXITCODE2(0, nd);
+	return (0);
+nfsmout:
+	if (clp != NULL) {
+		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
+		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
+		free(clp->lc_stateid, M_NFSDCLIENT);
+		free(clp, M_NFSDCLIENT);
+	}
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
+ * nfsv4 create session service
+ */
+APPLESTATIC int
+nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+	uint32_t *tl;
+	int error = 0;
+	nfsquad_t clientid, confirm;
+	struct nfsdsession *sep = NULL;
+	uint32_t rdmacnt;
+
+	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
+		nd->nd_repstat = NFSERR_WRONGSEC;
+		goto nfsmout;
+	}
+	sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession),
+	    M_NFSDSESSION, M_WAITOK | M_ZERO);
+	sep->sess_refcnt = 1;
+	mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF);
+	NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
+	clientid.lval[0] = *tl++;
+	clientid.lval[1] = *tl++;
+	confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_crflags = fxdr_unsigned(uint32_t, *tl);
+	/* Persistent sessions and RDMA are not supported. */
+	sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN;
+
+	/* Fore channel attributes. */
+	NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+	tl++;					/* Header pad always 0. */
+	sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++);
+	if (sep->sess_maxslots > NFSV4_SLOTS)
+		sep->sess_maxslots = NFSV4_SLOTS;
+	rdmacnt = fxdr_unsigned(uint32_t, *tl);
+	if (rdmacnt > 1) {
+		nd->nd_repstat = NFSERR_BADXDR;
+		goto nfsmout;
+	} else if (rdmacnt == 1)
+		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+
+	/* Back channel attributes. */
+	NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
+	tl++;					/* Header pad always 0. */
+	sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++);
+	sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++);
+	rdmacnt = fxdr_unsigned(uint32_t, *tl);
+	if (rdmacnt > 1) {
+		nd->nd_repstat = NFSERR_BADXDR;
+		goto nfsmout;
+	} else if (rdmacnt == 1)
+		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl);
+
+	/*
+	 * nfsrv_getclient() searches the client list for a match and
+	 * returns the appropriate NFSERR status.
+	 */
+	nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW,
+	    NULL, sep, confirm, sep->sess_cbprogram, nd, p);
+	if (nd->nd_repstat == 0) {
+		NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
+		NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID);
+		NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(confirm.lval[0]);	/* sequenceid */
+		*tl++ = txdr_unsigned(sep->sess_crflags);
+
+		/* Fore channel attributes. */
+		*tl++ = 0;
+		*tl++ = txdr_unsigned(sep->sess_maxreq);
+		*tl++ = txdr_unsigned(sep->sess_maxresp);
+		*tl++ = txdr_unsigned(sep->sess_maxrespcached);
+		*tl++ = txdr_unsigned(sep->sess_maxops);
+		*tl++ = txdr_unsigned(sep->sess_maxslots);
+		*tl++ = txdr_unsigned(1);
+		*tl++ = txdr_unsigned(0);			/* No RDMA. */
+
+		/* Back channel attributes. */
+		*tl++ = 0;
+		*tl++ = txdr_unsigned(sep->sess_cbmaxreq);
+		*tl++ = txdr_unsigned(sep->sess_cbmaxresp);
+		*tl++ = txdr_unsigned(sep->sess_cbmaxrespcached);
+		*tl++ = txdr_unsigned(sep->sess_cbmaxops);
+		*tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots);
+		*tl++ = txdr_unsigned(1);
+		*tl = txdr_unsigned(0);			/* No RDMA. */
+	}
+nfsmout:
+	if (nd->nd_repstat != 0 && sep != NULL)
+		free(sep, M_NFSDSESSION);
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
+ * nfsv4 sequence service
+ */
+APPLESTATIC int
+nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+	uint32_t *tl;
+	uint32_t highest_slotid, sequenceid, sflags, target_highest_slotid;
+	int cache_this, error = 0;
+
+	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
+		nd->nd_repstat = NFSERR_WRONGSEC;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID);
+	NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID);
+	NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+	sequenceid = fxdr_unsigned(uint32_t, *tl++);
+	nd->nd_slotid = fxdr_unsigned(uint32_t, *tl++);
+	highest_slotid = fxdr_unsigned(uint32_t, *tl++);
+	if (*tl == newnfs_true)
+		cache_this = 1;
+	else
+		cache_this = 0;
+	nd->nd_flag |= ND_HASSEQUENCE;
+	nd->nd_repstat = nfsrv_checksequence(nd, sequenceid, &highest_slotid,
+	    &target_highest_slotid, cache_this, &sflags, p);
+	if (nd->nd_repstat == 0) {
+		NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
+		NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID);
+		NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(sequenceid);
+		*tl++ = txdr_unsigned(nd->nd_slotid);
+		*tl++ = txdr_unsigned(highest_slotid);
+		*tl++ = txdr_unsigned(target_highest_slotid);
+		*tl = txdr_unsigned(sflags);
+	}
+nfsmout:
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
+ * nfsv4 reclaim complete service
+ */
+APPLESTATIC int
+nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+	uint32_t *tl;
+	int error = 0;
+
+	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
+		nd->nd_repstat = NFSERR_WRONGSEC;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	if (*tl == newnfs_true)
+		nd->nd_repstat = NFSERR_NOTSUPP;
+	else
+		nd->nd_repstat = nfsrv_checkreclaimcomplete(nd);
+nfsmout:
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
+ * nfsv4 destroy clientid service
+ */
+APPLESTATIC int
+nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+	uint32_t *tl;
+	nfsquad_t clientid;
+	int error = 0;
+
+	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
+		nd->nd_repstat = NFSERR_WRONGSEC;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+	clientid.lval[0] = *tl++;
+	clientid.lval[1] = *tl;
+	nd->nd_repstat = nfsrv_destroyclient(clientid, p);
+nfsmout:
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
+ * nfsv4 destroy session service
+ */
+APPLESTATIC int
+nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+	uint8_t *cp, sessid[NFSX_V4SESSIONID];
+	int error = 0;
+
+	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
+		nd->nd_repstat = NFSERR_WRONGSEC;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(cp, uint8_t *, NFSX_V4SESSIONID);
+	NFSBCOPY(cp, sessid, NFSX_V4SESSIONID);
+	nd->nd_repstat = nfsrv_destroysession(nd, sessid);
+nfsmout:
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
+ * nfsv4 free stateid service
+ */
+APPLESTATIC int
+nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+	uint32_t *tl;
+	nfsv4stateid_t stateid;
+	int error = 0;
+
+	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
+		nd->nd_repstat = NFSERR_WRONGSEC;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
+	stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
+	NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
+	nd->nd_repstat = nfsrv_freestateid(nd, &stateid, p);
+nfsmout:
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
+ * nfsv4 service not supported
+ */
+APPLESTATIC int
+nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram,
+    __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
+{
+
+	nd->nd_repstat = NFSERR_NOTSUPP;
+	NFSEXITCODE2(0, nd);
+	return (0);
+}
+

Modified: trunk/sys/fs/nfsserver/nfs_nfsdsocket.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_nfsdsocket.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_nfsdsocket.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdsocket.c 306663 2016-10-03 23:17:57Z rmacklem $");
 
 /*
  * Socket operations for use by the nfs server.
@@ -46,8 +47,10 @@
 extern int nfs_pubfhset, nfs_rootfhset;
 extern struct nfsv4lock nfsv4rootfs_lock;
 extern struct nfsrv_stablefirst nfsrv_stablefirst;
-extern struct nfsclienthashhead nfsclienthash[NFSCLIENTHASHSIZE];
+extern struct nfsclienthashhead *nfsclienthash;
+extern int nfsrv_clienthashsize;
 extern int nfsrc_floodlevel, nfsrc_tcpsavedreplies;
+extern int nfsd_debuglevel;
 NFSV4ROOTLOCKMUTEX;
 NFSSTATESPINLOCK;
 
@@ -131,7 +134,7 @@
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
 };
 
-int (*nfsrv4_ops0[NFSV4OP_NOPS])(struct nfsrv_descript *,
+int (*nfsrv4_ops0[NFSV41_NOPS])(struct nfsrv_descript *,
     int, vnode_t , NFSPROC_T *, struct nfsexstuff *) = {
 	(int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0,
 	(int (*)(struct nfsrv_descript *, int, vnode_t , NFSPROC_T *, struct nfsexstuff *))0,
@@ -173,9 +176,28 @@
 	nfsrvd_verify,
 	nfsrvd_write,
 	nfsrvd_releaselckown,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_exchangeid,
+	nfsrvd_createsession,
+	nfsrvd_destroysession,
+	nfsrvd_freestateid,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_sequence,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_notsupp,
+	nfsrvd_destroyclientid,
+	nfsrvd_reclaimcomplete,
 };
 
-int (*nfsrv4_ops1[NFSV4OP_NOPS])(struct nfsrv_descript *,
+int (*nfsrv4_ops1[NFSV41_NOPS])(struct nfsrv_descript *,
     int, vnode_t , vnode_t *, fhandle_t *,
     NFSPROC_T *, struct nfsexstuff *) = {
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
@@ -218,9 +240,28 @@
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t *, fhandle_t *, NFSPROC_T *, struct nfsexstuff *))0,
 };
 
-int (*nfsrv4_ops2[NFSV4OP_NOPS])(struct nfsrv_descript *,
+int (*nfsrv4_ops2[NFSV41_NOPS])(struct nfsrv_descript *,
     int, vnode_t , vnode_t , NFSPROC_T *,
     struct nfsexstuff *, struct nfsexstuff *) = {
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
@@ -263,6 +304,25 @@
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
 	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
+	(int (*)(struct nfsrv_descript *, int, vnode_t , vnode_t , NFSPROC_T *, struct nfsexstuff *, struct nfsexstuff *))0,
 };
 #endif	/* !APPLEKEXT */
 
@@ -304,7 +364,7 @@
 
 /* local functions */
 static void nfsrvd_compound(struct nfsrv_descript *nd, int isdgram,
-    NFSPROC_T *p);
+    u_char *tag, int taglen, u_int32_t minorvers, NFSPROC_T *p);
 
 
 /*
@@ -314,7 +374,7 @@
 static int nfs_retfh[NFS_V3NPROCS] = { 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
 	1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0 };
 
-extern struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS];
+extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS];
 
 static int nfsv3to4op[NFS_V3NPROCS] = {
 	NFSPROC_NULL,
@@ -349,8 +409,8 @@
  * The NFS V4 Compound RPC is performed separately by nfsrvd_compound().
  */
 APPLESTATIC void
-nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram,
-    NFSPROC_T *p)
+nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen,
+    u_int32_t minorvers, NFSPROC_T *p)
 {
 	int error = 0, lktype;
 	vnode_t vp;
@@ -381,9 +441,12 @@
 			if (nd->nd_procnum == NFSPROC_READ ||
 			    nd->nd_procnum == NFSPROC_WRITE ||
 			    nd->nd_procnum == NFSPROC_READDIR ||
+			    nd->nd_procnum == NFSPROC_READDIRPLUS ||
 			    nd->nd_procnum == NFSPROC_READLINK ||
 			    nd->nd_procnum == NFSPROC_GETATTR ||
-			    nd->nd_procnum == NFSPROC_ACCESS)
+			    nd->nd_procnum == NFSPROC_ACCESS ||
+			    nd->nd_procnum == NFSPROC_FSSTAT ||
+			    nd->nd_procnum == NFSPROC_FSINFO)
 				lktype = LK_SHARED;
 			else
 				lktype = LK_EXCLUSIVE;
@@ -427,7 +490,7 @@
 	 * The group is indicated by the value in nfs_retfh[].
 	 */
 	if (nd->nd_flag & ND_NFSV4) {
-		nfsrvd_compound(nd, isdgram, p);
+		nfsrvd_compound(nd, isdgram, tag, taglen, minorvers, p);
 	} else {
 		if (nfs_retfh[nd->nd_procnum] == 1) {
 			if (vp)
@@ -482,15 +545,14 @@
  * vnode pointer handling.
  */
 static void
-nfsrvd_compound(struct nfsrv_descript *nd, int isdgram,
-    NFSPROC_T *p)
+nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag,
+    int taglen, u_int32_t minorvers, NFSPROC_T *p)
 {
-	int i, op;
+	int i, lktype, op, op0 = 0;
 	u_int32_t *tl;
 	struct nfsclient *clp, *nclp;
-	int numops, taglen = -1, error = 0, igotlock;
-	u_int32_t minorvers, retops = 0, *retopsp = NULL, *repp;
-	u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
+	int numops, error = 0, igotlock;
+	u_int32_t retops = 0, *retopsp = NULL, *repp;
 	vnode_t vp, nvp, savevp;
 	struct nfsrvfh fh;
 	mount_t new_mp, temp_mp = NULL;
@@ -553,7 +615,7 @@
 		 */
 		if (nfsrv_stablefirst.nsf_flags & NFSNSF_EXPIREDCLIENT) {
 			nfsrv_stablefirst.nsf_flags &= ~NFSNSF_EXPIREDCLIENT;
-			for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
+			for (i = 0; i < nfsrv_clienthashsize; i++) {
 			    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash,
 				nclp) {
 				if (clp->lc_flags & LCL_EXPIREIT) {
@@ -595,31 +657,17 @@
 	savevp = vp = NULL;
 	save_fsid.val[0] = save_fsid.val[1] = 0;
 	cur_fsid.val[0] = cur_fsid.val[1] = 0;
-	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
-	taglen = fxdr_unsigned(int, *tl);
+
+	/* If taglen < 0, there was a parsing error in nfsd_getminorvers(). */
 	if (taglen < 0) {
 		error = EBADRPC;
 		goto nfsmout;
 	}
-	if (taglen <= NFSV4_SMALLSTR)
-		tagstr = tag;
-	else
-		tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
-	error = nfsrv_mtostr(nd, tagstr, taglen);
-	if (error) {
-		if (taglen > NFSV4_SMALLSTR)
-			free(tagstr, M_TEMP);
-		taglen = -1;
-		goto nfsmout;
-	}
+
 	(void) nfsm_strtom(nd, tag, taglen);
-	if (taglen > NFSV4_SMALLSTR) {
-		free(tagstr, M_TEMP);
-	}
 	NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
-	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
-	minorvers = fxdr_unsigned(u_int32_t, *tl++);
-	if (minorvers != NFSV4_MINORVERSION)
+	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
+	if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION)
 		nd->nd_repstat = NFSERR_MINORVERMISMATCH;
 	if (nd->nd_repstat)
 		numops = 0;
@@ -638,7 +686,10 @@
 		NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*repp = *tl;
 		op = fxdr_unsigned(int, *tl);
-		if (op < NFSV4OP_ACCESS || op >= NFSV4OP_NOPS) {
+		NFSD_DEBUG(4, "op=%d\n", op);
+		if (op < NFSV4OP_ACCESS ||
+		    (op >= NFSV4OP_NOPS && (nd->nd_flag & ND_NFSV41) == 0) ||
+		    (op >= NFSV41_NOPS && (nd->nd_flag & ND_NFSV41) != 0)) {
 			nd->nd_repstat = NFSERR_OPILLEGAL;
 			*repp++ = txdr_unsigned(NFSV4OP_OPILLEGAL);
 			*repp = nfsd_errmap(nd);
@@ -647,6 +698,10 @@
 		} else {
 			repp++;
 		}
+		if (i == 0)
+			op0 = op;
+		if (i == numops - 1)
+			nd->nd_flag |= ND_LASTOP;
 
 		/*
 		 * Check for a referral on the current FH and, if so, return
@@ -661,6 +716,29 @@
 			break;
 		}
 
+		/*
+		 * For NFSv4.1, check for a Sequence Operation being first
+		 * or one of the other allowed operations by itself.
+		 */
+		if ((nd->nd_flag & ND_NFSV41) != 0) {
+			if (i != 0 && op == NFSV4OP_SEQUENCE)
+				nd->nd_repstat = NFSERR_SEQUENCEPOS;
+			else if (i == 0 && op != NFSV4OP_SEQUENCE &&
+			    op != NFSV4OP_EXCHANGEID &&
+			    op != NFSV4OP_CREATESESSION &&
+			    op != NFSV4OP_BINDCONNTOSESS &&
+			    op != NFSV4OP_DESTROYCLIENTID &&
+			    op != NFSV4OP_DESTROYSESSION)
+				nd->nd_repstat = NFSERR_OPNOTINSESS;
+			else if (i != 0 && op0 != NFSV4OP_SEQUENCE)
+				nd->nd_repstat = NFSERR_NOTONLYOP;
+			if (nd->nd_repstat != 0) {
+				*repp = nfsd_errmap(nd);
+				retops++;
+				break;
+			}
+		}
+
 		nd->nd_procnum = op;
 		/*
 		 * If over flood level, reply NFSERR_RESOURCE, if at the first
@@ -672,13 +750,13 @@
 		 * If nfsrv_mallocmget_limit() returns True, the system is near
 		 * to its limit for memory that malloc()/mget() can allocate.
 		 */
-		if (i == 0 && nd->nd_rp->rc_refcnt == 0 &&
+		if (i == 0 && (nd->nd_rp == NULL ||
+		    nd->nd_rp->rc_refcnt == 0) &&
 		    (nfsrv_mallocmget_limit() ||
 		     nfsrc_tcpsavedreplies > nfsrc_floodlevel)) {
-			if (nfsrc_tcpsavedreplies > nfsrc_floodlevel) {
-				printf("nfsd server cache flooded, try to");
-				printf(" increase nfsrc_floodlevel\n");
-			}
+			if (nfsrc_tcpsavedreplies > nfsrc_floodlevel)
+				printf("nfsd server cache flooded, try "
+				    "increasing vfs.nfsd.tcphighwater\n");
 			nd->nd_repstat = NFSERR_RESOURCE;
 			*repp = nfsd_errmap(nd);
 			if (op == NFSV4OP_SETATTR) {
@@ -694,7 +772,12 @@
 		}
 		if (nfsv4_opflag[op].savereply)
 			nd->nd_flag |= ND_SAVEREPLY;
-		NFSINCRGLOBAL(newnfsstats.srvrpccnt[nd->nd_procnum]);
+		/*
+		 * For now, newnfsstats.srvrpccnt[] doesn't have entries
+		 * for the NFSv4.1 operations.
+		 */
+		if (nd->nd_procnum < NFSV4OP_NOPS)
+			NFSINCRGLOBAL(newnfsstats.srvrpccnt[nd->nd_procnum]);
 		switch (op) {
 		case NFSV4OP_PUTFH:
 			error = nfsrv_mtofh(nd, &fh);
@@ -879,11 +962,15 @@
 				panic("nfsrvd_compound");
 			if (nfsv4_opflag[op].needscfh) {
 				if (vp != NULL) {
-					if (nfsv4_opflag[op].modifyfs)
+					lktype = nfsv4_opflag[op].lktype;
+					if (nfsv4_opflag[op].modifyfs) {
 						vn_start_write(vp, &temp_mp,
 						    V_WAIT);
-					if (NFSVOPLOCK(vp, nfsv4_opflag[op].lktype)
-					    == 0)
+						if (op == NFSV4OP_WRITE &&
+						    MNT_SHARED_WRITES(temp_mp))
+							lktype = LK_SHARED;
+					}
+					if (NFSVOPLOCK(vp, lktype) == 0)
 						VREF(vp);
 					else
 						nd->nd_repstat = NFSERR_PERM;

Modified: trunk/sys/fs/nfsserver/nfs_nfsdstate.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_nfsdstate.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_nfsdstate.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
@@ -26,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdstate.c 327008 2017-12-19 23:00:08Z rmacklem $");
 
 #ifndef APPLEKEXT
 #include <fs/nfs/nfsport.h>
@@ -44,21 +45,61 @@
 NFSV4ROOTLOCKMUTEX;
 NFSSTATESPINLOCK;
 
+SYSCTL_DECL(_vfs_nfsd);
+int	nfsrv_statehashsize = NFSSTATEHASHSIZE;
+TUNABLE_INT("vfs.nfsd.statehashsize", &nfsrv_statehashsize);
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
+    &nfsrv_statehashsize, 0,
+    "Size of state hash table set via loader.conf");
+
+int	nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
+TUNABLE_INT("vfs.nfsd.clienthashsize", &nfsrv_clienthashsize);
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
+    &nfsrv_clienthashsize, 0,
+    "Size of client hash table set via loader.conf");
+
+int	nfsrv_lockhashsize = NFSLOCKHASHSIZE;
+TUNABLE_INT("vfs.nfsd.fhhashsize", &nfsrv_lockhashsize);
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
+    &nfsrv_lockhashsize, 0,
+    "Size of file handle hash table set via loader.conf");
+
+int	nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
+TUNABLE_INT("vfs.nfsd.sessionhashsize", &nfsrv_sessionhashsize);
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
+    &nfsrv_sessionhashsize, 0,
+    "Size of session hash table set via loader.conf");
+
+static int	nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
+TUNABLE_INT("vfs.nfsd.v4statelimit", &nfsrv_v4statelimit);
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
+    &nfsrv_v4statelimit, 0,
+    "High water limit for NFSv4 opens+locks+delegations");
+
+static int	nfsrv_writedelegifpos = 0;
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
+    &nfsrv_writedelegifpos, 0,
+    "Issue a write delegation for read opens if possible");
+
+static int	nfsrv_allowreadforwriteopen = 1;
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
+    &nfsrv_allowreadforwriteopen, 0,
+    "Allow Reads to be done with Write Access StateIDs");
+
 /*
  * Hash lists for nfs V4.
- * (Some would put them in the .h file, but I don't like declaring storage
- *  in a .h)
  */
-struct nfsclienthashhead nfsclienthash[NFSCLIENTHASHSIZE];
-struct nfslockhashhead nfslockhash[NFSLOCKHASHSIZE];
+struct nfsclienthashhead	*nfsclienthash;
+struct nfslockhashhead		*nfslockhash;
+struct nfssessionhash		*nfssessionhash;
 #endif	/* !APPLEKEXT */
 
 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
 static time_t nfsrvboottime;
-static int nfsrv_writedelegifpos = 1;
 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
 static int nfsrv_nogsscallback = 0;
+static volatile int nfsrv_writedelegcnt = 0;
 
 /* local functions */
 static void nfsrv_dumpaclient(struct nfsclient *clp,
@@ -79,7 +120,7 @@
 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
     struct nfsstate **stpp);
 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
-    struct nfslockfile **new_lfpp, fhandle_t *nfhp, NFSPROC_T *p);
+    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
 static void nfsrv_insertlock(struct nfslock *new_lop,
@@ -89,10 +130,13 @@
 static int nfsrv_getipnumber(u_char *cp);
 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
     nfsv4stateid_t *stateidp, int specialid);
-static int nfsrv_checkgrace(u_int32_t flags);
+static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
+    u_int32_t flags);
 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
     nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
     struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
+static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
+    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
 static u_int32_t nfsrv_nextclientindex(void);
 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
 static void nfsrv_markstable(struct nfsclient *clp);
@@ -123,6 +167,11 @@
     uint64_t first, uint64_t end);
 static void nfsrv_locklf(struct nfslockfile *lfp);
 static void nfsrv_unlocklf(struct nfslockfile *lfp);
+static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
+static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
+static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
+    int dont_replycache, struct nfsdsession **sepp);
+static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
 
 /*
  * Scan the client list for a match and either return the current one,
@@ -144,7 +193,7 @@
 	/*
 	 * Check for state resource limit exceeded.
 	 */
-	if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
+	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
 		error = NFSERR_RESOURCE;
 		goto out;
 	}
@@ -179,7 +228,7 @@
 	 * Search for a match in the client list.
 	 */
 	gotit = i = 0;
-	while (i < NFSCLIENTHASHSIZE && !gotit) {
+	while (i < nfsrv_clienthashsize && !gotit) {
 	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
 		if (new_clp->lc_idlen == clp->lc_idlen &&
 		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
@@ -187,14 +236,27 @@
 			break;
 		}
 	    }
-	    i++;
+	    if (gotit == 0)
+		i++;
 	}
 	if (!gotit ||
 	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
+		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
+			/*
+			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
+			 * client is trying to update a confirmed clientid.
+			 */
+			NFSLOCKV4ROOTMUTEX();
+			nfsv4_unlock(&nfsv4rootfs_lock, 1);
+			NFSUNLOCKV4ROOTMUTEX();
+			confirmp->lval[1] = 0;
+			error = NFSERR_NOENT;
+			goto out;
+		}
 		/*
 		 * Get rid of the old one.
 		 */
-		if (i != NFSCLIENTHASHSIZE) {
+		if (i != nfsrv_clienthashsize) {
 			LIST_REMOVE(clp, lc_hash);
 			nfsrv_cleanclient(clp, p);
 			nfsrv_freedeleglist(&clp->lc_deleg);
@@ -205,7 +267,12 @@
 		 * Add it after assigning a client id to it.
 		 */
 		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
-		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
+			    ++confirm_index;
+		else
+			confirmp->qval = new_clp->lc_confirm.qval =
+			    ++confirm_index;
 		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
 		    (u_int32_t)nfsrvboottime;
 		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
@@ -217,7 +284,8 @@
 		LIST_INIT(&new_clp->lc_open);
 		LIST_INIT(&new_clp->lc_deleg);
 		LIST_INIT(&new_clp->lc_olddeleg);
-		for (i = 0; i < NFSSTATEHASHSIZE; i++)
+		LIST_INIT(&new_clp->lc_session);
+		for (i = 0; i < nfsrv_statehashsize; i++)
 			LIST_INIT(&new_clp->lc_stateid[i]);
 		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
 		    lc_hash);
@@ -289,7 +357,12 @@
 		 */
 		LIST_REMOVE(clp, lc_hash);
 		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
-		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
+		if ((nd->nd_flag & ND_NFSV41) != 0)
+			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
+			    ++confirm_index;
+		else
+			confirmp->qval = new_clp->lc_confirm.qval =
+			    ++confirm_index;
 		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
 		    nfsrvboottime;
 		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
@@ -312,7 +385,7 @@
 		    ls_list);
 		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
 			tstp->ls_clp = new_clp;
-		for (i = 0; i < NFSSTATEHASHSIZE; i++) {
+		for (i = 0; i < nfsrv_statehashsize; i++) {
 			LIST_NEWHEAD(&new_clp->lc_stateid[i],
 			    &clp->lc_stateid[i], ls_hash);
 			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
@@ -342,60 +415,71 @@
 		*new_clpp = NULL;
 		goto out;
 	}
-	/*
-	 * id and verifier match, so update the net address info
-	 * and get rid of any existing callback authentication
-	 * handle, so a new one will be acquired.
-	 */
-	LIST_REMOVE(clp, lc_hash);
-	new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
-	new_clp->lc_expiry = nfsrv_leaseexpiry();
-	confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
-	clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
-	    clp->lc_clientid.lval[0];
-	clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
-	    clp->lc_clientid.lval[1];
-	new_clp->lc_delegtime = clp->lc_delegtime;
-	new_clp->lc_stateindex = clp->lc_stateindex;
-	new_clp->lc_statemaxindex = clp->lc_statemaxindex;
-	new_clp->lc_cbref = 0;
-	LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
-	LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
-		tstp->ls_clp = new_clp;
-	LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
-	LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
-		tstp->ls_clp = new_clp;
-	LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
-	LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
-		tstp->ls_clp = new_clp;
-	for (i = 0; i < NFSSTATEHASHSIZE; i++) {
-		LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i],
-		    ls_hash);
-		LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
+
+	/* For NFSv4.1, mark that we found a confirmed clientid. */
+	if ((nd->nd_flag & ND_NFSV41) != 0) {
+		clientidp->lval[0] = clp->lc_clientid.lval[0];
+		clientidp->lval[1] = clp->lc_clientid.lval[1];
+		confirmp->lval[0] = 0;	/* Ignored by client */
+		confirmp->lval[1] = 1;
+	} else {
+		/*
+		 * id and verifier match, so update the net address info
+		 * and get rid of any existing callback authentication
+		 * handle, so a new one will be acquired.
+		 */
+		LIST_REMOVE(clp, lc_hash);
+		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
+		new_clp->lc_expiry = nfsrv_leaseexpiry();
+		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
+		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
+		    clp->lc_clientid.lval[0];
+		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
+		    clp->lc_clientid.lval[1];
+		new_clp->lc_delegtime = clp->lc_delegtime;
+		new_clp->lc_stateindex = clp->lc_stateindex;
+		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
+		new_clp->lc_cbref = 0;
+		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
+		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
 			tstp->ls_clp = new_clp;
+		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
+		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
+			tstp->ls_clp = new_clp;
+		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
+		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
+			tstp->ls_clp = new_clp;
+		for (i = 0; i < nfsrv_statehashsize; i++) {
+			LIST_NEWHEAD(&new_clp->lc_stateid[i],
+			    &clp->lc_stateid[i], ls_hash);
+			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
+				tstp->ls_clp = new_clp;
+		}
+		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
+		    lc_hash);
+		newnfsstats.srvclients++;
+		nfsrv_openpluslock++;
+		nfsrv_clients++;
 	}
-	LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
-	    lc_hash);
-	newnfsstats.srvclients++;
-	nfsrv_openpluslock++;
-	nfsrv_clients++;
 	NFSLOCKV4ROOTMUTEX();
 	nfsv4_unlock(&nfsv4rootfs_lock, 1);
 	NFSUNLOCKV4ROOTMUTEX();
 
-	/*
-	 * Must wait until any outstanding callback on the old clp
-	 * completes.
-	 */
-	NFSLOCKSTATE();
-	while (clp->lc_cbref) {
-		clp->lc_flags |= LCL_WAKEUPWANTED;
-		(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp",
-		    10 * hz);
+	if ((nd->nd_flag & ND_NFSV41) == 0) {
+		/*
+		 * Must wait until any outstanding callback on the old clp
+		 * completes.
+		 */
+		NFSLOCKSTATE();
+		while (clp->lc_cbref) {
+			clp->lc_flags |= LCL_WAKEUPWANTED;
+			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+			    "nfsdclp", 10 * hz);
+		}
+		NFSUNLOCKSTATE();
+		nfsrv_zapclient(clp, p);
+		*new_clpp = NULL;
 	}
-	NFSUNLOCKSTATE();
-	nfsrv_zapclient(clp, p);
-	*new_clpp = NULL;
 
 out:
 	NFSEXITCODE2(error, nd);
@@ -407,7 +491,8 @@
  */
 APPLESTATIC int
 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
-    nfsquad_t confirm, struct nfsrv_descript *nd, NFSPROC_T *p)
+    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
+    struct nfsrv_descript *nd, NFSPROC_T *p)
 {
 	struct nfsclient *clp;
 	struct nfsstate *stp;
@@ -414,10 +499,15 @@
 	int i;
 	struct nfsclienthashhead *hp;
 	int error = 0, igotlock, doneok;
+	struct nfssessionhash *shp;
+	struct nfsdsession *sep;
+	uint64_t sessid[2];
+	static uint64_t next_sess = 0;
 
 	if (clpp)
 		*clpp = NULL;
-	if (nfsrvboottime != clientid.lval[0]) {
+	if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
+	    opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
 		error = NFSERR_STALECLIENTID;
 		goto out;
 	}
@@ -434,17 +524,39 @@
 			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 			    NFSV4ROOTLOCKMUTEXPTR, NULL);
 		} while (!igotlock);
+		/*
+		 * Create a new sessionid here, since we need to do it where
+		 * there is a mutex held to serialize update of next_sess.
+		 */
+		if ((nd->nd_flag & ND_NFSV41) != 0) {
+			sessid[0] = ++next_sess;
+			sessid[1] = clientid.qval;
+		}
 		NFSUNLOCKV4ROOTMUTEX();
 	} else if (opflags != CLOPS_RENEW) {
 		NFSLOCKSTATE();
 	}
 
-	hp = NFSCLIENTHASH(clientid);
-	LIST_FOREACH(clp, hp, lc_hash) {
-		if (clp->lc_clientid.lval[1] == clientid.lval[1])
-			break;
+	/* For NFSv4.1, the clp is acquired from the associated session. */
+	if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
+	    opflags == CLOPS_RENEW) {
+		clp = NULL;
+		if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
+			shp = NFSSESSIONHASH(nd->nd_sessionid);
+			NFSLOCKSESSION(shp);
+			sep = nfsrv_findsession(nd->nd_sessionid);
+			if (sep != NULL)
+				clp = sep->sess_clp;
+			NFSUNLOCKSESSION(shp);
+		}
+	} else {
+		hp = NFSCLIENTHASH(clientid);
+		LIST_FOREACH(clp, hp, lc_hash) {
+			if (clp->lc_clientid.lval[1] == clientid.lval[1])
+				break;
+		}
 	}
-	if (clp == LIST_END(hp)) {
+	if (clp == NULL) {
 		if (opflags & CLOPS_CONFIRM)
 			error = NFSERR_STALECLIENTID;
 		else
@@ -470,7 +582,10 @@
 	 * Perform any operations specified by the opflags.
 	 */
 	if (opflags & CLOPS_CONFIRM) {
-		if (clp->lc_confirm.qval != confirm.qval)
+		if (((nd->nd_flag & ND_NFSV41) != 0 &&
+		     clp->lc_confirm.lval[0] != confirm.lval[0]) ||
+		    ((nd->nd_flag & ND_NFSV41) == 0 &&
+		     clp->lc_confirm.qval != confirm.qval))
 			error = NFSERR_STALECLIENTID;
 		else if (nfsrv_notsamecredname(nd, clp))
 			error = NFSERR_CLIDINUSE;
@@ -485,7 +600,7 @@
 			 */
 			nfsrv_cleanclient(clp, p);
 			nfsrv_freedeleglist(&clp->lc_olddeleg);
-			if (nfsrv_checkgrace(0)) {
+			if (nfsrv_checkgrace(nd, clp, 0)) {
 			    /* In grace, so just delete delegations */
 			    nfsrv_freedeleglist(&clp->lc_deleg);
 			} else {
@@ -496,10 +611,43 @@
 			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
 				ls_list);
 			}
+			if ((nd->nd_flag & ND_NFSV41) != 0)
+			    clp->lc_program = cbprogram;
 		    }
 		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
 		    if (clp->lc_program)
 			clp->lc_flags |= LCL_NEEDSCBNULL;
+		    /* For NFSv4.1, link the session onto the client. */
+		    if (nsep != NULL) {
+			/* Hold a reference on the xprt for a backchannel. */
+			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
+			    != 0 && clp->lc_req.nr_client == NULL) {
+			    clp->lc_req.nr_client = (struct __rpc_client *)
+				clnt_bck_create(nd->nd_xprt->xp_socket,
+				cbprogram, NFSV4_CBVERS);
+			    if (clp->lc_req.nr_client != NULL) {
+				SVC_ACQUIRE(nd->nd_xprt);
+				nd->nd_xprt->xp_p2 =
+				    clp->lc_req.nr_client->cl_private;
+				/* Disable idle timeout. */
+				nd->nd_xprt->xp_idletimeout = 0;
+				nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
+			    } else
+				nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
+			}
+			NFSBCOPY(sessid, nsep->sess_sessionid,
+			    NFSX_V4SESSIONID);
+			NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
+			    NFSX_V4SESSIONID);
+			shp = NFSSESSIONHASH(nsep->sess_sessionid);
+			NFSLOCKSTATE();
+			NFSLOCKSESSION(shp);
+			LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
+			LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
+			nsep->sess_clp = clp;
+			NFSUNLOCKSESSION(shp);
+			NFSUNLOCKSTATE();
+		    }
 		}
 	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
 		error = NFSERR_EXPIRED;
@@ -511,7 +659,7 @@
 	if (!error && (opflags & CLOPS_RENEWOP)) {
 	    if (nfsrv_notsamecredname(nd, clp)) {
 		doneok = 0;
-		for (i = 0; i < NFSSTATEHASHSIZE && doneok == 0; i++) {
+		for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
 		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 			if ((stp->ls_flags & NFSLCK_OPEN) &&
 			    stp->ls_uid == nd->nd_cred->cr_uid) {
@@ -546,6 +694,74 @@
 }
 
 /*
+ * Perform the NFSv4.1 destroy clientid.
+ */
+int
+nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
+{
+	struct nfsclient *clp;
+	struct nfsclienthashhead *hp;
+	int error = 0, i, igotlock;
+
+	if (nfsrvboottime != clientid.lval[0]) {
+		error = NFSERR_STALECLIENTID;
+		goto out;
+	}
+
+	/* Lock out other nfsd threads */
+	NFSLOCKV4ROOTMUTEX();
+	nfsv4_relref(&nfsv4rootfs_lock);
+	do {
+		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
+		    NFSV4ROOTLOCKMUTEXPTR, NULL);
+	} while (igotlock == 0);
+	NFSUNLOCKV4ROOTMUTEX();
+
+	hp = NFSCLIENTHASH(clientid);
+	LIST_FOREACH(clp, hp, lc_hash) {
+		if (clp->lc_clientid.lval[1] == clientid.lval[1])
+			break;
+	}
+	if (clp == NULL) {
+		NFSLOCKV4ROOTMUTEX();
+		nfsv4_unlock(&nfsv4rootfs_lock, 1);
+		NFSUNLOCKV4ROOTMUTEX();
+		/* Just return ok, since it is gone. */
+		goto out;
+	}
+
+	/* Scan for state on the clientid. */
+	for (i = 0; i < nfsrv_statehashsize; i++)
+		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
+			NFSLOCKV4ROOTMUTEX();
+			nfsv4_unlock(&nfsv4rootfs_lock, 1);
+			NFSUNLOCKV4ROOTMUTEX();
+			error = NFSERR_CLIENTIDBUSY;
+			goto out;
+		}
+	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
+		NFSLOCKV4ROOTMUTEX();
+		nfsv4_unlock(&nfsv4rootfs_lock, 1);
+		NFSUNLOCKV4ROOTMUTEX();
+		error = NFSERR_CLIENTIDBUSY;
+		goto out;
+	}
+
+	/* Destroy the clientid and return ok. */
+	nfsrv_cleanclient(clp, p);
+	nfsrv_freedeleglist(&clp->lc_deleg);
+	nfsrv_freedeleglist(&clp->lc_olddeleg);
+	LIST_REMOVE(clp, lc_hash);
+	NFSLOCKV4ROOTMUTEX();
+	nfsv4_unlock(&nfsv4rootfs_lock, 1);
+	NFSUNLOCKV4ROOTMUTEX();
+	nfsrv_zapclient(clp, p);
+out:
+	NFSEXITCODE2(error, nd);
+	return (error);
+}
+
+/*
  * Called from the new nfssvc syscall to admin revoke a clientid.
  * Returns 0 for success, error otherwise.
  */
@@ -572,7 +788,7 @@
 	 * Search for a match in the client list.
 	 */
 	gotit = i = 0;
-	while (i < NFSCLIENTHASHSIZE && !gotit) {
+	while (i < nfsrv_clienthashsize && !gotit) {
 	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
 		if (revokep->nclid_idlen == clp->lc_idlen &&
 		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
@@ -634,7 +850,7 @@
 	/*
 	 * Rattle through the client lists until done.
 	 */
-	while (i < NFSCLIENTHASHSIZE && cnt < maxcnt) {
+	while (i < nfsrv_clienthashsize && cnt < maxcnt) {
 	    clp = LIST_FIRST(&nfsclienthash[i]);
 	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
 		nfsrv_dumpaclient(clp, &dumpp[cnt]);
@@ -902,7 +1118,7 @@
 	/*
 	 * For each client...
 	 */
-	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
+	for (i = 0; i < nfsrv_clienthashsize; i++) {
 	    clp = LIST_FIRST(&nfsclienthash[i]);
 	    while (clp != LIST_END(&nfsclienthash[i])) {
 		nclp = LIST_NEXT(clp, lc_hash);
@@ -913,7 +1129,7 @@
 			     nfsrv_clients > nfsrv_clienthighwater)) ||
 			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
 			(clp->lc_expiry < NFSD_MONOSEC &&
-			 (nfsrv_openpluslock * 10 / 9) > NFSRV_V4STATELIMIT)) {
+			 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
 			/*
 			 * Lease has expired several nfsrv_lease times ago:
 			 * PLUS
@@ -952,7 +1168,7 @@
 					stp->ls_noopens++;
 					if (stp->ls_noopens > NFSNOOPEN ||
 					    (nfsrv_openpluslock * 2) >
-					    NFSRV_V4STATELIMIT)
+					    nfsrv_v4statelimit)
 						nfsrv_stablefirst.nsf_flags |=
 							NFSNSF_NOOPENS;
 				} else {
@@ -983,9 +1199,13 @@
 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
 {
 	struct nfsstate *stp, *nstp;
+	struct nfsdsession *sep, *nsep;
 
 	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
 		nfsrv_freeopenowner(stp, 1, p);
+	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
+		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
+			(void)nfsrv_freesession(sep, NULL);
 }
 
 /*
@@ -1012,7 +1232,8 @@
 	newnfs_disconnect(&clp->lc_req);
 	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
 	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
-	free((caddr_t)clp, M_NFSDCLIENT);
+	free(clp->lc_stateid, M_NFSDCLIENT);
+	free(clp, M_NFSDCLIENT);
 	NFSLOCKSTATE();
 	newnfsstats.srvclients--;
 	nfsrv_openpluslock--;
@@ -1047,6 +1268,8 @@
 	LIST_REMOVE(stp, ls_hash);
 	LIST_REMOVE(stp, ls_list);
 	LIST_REMOVE(stp, ls_file);
+	if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
+		nfsrv_writedelegcnt--;
 	lfp = stp->ls_lfp;
 	if (LIST_EMPTY(&lfp->lf_open) &&
 	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
@@ -1168,6 +1391,8 @@
 	vnode_t tvp = NULL;
 	uint64_t first, end;
 
+	if (vp != NULL)
+		ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
 	lop = LIST_FIRST(&stp->ls_lock);
 	while (lop != LIST_END(&stp->ls_lock)) {
 		nlop = LIST_NEXT(lop, lo_lckowner);
@@ -1187,9 +1412,10 @@
 		if (gottvp == 0) {
 			if (nfsrv_dolocallocks == 0)
 				tvp = NULL;
-			else if (vp == NULL && cansleep != 0)
+			else if (vp == NULL && cansleep != 0) {
 				tvp = nfsvno_getvp(&lfp->lf_fh);
-			else
+				NFSVOPUNLOCK(tvp, 0);
+			} else
 				tvp = vp;
 			gottvp = 1;
 		}
@@ -1210,7 +1436,7 @@
 		lop = nlop;
 	}
 	if (vp == NULL && tvp != NULL)
-		vput(tvp);
+		vrele(tvp);
 }
 
 /*
@@ -1321,7 +1547,7 @@
 	struct nfsclient *clp = NULL;
 	u_int32_t bits;
 	int error = 0, haslock = 0, ret, reterr;
-	int getlckret, delegation = 0, filestruct_locked;
+	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
 	fhandle_t nfh;
 	uint64_t first, end;
 	uint32_t lock_flags;
@@ -1355,7 +1581,7 @@
 	 * Check for state resource limit exceeded.
 	 */
 	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
-	    nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
+	    nfsrv_openpluslock > nfsrv_v4statelimit) {
 		error = NFSERR_RESOURCE;
 		goto out;
 	}
@@ -1411,6 +1637,11 @@
 			 * locking rolled back.
 			 */
 			NFSUNLOCKSTATE();
+			if (vnode_unlocked == 0) {
+				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
+				vnode_unlocked = 1;
+				NFSVOPUNLOCK(vp, 0);
+			}
 			reterr = nfsrv_locallock(vp, lfp,
 			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
 			    new_lop->lo_first, new_lop->lo_end, cfp, p);
@@ -1425,8 +1656,8 @@
 		 * lease, but the concensus seems to be that it is ok
 		 * for a server to do so.
 		 */
-		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
-		    (nfsquad_t)((u_quad_t)0), NULL, p);
+		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
+		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
 
 		/*
 		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
@@ -1438,8 +1669,8 @@
 		    error = 0;
 		lckstp = new_stp;
 	    } else {
-	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
-		(nfsquad_t)((u_quad_t)0), NULL, p);
+	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
+		(nfsquad_t)((u_quad_t)0), 0, nd, p);
 	      if (error == 0)
 		/*
 		 * Look up the stateid
@@ -1528,8 +1759,11 @@
 	       * allow for either server configuration.)
 	       */
 	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
-		  (!(new_stp->ls_flags & NFSLCK_CHECK) ||
-		   nfsrv_returnoldstateid))
+		  (((nd->nd_flag & ND_NFSV41) == 0 &&
+		   (!(new_stp->ls_flags & NFSLCK_CHECK) ||
+		    nfsrv_returnoldstateid)) ||
+		   ((nd->nd_flag & ND_NFSV41) != 0 &&
+		    new_stp->ls_stateid.seqid != 0)))
 		    error = NFSERR_OLDSTATEID;
 	    }
 	}
@@ -1538,7 +1772,7 @@
 	 * Now we can check for grace.
 	 */
 	if (!error)
-		error = nfsrv_checkgrace(new_stp->ls_flags);
+		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 		nfsrv_checkstable(clp))
 		error = NFSERR_NOGRACE;
@@ -1569,6 +1803,11 @@
 		if (filestruct_locked != 0) {
 			/* Roll back local locks. */
 			NFSUNLOCKSTATE();
+			if (vnode_unlocked == 0) {
+				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
+				vnode_unlocked = 1;
+				NFSVOPUNLOCK(vp, 0);
+			}
 			nfsrv_locallock_rollback(vp, lfp, p);
 			NFSLOCKSTATE();
 			nfsrv_unlocklf(lfp);
@@ -1639,7 +1878,8 @@
 		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
 		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
 		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
-		     !(mystp->ls_flags & NFSLCK_READACCESS)) ||
+		     !(mystp->ls_flags & NFSLCK_READACCESS) &&
+		     nfsrv_allowreadforwriteopen == 0) ||
 		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
 		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
 		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
@@ -1646,6 +1886,12 @@
 			if (filestruct_locked != 0) {
 				/* Roll back local locks. */
 				NFSUNLOCKSTATE();
+				if (vnode_unlocked == 0) {
+					ASSERT_VOP_ELOCKED(vp,
+					    "nfsrv_lockctrl3");
+					vnode_unlocked = 1;
+					NFSVOPUNLOCK(vp, 0);
+				}
 				nfsrv_locallock_rollback(vp, lfp, p);
 				NFSLOCKSTATE();
 				nfsrv_unlocklf(lfp);
@@ -1665,6 +1911,8 @@
 			bits = tstp->ls_flags;
 			bits >>= NFSLCK_SHIFT;
 			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
+			    KASSERT(vnode_unlocked == 0,
+				("nfsrv_lockctrl: vnode unlocked1"));
 			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
 				vp, p);
 			    if (ret == 1) {
@@ -1696,6 +1944,8 @@
 	 * For setattr, just get rid of all the Delegations for other clients.
 	 */
 	if (new_stp->ls_flags & NFSLCK_SETATTR) {
+		KASSERT(vnode_unlocked == 0,
+		    ("nfsrv_lockctrl: vnode unlocked2"));
 		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
 		if (ret) {
 			/*
@@ -1746,14 +1996,26 @@
 		   (new_lop->lo_flags & NFSLCK_WRITE) &&
 		  (clp != tstp->ls_clp ||
 		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
+		ret = 0;
 		if (filestruct_locked != 0) {
 			/* Roll back local locks. */
 			NFSUNLOCKSTATE();
+			if (vnode_unlocked == 0) {
+				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
+				NFSVOPUNLOCK(vp, 0);
+			}
 			nfsrv_locallock_rollback(vp, lfp, p);
 			NFSLOCKSTATE();
 			nfsrv_unlocklf(lfp);
+			NFSUNLOCKSTATE();
+			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+			vnode_unlocked = 0;
+			if ((vp->v_iflag & VI_DOOMED) != 0)
+				ret = NFSERR_SERVERFAULT;
+			NFSLOCKSTATE();
 		}
-		ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
+		if (ret == 0)
+			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
 		if (ret) {
 		    /*
 		     * nfsrv_delegconflict unlocks state when it
@@ -1785,11 +2047,18 @@
 		end = new_lop->lo_end;
 		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
 		stateidp->seqid = ++(stp->ls_stateid.seqid);
+		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
+			stateidp->seqid = stp->ls_stateid.seqid = 1;
 		stateidp->other[0] = stp->ls_stateid.other[0];
 		stateidp->other[1] = stp->ls_stateid.other[1];
 		stateidp->other[2] = stp->ls_stateid.other[2];
 		if (filestruct_locked != 0) {
 			NFSUNLOCKSTATE();
+			if (vnode_unlocked == 0) {
+				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
+				vnode_unlocked = 1;
+				NFSVOPUNLOCK(vp, 0);
+			}
 			/* Update the local locks. */
 			nfsrv_localunlock(vp, lfp, first, end, p);
 			NFSLOCKSTATE();
@@ -1820,14 +2089,29 @@
 		    FREE((caddr_t)other_lop, M_NFSDLOCK);
 		    other_lop = NULL;
 		}
-		ret = nfsrv_clientconflict(lop->lo_stp->ls_clp,&haslock,vp,p);
+		if (vnode_unlocked != 0)
+		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
+			NULL, p);
+		else
+		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
+			vp, p);
 		if (ret == 1) {
 		    if (filestruct_locked != 0) {
+			if (vnode_unlocked == 0) {
+				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
+				NFSVOPUNLOCK(vp, 0);
+			}
 			/* Roll back local locks. */
 			nfsrv_locallock_rollback(vp, lfp, p);
 			NFSLOCKSTATE();
 			nfsrv_unlocklf(lfp);
 			NFSUNLOCKSTATE();
+			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+			vnode_unlocked = 0;
+			if ((vp->v_iflag & VI_DOOMED) != 0) {
+				error = NFSERR_SERVERFAULT;
+				goto out;
+			}
 		    }
 		    /*
 		     * nfsrv_clientconflict() unlocks state when it
@@ -1861,6 +2145,11 @@
 		if (filestruct_locked != 0 && ret == 0) {
 			/* Roll back local locks. */
 			NFSUNLOCKSTATE();
+			if (vnode_unlocked == 0) {
+				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
+				vnode_unlocked = 1;
+				NFSVOPUNLOCK(vp, 0);
+			}
 			nfsrv_locallock_rollback(vp, lfp, p);
 			NFSLOCKSTATE();
 			nfsrv_unlocklf(lfp);
@@ -1892,6 +2181,8 @@
 	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
 		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
 		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
+		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
+			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
 		stateidp->other[0] = lckstp->ls_stateid.other[0];
 		stateidp->other[1] = lckstp->ls_stateid.other[1];
 		stateidp->other[2] = lckstp->ls_stateid.other[2];
@@ -1937,6 +2228,11 @@
 		nfsv4_unlock(&nfsv4rootfs_lock, 1);
 		NFSUNLOCKV4ROOTMUTEX();
 	}
+	if (vnode_unlocked != 0) {
+		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
+			error = NFSERR_SERVERFAULT;
+	}
 	if (other_lop)
 		FREE((caddr_t)other_lop, M_NFSDLOCK);
 	NFSEXITCODE2(error, nd);
@@ -1976,7 +2272,7 @@
 	 * returns NFSERR_RESOURCE and the limit is just a rather
 	 * arbitrary high water mark, so no harm is done.
 	 */
-	if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
+	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
 		error = NFSERR_RESOURCE;
 		goto out;
 	}
@@ -1985,14 +2281,14 @@
 	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
 	    M_NFSDLOCKFILE, M_WAITOK);
 	if (vp)
-		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, &new_lfp,
+		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 		    NULL, p);
 	NFSLOCKSTATE();
 	/*
 	 * Get the nfsclient structure.
 	 */
-	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
-	    (nfsquad_t)((u_quad_t)0), NULL, p);
+	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
+	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
 
 	/*
 	 * Look up the open owner. See if it needs confirmation and
@@ -2018,7 +2314,7 @@
 	 * Check for grace.
 	 */
 	if (!error)
-		error = nfsrv_checkgrace(new_stp->ls_flags);
+		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 		nfsrv_checkstable(clp))
 		error = NFSERR_NOGRACE;
@@ -2084,7 +2380,9 @@
 	     */
 	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
-		    stateidp->seqid == stp->ls_stateid.seqid &&
+		    (((nd->nd_flag & ND_NFSV41) != 0 &&
+		    stateidp->seqid == 0) ||
+		    stateidp->seqid == stp->ls_stateid.seqid) &&
 		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 			  NFSX_STATEIDOTHER))
 			break;
@@ -2210,6 +2508,8 @@
 	struct nfsclient *clp;
 	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
 	int readonly = 0, cbret = 1, getfhret = 0;
+	int gotstate = 0, len = 0;
+	u_char *clidp = NULL;
 
 	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 		readonly = 1;
@@ -2228,6 +2528,7 @@
 		goto out;
 	}
 
+	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 tryagain:
 	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
 	    M_NFSDLOCKFILE, M_WAITOK);
@@ -2235,7 +2536,7 @@
 	    M_NFSDSTATE, M_WAITOK);
 	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
 	    M_NFSDSTATE, M_WAITOK);
-	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, &new_lfp,
+	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 	    NULL, p);
 	NFSLOCKSTATE();
 	/*
@@ -2252,8 +2553,8 @@
 	 *     storage file must be written prior to completion of state
 	 *     expiration.
 	 */
-	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
-	    (nfsquad_t)((u_quad_t)0), NULL, p);
+	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
+	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
 	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
 	    clp->lc_program) {
 		/*
@@ -2341,7 +2642,9 @@
 	     */
 	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
-		    stateidp->seqid == stp->ls_stateid.seqid &&
+		    (((nd->nd_flag & ND_NFSV41) != 0 &&
+		    stateidp->seqid == 0) ||
+		    stateidp->seqid == stp->ls_stateid.seqid) &&
 		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 			NFSX_STATEIDOTHER))
 			break;
@@ -2506,7 +2809,7 @@
 		    LIST_REMOVE(stp, ls_list);
 		    LIST_REMOVE(stp, ls_hash);
 		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
-		    stp->ls_stateid.seqid = delegstateidp->seqid = 0;
+		    stp->ls_stateid.seqid = delegstateidp->seqid = 1;
 		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
 			clp->lc_clientid.lval[0];
 		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
@@ -2527,7 +2830,7 @@
 		    /*
 		     * Now, do the associated open.
 		     */
-		    new_open->ls_stateid.seqid = 0;
+		    new_open->ls_stateid.seqid = 1;
 		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
@@ -2592,7 +2895,7 @@
 		 * First, add the delegation. (Although we must issue the
 		 * delegation, we can also ask for an immediate return.)
 		 */
-		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
+		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
 		    clp->lc_clientid.lval[0];
 		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
@@ -2603,6 +2906,7 @@
 		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
+		    nfsrv_writedelegcnt++;
 		} else {
 		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 			NFSLCK_READACCESS);
@@ -2631,7 +2935,7 @@
 		/*
 		 * Now, do the associated open.
 		 */
-		new_open->ls_stateid.seqid = 0;
+		new_open->ls_stateid.seqid = 1;
 		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
@@ -2686,7 +2990,7 @@
 		    stp = LIST_FIRST(&ownerstp->ls_open);
 		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 			NFSLCK_OPEN;
-		    stp->ls_stateid.seqid = 0;
+		    stp->ls_stateid.seqid = 1;
 		    stp->ls_uid = new_stp->ls_uid;
 		    if (lfp != stp->ls_lfp) {
 			LIST_REMOVE(stp, ls_file);
@@ -2697,18 +3001,28 @@
 		} else if (openstp) {
 		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
 		    openstp->ls_stateid.seqid++;
+		    if ((nd->nd_flag & ND_NFSV41) != 0 &&
+			openstp->ls_stateid.seqid == 0)
+			openstp->ls_stateid.seqid = 1;
 
 		    /*
 		     * This is where we can choose to issue a delegation.
 		     */
-		    if (delegate && nfsrv_issuedelegs &&
-			writedeleg && !NFSVNO_EXRDONLY(exp) &&
-			(nfsrv_writedelegifpos || !readonly) &&
-			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
-			 LCL_CALLBACKSON &&
-			!NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
-			NFSVNO_DELEGOK(vp)) {
-			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
+		    if (delegate == 0 || writedeleg == 0 ||
+			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
+			nfsrv_writedelegifpos == 0) ||
+			!NFSVNO_DELEGOK(vp) ||
+			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
+			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
+			 LCL_CALLBACKSON)
+			*rflagsp |= NFSV4OPEN_WDCONTENTION;
+		    else if (nfsrv_issuedelegs == 0 ||
+			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
+			*rflagsp |= NFSV4OPEN_WDRESOURCE;
+		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
+			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
+		    else {
+			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 			    = clp->lc_clientid.lval[0];
 			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
@@ -2723,6 +3037,7 @@
 			new_deleg->ls_clp = clp;
 			new_deleg->ls_filerev = filerev;
 			new_deleg->ls_compref = nd->nd_compref;
+			nfsrv_writedelegcnt++;
 			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 			    new_deleg->ls_stateid), new_deleg, ls_hash);
@@ -2733,7 +3048,7 @@
 			nfsrv_delegatecnt++;
 		    }
 		} else {
-		    new_open->ls_stateid.seqid = 0;
+		    new_open->ls_stateid.seqid = 1;
 		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
@@ -2756,13 +3071,18 @@
 		    /*
 		     * This is where we can choose to issue a delegation.
 		     */
-		    if (delegate && nfsrv_issuedelegs &&
-			(writedeleg || readonly) &&
-			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
-			 LCL_CALLBACKSON &&
-			!NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
-			NFSVNO_DELEGOK(vp)) {
-			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
+		    if (delegate == 0 || (writedeleg == 0 && readonly == 0) ||
+			!NFSVNO_DELEGOK(vp) ||
+			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
+			 LCL_CALLBACKSON)
+			*rflagsp |= NFSV4OPEN_WDCONTENTION;
+		    else if (nfsrv_issuedelegs == 0 ||
+			NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
+			*rflagsp |= NFSV4OPEN_WDRESOURCE;
+		    else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
+			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
+		    else {
+			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 			    = clp->lc_clientid.lval[0];
 			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
@@ -2770,10 +3090,12 @@
 			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 			    = nfsrv_nextstateindex(clp);
 			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
-			    (nfsrv_writedelegifpos || !readonly)) {
+			    (nfsrv_writedelegifpos || !readonly) &&
+			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
 			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
+			    nfsrv_writedelegcnt++;
 			} else {
 			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 				NFSLCK_READACCESS);
@@ -2800,7 +3122,7 @@
 		 * Needs confirmation (unless a reclaim) and hang the
 		 * new open off it.
 		 */
-		new_open->ls_stateid.seqid = 0;
+		new_open->ls_stateid.seqid = 1;
 		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
@@ -2814,6 +3136,75 @@
 		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 			new_stp->ls_flags = 0;
+		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
+			/* NFSv4.1 never needs confirmation. */
+			new_stp->ls_flags = 0;
+
+			/*
+			 * This is where we can choose to issue a delegation.
+			 */
+			if (delegate && nfsrv_issuedelegs &&
+			    (writedeleg || readonly) &&
+			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
+			     LCL_CALLBACKSON &&
+			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
+			    NFSVNO_DELEGOK(vp) &&
+			    ((nd->nd_flag & ND_NFSV41) == 0 ||
+			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
+				new_deleg->ls_stateid.seqid =
+				    delegstateidp->seqid = 1;
+				new_deleg->ls_stateid.other[0] =
+				    delegstateidp->other[0]
+				    = clp->lc_clientid.lval[0];
+				new_deleg->ls_stateid.other[1] =
+				    delegstateidp->other[1]
+				    = clp->lc_clientid.lval[1];
+				new_deleg->ls_stateid.other[2] =
+				    delegstateidp->other[2]
+				    = nfsrv_nextstateindex(clp);
+				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
+				    (nfsrv_writedelegifpos || !readonly) &&
+				    ((nd->nd_flag & ND_NFSV41) == 0 ||
+				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
+				     0)) {
+					new_deleg->ls_flags =
+					    (NFSLCK_DELEGWRITE |
+					     NFSLCK_READACCESS |
+					     NFSLCK_WRITEACCESS);
+					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
+					nfsrv_writedelegcnt++;
+				} else {
+					new_deleg->ls_flags =
+					    (NFSLCK_DELEGREAD |
+					     NFSLCK_READACCESS);
+					*rflagsp |= NFSV4OPEN_READDELEGATE;
+				}
+				new_deleg->ls_uid = new_stp->ls_uid;
+				new_deleg->ls_lfp = lfp;
+				new_deleg->ls_clp = clp;
+				new_deleg->ls_filerev = filerev;
+				new_deleg->ls_compref = nd->nd_compref;
+				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
+				    ls_file);
+				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
+				    new_deleg->ls_stateid), new_deleg, ls_hash);
+				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
+				    ls_list);
+				new_deleg = NULL;
+				newnfsstats.srvdelegates++;
+				nfsrv_openpluslock++;
+				nfsrv_delegatecnt++;
+			}
+			/*
+			 * Since NFSv4.1 never does an OpenConfirm, the first
+			 * open state will be acquired here.
+			 */
+			if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
+				clp->lc_flags |= LCL_STAMPEDSTABLE;
+				len = clp->lc_idlen;
+				NFSBCOPY(clp->lc_id, clidp, len);
+				gotstate = 1;
+			}
 		} else {
 			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
@@ -2850,7 +3241,17 @@
 	if (new_deleg)
 		FREE((caddr_t)new_deleg, M_NFSDSTATE);
 
+	/*
+	 * If the NFSv4.1 client just acquired its first open, write a timestamp
+	 * to the stable storage file.
+	 */
+	if (gotstate != 0) {
+		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
+		nfsrv_backupstable();
+	}
+
 out:
+	free(clidp, M_TEMP);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
@@ -2867,7 +3268,7 @@
 	struct nfslockfile *lfp;
 	u_int32_t bits;
 	int error = 0, gotstate = 0, len = 0;
-	u_char client[NFSV4_OPAQUELIMIT];
+	u_char *clidp = NULL;
 
 	/*
 	 * Check for restart conditions (client and server).
@@ -2877,12 +3278,13 @@
 	if (error)
 		goto out;
 
+	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 	NFSLOCKSTATE();
 	/*
 	 * Get the open structure via clientid and stateid.
 	 */
-	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
-	    (nfsquad_t)((u_quad_t)0), NULL, p);
+	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
+	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
 	if (!error)
 		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 		    new_stp->ls_flags, &stp);
@@ -2901,7 +3303,10 @@
 		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 		    stp->ls_openowner, new_stp->ls_op);
 	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
-	    !(new_stp->ls_flags & NFSLCK_CONFIRM))
+	    (((nd->nd_flag & ND_NFSV41) == 0 &&
+	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
+	     ((nd->nd_flag & ND_NFSV41) != 0 &&
+	      new_stp->ls_stateid.seqid != 0)))
 		error = NFSERR_OLDSTATEID;
 	if (!error && vnode_vtype(vp) != VREG) {
 		if (vnode_vtype(vp) == VDIR)
@@ -2929,6 +3334,8 @@
 	 * Set the return stateid.
 	 */
 	stateidp->seqid = stp->ls_stateid.seqid + 1;
+	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
+		stateidp->seqid = 1;
 	stateidp->other[0] = stp->ls_stateid.other[0];
 	stateidp->other[1] = stp->ls_stateid.other[1];
 	stateidp->other[2] = stp->ls_stateid.other[2];
@@ -2944,10 +3351,13 @@
 			printf("Nfsv4d: stray open confirm\n");
 		stp->ls_openowner->ls_flags = 0;
 		stp->ls_stateid.seqid++;
+		if ((nd->nd_flag & ND_NFSV41) != 0 &&
+		    stp->ls_stateid.seqid == 0)
+			stp->ls_stateid.seqid = 1;
 		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 			clp->lc_flags |= LCL_STAMPEDSTABLE;
 			len = clp->lc_idlen;
-			NFSBCOPY(clp->lc_id, client, len);
+			NFSBCOPY(clp->lc_id, clidp, len);
 			gotstate = 1;
 		}
 		NFSUNLOCKSTATE();
@@ -2958,11 +3368,14 @@
 			/* Get the lf lock */
 			nfsrv_locklf(lfp);
 			NFSUNLOCKSTATE();
+			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
+			NFSVOPUNLOCK(vp, 0);
 			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
 				NFSLOCKSTATE();
 				nfsrv_unlocklf(lfp);
 				NFSUNLOCKSTATE();
 			}
+			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		} else {
 			(void) nfsrv_freeopen(stp, NULL, 0, p);
 			NFSUNLOCKSTATE();
@@ -2980,6 +3393,9 @@
 		}
 		stp->ls_flags = (bits | NFSLCK_OPEN);
 		stp->ls_stateid.seqid++;
+		if ((nd->nd_flag & ND_NFSV41) != 0 &&
+		    stp->ls_stateid.seqid == 0)
+			stp->ls_stateid.seqid = 1;
 		NFSUNLOCKSTATE();
 	}
 
@@ -2988,11 +3404,12 @@
 	 * to the stable storage file.
 	 */
 	if (gotstate != 0) {
-		nfsrv_writestable(client, len, NFSNST_NEWSTATE, p);
+		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 		nfsrv_backupstable();
 	}
 
 out:
+	free(clidp, M_TEMP);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
@@ -3001,8 +3418,9 @@
  * Delegation update. Does the purge and return.
  */
 APPLESTATIC int
-nfsrv_delegupdate(nfsquad_t clientid, nfsv4stateid_t *stateidp,
-    vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p)
+nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
+    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
+    NFSPROC_T *p)
 {
 	struct nfsstate *stp;
 	struct nfsclient *clp;
@@ -3032,8 +3450,8 @@
 	 * Get the open structure via clientid and stateid.
 	 */
 	if (!error)
-	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
-		(nfsquad_t)((u_quad_t)0), NULL, p);
+	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
+		(nfsquad_t)((u_quad_t)0), 0, nd, p);
 	if (error) {
 		if (error == NFSERR_CBPATHDOWN)
 			error = 0;
@@ -3042,7 +3460,8 @@
 	}
 	if (!error && op == NFSV4OP_DELEGRETURN) {
 	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
-	    if (!error && stp->ls_stateid.seqid != stateidp->seqid)
+	    if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
+		((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
 		error = NFSERR_OLDSTATEID;
 	}
 	/*
@@ -3101,8 +3520,8 @@
 	/*
 	 * Get the lock owner by name.
 	 */
-	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
-	    (nfsquad_t)((u_quad_t)0), NULL, p);
+	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
+	    (nfsquad_t)((u_quad_t)0), 0, NULL, p);
 	if (error) {
 		NFSUNLOCKSTATE();
 		goto out;
@@ -3142,11 +3561,10 @@
  * Get the file handle for a lock structure.
  */
 static int
-nfsrv_getlockfh(vnode_t vp, u_short flags,
-    struct nfslockfile **new_lfpp, fhandle_t *nfhp, NFSPROC_T *p)
+nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
+    fhandle_t *nfhp, NFSPROC_T *p)
 {
 	fhandle_t *fhp = NULL;
-	struct nfslockfile *new_lfp;
 	int error;
 
 	/*
@@ -3154,7 +3572,7 @@
 	 * a fhandle_t on the stack.
 	 */
 	if (flags & NFSLCK_OPEN) {
-		new_lfp = *new_lfpp;
+		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
 		fhp = &new_lfp->lf_fh;
 	} else if (nfhp) {
 		fhp = nfhp;
@@ -3420,6 +3838,9 @@
 {
 	int error = 0;
 
+	if ((nd->nd_flag & ND_NFSV41) != 0)
+		/* NFSv4.1 ignores the open_seqid and lock_seqid. */
+		goto out;
 	if (op != nd->nd_rp)
 		panic("nfsrvstate checkseqid");
 	if (!(op->rc_flag & RC_INPROG))
@@ -3475,11 +3896,11 @@
 	u_char protocol[5], addr[24];
 	int error = 0, cantparse = 0;
 	union {
-		u_long ival;
+		in_addr_t ival;
 		u_char cval[4];
 	} ip;
 	union {
-		u_short sval;
+		in_port_t sval;
 		u_char cval[2];
 	} port;
 
@@ -3573,8 +3994,10 @@
 	}
 	if (cantparse) {
 		sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
-		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
-		rad->sin_port = 0x0;
+		if (sad->sin_family == AF_INET) {
+			rad->sin_addr.s_addr = sad->sin_addr.s_addr;
+			rad->sin_port = 0x0;
+		}
 		clp->lc_program = 0;
 	}
 nfsmout:
@@ -3638,7 +4061,7 @@
 		goto out;
 
 	NFSLOCKSTATE();
-	ret = nfsrv_checkgrace(flags);
+	ret = nfsrv_checkgrace(NULL, NULL, flags);
 	NFSUNLOCKSTATE();
 
 out:
@@ -3650,11 +4073,12 @@
  * Check for grace.
  */
 static int
-nfsrv_checkgrace(u_int32_t flags)
+nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
+    u_int32_t flags)
 {
 	int error = 0;
 
-	if (nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) {
+	if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 		if (flags & NFSLCK_RECLAIM) {
 			error = NFSERR_NOGRACE;
 			goto out;
@@ -3664,6 +4088,12 @@
 			error = NFSERR_GRACE;
 			goto out;
 		}
+		if (nd != NULL && clp != NULL &&
+		    (nd->nd_flag & ND_NFSV41) != 0 &&
+		    (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
+			error = NFSERR_NOGRACE;
+			goto out;
+		}
 
 		/*
 		 * If grace is almost over and we are still getting Reclaims,
@@ -3694,6 +4124,7 @@
 	struct ucred *cred;
 	int error = 0;
 	u_int32_t callback;
+	struct nfsdsession *sep = NULL;
 
 	cred = newnfs_getcred();
 	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
@@ -3708,7 +4139,12 @@
 	 * structure for newnfs_connect() to use.
 	 */
 	clp->lc_req.nr_prog = clp->lc_program;
-	clp->lc_req.nr_vers = NFSV4_CBVERS;
+#ifdef notnow
+	if ((clp->lc_flags & LCL_NFSV41) != 0)
+		clp->lc_req.nr_vers = NFSV41_CBVERS;
+	else
+#endif
+		clp->lc_req.nr_vers = NFSV4_CBVERS;
 
 	/*
 	 * First, fill in some of the fields of nd and cr.
@@ -3716,6 +4152,8 @@
 	nd->nd_flag = ND_NFSV4;
 	if (clp->lc_flags & LCL_GSS)
 		nd->nd_flag |= ND_KERBV;
+	if ((clp->lc_flags & LCL_NFSV41) != 0)
+		nd->nd_flag |= ND_NFSV41;
 	nd->nd_repstat = 0;
 	cred->cr_uid = clp->lc_uid;
 	cred->cr_gid = clp->lc_gid;
@@ -3726,7 +4164,7 @@
 	/*
 	 * Get the first mbuf for the request.
 	 */
-	MGET(m, M_WAIT, MT_DATA);
+	MGET(m, M_WAITOK, MT_DATA);
 	mbuf_setlen(m, 0);
 	nd->nd_mreq = nd->nd_mb = m;
 	nd->nd_bpos = NFSMTOD(m, caddr_t);
@@ -3736,22 +4174,23 @@
 	 */
 	if (procnum == NFSV4OP_CBGETATTR) {
 		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
-		(void) nfsm_strtom(nd, "CB Getattr", 10);
-		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
-		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
-		*tl++ = txdr_unsigned(callback);
-		*tl++ = txdr_unsigned(1);
-		*tl = txdr_unsigned(NFSV4OP_CBGETATTR);
-		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
-		(void) nfsrv_putattrbit(nd, attrbitp);
+		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
+		    "CB Getattr", &sep);
+		if (error != 0) {
+			mbuf_freem(nd->nd_mreq);
+			goto errout;
+		}
+		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
+		(void)nfsrv_putattrbit(nd, attrbitp);
 	} else if (procnum == NFSV4OP_CBRECALL) {
 		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
-		(void) nfsm_strtom(nd, "CB Recall", 9);
-		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID);
-		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
-		*tl++ = txdr_unsigned(callback);
-		*tl++ = txdr_unsigned(1);
-		*tl++ = txdr_unsigned(NFSV4OP_CBRECALL);
+		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
+		    "CB Recall", &sep);
+		if (error != 0) {
+			mbuf_freem(nd->nd_mreq);
+			goto errout;
+		}
+		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateidp->seqid);
 		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
 		    NFSX_STATEIDOTHER);
@@ -3760,9 +4199,20 @@
 			*tl = newnfs_true;
 		else
 			*tl = newnfs_false;
-		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
+		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
+	} else if (procnum == NFSV4PROC_CBNULL) {
+		nd->nd_procnum = NFSV4PROC_CBNULL;
+		if ((clp->lc_flags & LCL_NFSV41) != 0) {
+			error = nfsv4_getcbsession(clp, &sep);
+			if (error != 0) {
+				mbuf_freem(nd->nd_mreq);
+				goto errout;
+			}
+		}
 	} else {
-		nd->nd_procnum = NFSV4PROC_CBNULL;
+		error = NFSERR_SERVERFAULT;
+		mbuf_freem(nd->nd_mreq);
+		goto errout;
 	}
 
 	/*
@@ -3770,7 +4220,9 @@
 	 */
 	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
 	if (clp->lc_req.nr_client == NULL) {
-		if (nd->nd_procnum == NFSV4PROC_CBNULL)
+		if ((clp->lc_flags & LCL_NFSV41) != 0)
+			error = ECONNREFUSED;
+		else if (nd->nd_procnum == NFSV4PROC_CBNULL)
 			error = newnfs_connect(NULL, &clp->lc_req, cred,
 			    NULL, 1);
 		else
@@ -3779,9 +4231,32 @@
 	}
 	newnfs_sndunlock(&clp->lc_req.nr_lock);
 	if (!error) {
-		error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL,
-		    NULL, cred, clp->lc_program, NFSV4_CBVERS, NULL, 1, NULL);
+		if ((nd->nd_flag & ND_NFSV41) != 0) {
+			KASSERT(sep != NULL, ("sep NULL"));
+			if (sep->sess_cbsess.nfsess_xprt != NULL)
+				error = newnfs_request(nd, NULL, clp,
+				    &clp->lc_req, NULL, NULL, cred,
+				    clp->lc_program, clp->lc_req.nr_vers, NULL,
+				    1, NULL, &sep->sess_cbsess);
+			else {
+				/*
+				 * This should probably never occur, but if a
+				 * client somehow does an RPC without a
+				 * SequenceID Op that causes a callback just
+				 * after the nfsd threads have been terminated
+				 * and restared we could conceivably get here
+				 * without a backchannel xprt.
+				 */
+				printf("nfsrv_docallback: no xprt\n");
+				error = ECONNREFUSED;
+			}
+			nfsrv_freesession(sep, NULL);
+		} else
+			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
+			    NULL, NULL, cred, clp->lc_program,
+			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
 	}
+errout:
 	NFSFREECRED(cred);
 
 	/*
@@ -3811,7 +4286,7 @@
 		NFSUNLOCKSTATE();
 		if (nd->nd_repstat)
 			error = nd->nd_repstat;
-		else if (procnum == NFSV4OP_CBGETATTR)
+		else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
 			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 			    p, NULL);
@@ -3830,6 +4305,38 @@
 }
 
 /*
+ * Set up the compound RPC for the callback.
+ */
+static int
+nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
+    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
+{
+	uint32_t *tl;
+	int error, len;
+
+	len = strlen(optag);
+	(void)nfsm_strtom(nd, optag, len);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+	if ((nd->nd_flag & ND_NFSV41) != 0) {
+		*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
+		*tl++ = txdr_unsigned(callback);
+		*tl++ = txdr_unsigned(2);
+		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
+		error = nfsv4_setcbsequence(nd, clp, 1, sepp);
+		if (error != 0)
+			return (error);
+		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
+		*tl = txdr_unsigned(op);
+	} else {
+		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
+		*tl++ = txdr_unsigned(callback);
+		*tl++ = txdr_unsigned(1);
+		*tl = txdr_unsigned(op);
+	}
+	return (0);
+}
+
+/*
  * Return the next index# for a clientid. Mostly just increment and return
  * the next one, but... if the 32bit unsigned does actually wrap around,
  * it should be rebooted.
@@ -3875,7 +4382,7 @@
 	 */
 	min_index = 0;
 	max_index = 0xffffffff;
-	for (i = 0; i < NFSSTATEHASHSIZE; i++) {
+	for (i = 0; i < nfsrv_statehashsize; i++) {
 	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 		if (stp->ls_stateid.other[2] > 0x80000000) {
 		    if (stp->ls_stateid.other[2] < max_index)
@@ -3899,7 +4406,7 @@
 	     *  cleanest way to code the loop.)
 	     */
 tryagain:
-	    for (i = 0; i < NFSSTATEHASHSIZE; i++) {
+	    for (i = 0; i < nfsrv_statehashsize; i++) {
 		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 		    if (stp->ls_stateid.other[2] == canuse) {
 			canuse++;
@@ -4272,7 +4779,7 @@
 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
     NFSPROC_T *p)
 {
-	int gotlock, lktype;
+	int gotlock, lktype = 0;
 
 	/*
 	 * If lease hasn't expired, we can't fix it.
@@ -4282,8 +4789,10 @@
 		return (0);
 	if (*haslockp == 0) {
 		NFSUNLOCKSTATE();
-		lktype = NFSVOPISLOCKED(vp);
-		NFSVOPUNLOCK(vp, 0);
+		if (vp != NULL) {
+			lktype = NFSVOPISLOCKED(vp);
+			NFSVOPUNLOCK(vp, 0);
+		}
 		NFSLOCKV4ROOTMUTEX();
 		nfsv4_relref(&nfsv4rootfs_lock);
 		do {
@@ -4292,11 +4801,12 @@
 		} while (!gotlock);
 		NFSUNLOCKV4ROOTMUTEX();
 		*haslockp = 1;
-		NFSVOPLOCK(vp, lktype | LK_RETRY);
-		if ((vp->v_iflag & VI_DOOMED) != 0)
-			return (2);
-		else
-			return (1);
+		if (vp != NULL) {
+			NFSVOPLOCK(vp, lktype | LK_RETRY);
+			if ((vp->v_iflag & VI_DOOMED) != 0)
+				return (2);
+		}
+		return (1);
 	}
 	NFSUNLOCKSTATE();
 
@@ -4337,7 +4847,7 @@
     vnode_t vp)
 {
 	struct nfsclient *clp = stp->ls_clp;
-	int gotlock, error, lktype, retrycnt, zapped_clp;
+	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
 	nfsv4stateid_t tstateid;
 	fhandle_t tfh;
 
@@ -4454,8 +4964,10 @@
 	 */
 	if (*haslockp == 0) {
 		NFSUNLOCKSTATE();
-		lktype = NFSVOPISLOCKED(vp);
-		NFSVOPUNLOCK(vp, 0);
+		if (vp != NULL) {
+			lktype = NFSVOPISLOCKED(vp);
+			NFSVOPUNLOCK(vp, 0);
+		}
 		NFSLOCKV4ROOTMUTEX();
 		nfsv4_relref(&nfsv4rootfs_lock);
 		do {
@@ -4464,14 +4976,16 @@
 		} while (!gotlock);
 		NFSUNLOCKV4ROOTMUTEX();
 		*haslockp = 1;
-		NFSVOPLOCK(vp, lktype | LK_RETRY);
-		if ((vp->v_iflag & VI_DOOMED) != 0) {
-			*haslockp = 0;
-			NFSLOCKV4ROOTMUTEX();
-			nfsv4_unlock(&nfsv4rootfs_lock, 1);
-			NFSUNLOCKV4ROOTMUTEX();
-			error = NFSERR_PERM;
-			goto out;
+		if (vp != NULL) {
+			NFSVOPLOCK(vp, lktype | LK_RETRY);
+			if ((vp->v_iflag & VI_DOOMED) != 0) {
+				*haslockp = 0;
+				NFSLOCKV4ROOTMUTEX();
+				nfsv4_unlock(&nfsv4rootfs_lock, 1);
+				NFSUNLOCKV4ROOTMUTEX();
+				error = NFSERR_PERM;
+				goto out;
+			}
 		}
 		error = -1;
 		goto out;
@@ -4784,6 +5298,8 @@
 	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
 	if (!NFSNONZERO_ATTRBIT(&cbbits))
 		goto out;
+	if (nfsrv_writedelegcnt == 0)
+		goto out;
 
 	/*
 	 * Get the lock file structure.
@@ -4852,15 +5368,15 @@
 			    nva.na_filerev > delegfilerev) ||
 			    (NFSVNO_ISSETSIZE(&nva) &&
 			     nva.na_size != nvap->na_size)) {
-				nfsvno_updfilerev(vp, nvap, cred, p);
+				error = nfsvno_updfilerev(vp, nvap, cred, p);
 				if (NFSVNO_ISSETSIZE(&nva))
 					nvap->na_size = nva.na_size;
 			}
-		}
+		} else
+			error = 0;	/* Ignore callback errors for now. */
 	} else {
 		NFSUNLOCKSTATE();
 	}
-	error = 0;
 
 out:
 	NFSEXITCODE2(error, nd);
@@ -4884,13 +5400,13 @@
 	/*
 	 * For each client...
 	 */
-	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
+	for (i = 0; i < nfsrv_clienthashsize; i++) {
 	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
 			if (LIST_EMPTY(&stp->ls_open) &&
 			    (stp->ls_noopens > NFSNOOPEN ||
 			     (nfsrv_openpluslock * 2) >
-			     NFSRV_V4STATELIMIT))
+			     nfsrv_v4statelimit))
 				nfsrv_freeopenowner(stp, 0, p);
 		}
 	    }
@@ -5261,11 +5777,12 @@
 	/*
 	 * For each client, clean out the state and then free the structure.
 	 */
-	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
+	for (i = 0; i < nfsrv_clienthashsize; i++) {
 		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 			nfsrv_cleanclient(clp, p);
 			nfsrv_freedeleglist(&clp->lc_deleg);
 			nfsrv_freedeleglist(&clp->lc_olddeleg);
+			free(clp->lc_stateid, M_NFSDCLIENT);
 			free(clp, M_NFSDCLIENT);
 		}
 	}
@@ -5273,7 +5790,7 @@
 	/*
 	 * Also, free up any remaining lock file structures.
 	 */
-	for (i = 0; i < NFSLOCKHASHSIZE; i++) {
+	for (i = 0; i < nfsrv_lockhashsize; i++) {
 		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
 			printf("nfsd unload: fnd a lock file struct\n");
 			nfsrv_freenfslockfile(lfp);
@@ -5281,3 +5798,335 @@
 	}
 }
 
+/*
+ * Check the sequence# for the session and slot provided as an argument.
+ * Also, renew the lease if the session will return NFS_OK.
+ */
+int
+nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
+    uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
+    uint32_t *sflagsp, NFSPROC_T *p)
+{
+	struct nfsdsession *sep;
+	struct nfssessionhash *shp;
+	int error;
+	SVCXPRT *savxprt;
+
+	shp = NFSSESSIONHASH(nd->nd_sessionid);
+	NFSLOCKSESSION(shp);
+	sep = nfsrv_findsession(nd->nd_sessionid);
+	if (sep == NULL) {
+		NFSUNLOCKSESSION(shp);
+		return (NFSERR_BADSESSION);
+	}
+	error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
+	    sep->sess_slots, NULL, NFSV4_SLOTS - 1);
+	if (error != 0) {
+		NFSUNLOCKSESSION(shp);
+		return (error);
+	}
+	if (cache_this != 0)
+		nd->nd_flag |= ND_SAVEREPLY;
+	/* Renew the lease. */
+	sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
+	nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
+	nd->nd_flag |= ND_IMPLIEDCLID;
+
+	/*
+	 * If this session handles the backchannel, save the nd_xprt for this
+	 * RPC, since this is the one being used.
+	 */
+	if (sep->sess_clp->lc_req.nr_client != NULL &&
+	    (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) {
+		savxprt = sep->sess_cbsess.nfsess_xprt;
+		SVC_ACQUIRE(nd->nd_xprt);
+		nd->nd_xprt->xp_p2 =
+		    sep->sess_clp->lc_req.nr_client->cl_private;
+		nd->nd_xprt->xp_idletimeout = 0;	/* Disable timeout. */
+		sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
+		if (savxprt != NULL)
+			SVC_RELEASE(savxprt);
+	}
+
+	*sflagsp = 0;
+	if (sep->sess_clp->lc_req.nr_client == NULL)
+		*sflagsp |= NFSV4SEQ_CBPATHDOWN;
+	NFSUNLOCKSESSION(shp);
+	if (error == NFSERR_EXPIRED) {
+		*sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
+		error = 0;
+	} else if (error == NFSERR_ADMINREVOKED) {
+		*sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
+		error = 0;
+	}
+	*highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
+	return (0);
+}
+
+/*
+ * Check/set reclaim complete for this session/clientid.
+ */
+int
+nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd)
+{
+	struct nfsdsession *sep;
+	struct nfssessionhash *shp;
+	int error = 0;
+
+	shp = NFSSESSIONHASH(nd->nd_sessionid);
+	NFSLOCKSTATE();
+	NFSLOCKSESSION(shp);
+	sep = nfsrv_findsession(nd->nd_sessionid);
+	if (sep == NULL) {
+		NFSUNLOCKSESSION(shp);
+		NFSUNLOCKSTATE();
+		return (NFSERR_BADSESSION);
+	}
+
+	/* Check to see if reclaim complete has already happened. */
+	if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
+		error = NFSERR_COMPLETEALREADY;
+	else
+		sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
+	NFSUNLOCKSESSION(shp);
+	NFSUNLOCKSTATE();
+	return (error);
+}
+
+/*
+ * Cache the reply in a session slot.
+ */
+void
+nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat,
+   struct mbuf **m)
+{
+	struct nfsdsession *sep;
+	struct nfssessionhash *shp;
+
+	shp = NFSSESSIONHASH(sessionid);
+	NFSLOCKSESSION(shp);
+	sep = nfsrv_findsession(sessionid);
+	if (sep == NULL) {
+		NFSUNLOCKSESSION(shp);
+		printf("nfsrv_cache_session: no session\n");
+		m_freem(*m);
+		return;
+	}
+	nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m);
+	NFSUNLOCKSESSION(shp);
+}
+
+/*
+ * Search for a session that matches the sessionid.
+ */
+static struct nfsdsession *
+nfsrv_findsession(uint8_t *sessionid)
+{
+	struct nfsdsession *sep;
+	struct nfssessionhash *shp;
+
+	shp = NFSSESSIONHASH(sessionid);
+	LIST_FOREACH(sep, &shp->list, sess_hash) {
+		if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
+			break;
+	}
+	return (sep);
+}
+
+/*
+ * Destroy a session.
+ */
+int
+nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
+{
+	int error, samesess;
+
+	samesess = 0;
+	if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) {
+		samesess = 1;
+		if ((nd->nd_flag & ND_LASTOP) == 0)
+			return (NFSERR_BADSESSION);
+	}
+	error = nfsrv_freesession(NULL, sessionid);
+	if (error == 0 && samesess != 0)
+		nd->nd_flag &= ~ND_HASSEQUENCE;
+	return (error);
+}
+
+/*
+ * Free up a session structure.
+ */
+static int
+nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
+{
+	struct nfssessionhash *shp;
+	int i;
+
+	NFSLOCKSTATE();
+	if (sep == NULL) {
+		shp = NFSSESSIONHASH(sessionid);
+		NFSLOCKSESSION(shp);
+		sep = nfsrv_findsession(sessionid);
+	} else {
+		shp = NFSSESSIONHASH(sep->sess_sessionid);
+		NFSLOCKSESSION(shp);
+	}
+	if (sep != NULL) {
+		sep->sess_refcnt--;
+		if (sep->sess_refcnt > 0) {
+			NFSUNLOCKSESSION(shp);
+			NFSUNLOCKSTATE();
+			return (0);
+		}
+		LIST_REMOVE(sep, sess_hash);
+		LIST_REMOVE(sep, sess_list);
+	}
+	NFSUNLOCKSESSION(shp);
+	NFSUNLOCKSTATE();
+	if (sep == NULL)
+		return (NFSERR_BADSESSION);
+	for (i = 0; i < NFSV4_SLOTS; i++)
+		if (sep->sess_slots[i].nfssl_reply != NULL)
+			m_freem(sep->sess_slots[i].nfssl_reply);
+	if (sep->sess_cbsess.nfsess_xprt != NULL)
+		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
+	free(sep, M_NFSDSESSION);
+	return (0);
+}
+
+/*
+ * Free a stateid.
+ * RFC5661 says that it should fail when there are associated opens, locks
+ * or delegations. Since stateids represent opens, I don't see how you can
+ * free an open stateid (it will be free'd when closed), so this function
+ * only works for lock stateids (freeing the lock_owner) or delegations.
+ */
+int
+nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
+    NFSPROC_T *p)
+{
+	struct nfsclient *clp;
+	struct nfsstate *stp;
+	int error;
+
+	NFSLOCKSTATE();
+	/*
+	 * Look up the stateid
+	 */
+	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
+	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
+	if (error == 0) {
+		/* First, check for a delegation. */
+		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
+			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
+			    NFSX_STATEIDOTHER))
+				break;
+		}
+		if (stp != NULL) {
+			nfsrv_freedeleg(stp);
+			NFSUNLOCKSTATE();
+			return (error);
+		}
+	}
+	/* Not a delegation, try for a lock_owner. */
+	if (error == 0)
+		error = nfsrv_getstate(clp, stateidp, 0, &stp);
+	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
+	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
+		/* Not a lock_owner stateid. */
+		error = NFSERR_LOCKSHELD;
+	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
+		error = NFSERR_LOCKSHELD;
+	if (error == 0)
+		nfsrv_freelockowner(stp, NULL, 0, p);
+	NFSUNLOCKSTATE();
+	return (error);
+}
+
+/*
+ * Generate the xdr for an NFSv4.1 CBSequence Operation.
+ */
+static int
+nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
+    int dont_replycache, struct nfsdsession **sepp)
+{
+	struct nfsdsession *sep;
+	uint32_t *tl, slotseq = 0;
+	int maxslot, slotpos;
+	uint8_t sessionid[NFSX_V4SESSIONID];
+	int error;
+
+	error = nfsv4_getcbsession(clp, sepp);
+	if (error != 0)
+		return (error);
+	sep = *sepp;
+	(void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
+	    &slotseq, sessionid);
+	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
+
+	/* Build the Sequence arguments. */
+	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
+	bcopy(sessionid, tl, NFSX_V4SESSIONID);
+	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
+	nd->nd_slotseq = tl;
+	*tl++ = txdr_unsigned(slotseq);
+	*tl++ = txdr_unsigned(slotpos);
+	*tl++ = txdr_unsigned(maxslot);
+	if (dont_replycache == 0)
+		*tl++ = newnfs_true;
+	else
+		*tl++ = newnfs_false;
+	*tl = 0;			/* No referring call list, for now. */
+	nd->nd_flag |= ND_HASSEQUENCE;
+	return (0);
+}
+
+/*
+ * Get a session for the callback.
+ */
+static int
+nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
+{
+	struct nfsdsession *sep;
+
+	NFSLOCKSTATE();
+	LIST_FOREACH(sep, &clp->lc_session, sess_list) {
+		if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
+			break;
+	}
+	if (sep == NULL) {
+		NFSUNLOCKSTATE();
+		return (NFSERR_BADSESSION);
+	}
+	sep->sess_refcnt++;
+	*sepp = sep;
+	NFSUNLOCKSTATE();
+	return (0);
+}
+
+/*
+ * Free up all backchannel xprts.  This needs to be done when the nfsd threads
+ * exit, since those transports will all be going away.
+ * This is only called after all the nfsd threads are done performing RPCs,
+ * so locking shouldn't be an issue.
+ */
+APPLESTATIC void
+nfsrv_freeallbackchannel_xprts(void)
+{
+	struct nfsdsession *sep;
+	struct nfsclient *clp;
+	SVCXPRT *xprt;
+	int i;
+
+	for (i = 0; i < nfsrv_clienthashsize; i++) {
+		LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
+			LIST_FOREACH(sep, &clp->lc_session, sess_list) {
+				xprt = sep->sess_cbsess.nfsess_xprt;
+				sep->sess_cbsess.nfsess_xprt = NULL;
+				if (xprt != NULL)
+					SVC_RELEASE(xprt);
+			}
+		}
+	}
+}
+

Modified: trunk/sys/fs/nfsserver/nfs_nfsdsubs.c
===================================================================
--- trunk/sys/fs/nfsserver/nfs_nfsdsubs.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nfsserver/nfs_nfsdsubs.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/fs/nfsserver/nfs_nfsdsubs.c 299223 2016-05-07 20:17:23Z rmacklem $");
 
 #ifndef APPLEKEXT
 /*
@@ -44,8 +45,12 @@
 
 extern u_int32_t newnfs_true, newnfs_false;
 extern int nfs_pubfhset;
-extern struct nfsclienthashhead nfsclienthash[NFSCLIENTHASHSIZE];
-extern struct nfslockhashhead nfslockhash[NFSLOCKHASHSIZE];
+extern struct nfsclienthashhead *nfsclienthash;
+extern int nfsrv_clienthashsize;
+extern struct nfslockhashhead *nfslockhash;
+extern int nfsrv_lockhashsize;
+extern struct nfssessionhash *nfssessionhash;
+extern int nfsrv_sessionhashsize;
 extern int nfsrv_useacl;
 extern uid_t nfsrv_defaultuid;
 extern gid_t nfsrv_defaultgid;
@@ -56,6 +61,8 @@
 extern nfstype nfsv34_type[9];
 #endif	/* !APPLEKEXT */
 
+static u_int32_t nfsrv_isannfserr(u_int32_t);
+
 SYSCTL_DECL(_vfs_nfsd);
 
 static int	disable_checkutf8 = 0;
@@ -63,31 +70,39 @@
     &disable_checkutf8, 0,
     "Disable the NFSv4 check for a UTF8 compliant name");
 
+static int    enable_nobodycheck = 1;
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_nobodycheck, CTLFLAG_RW,
+    &enable_nobodycheck, 0,
+    "Enable the NFSv4 check when setting user nobody as owner");
+
+static int    enable_nogroupcheck = 1;
+SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_nogroupcheck, CTLFLAG_RW,
+    &enable_nogroupcheck, 0,
+    "Enable the NFSv4 check when setting group nogroup as owner");
+
 static char nfsrv_hexdigit(char, int *);
 
 /*
  * Maps errno values to nfs error numbers.
  * Use NFSERR_IO as the catch all for ones not specifically defined in
- * RFC 1094.
+ * RFC 1094. (It now includes the errors added for NFSv3.)
  */
-static u_char nfsrv_v2errmap[ELAST] = {
+static u_char nfsrv_v2errmap[NFSERR_REMOTE] = {
   NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
-  NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
-  NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_EXIST,	NFSERR_XDEV,	NFSERR_NODEV,	NFSERR_NOTDIR,
+  NFSERR_ISDIR,	NFSERR_INVAL,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
+  NFSERR_MLINK,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
-  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
   NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
   NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
-  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
-  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
-  NFSERR_IO,
+  NFSERR_REMOTE,
 };
 
 /*
@@ -1130,6 +1145,7 @@
 	NFSERR_INVAL,
 	NFSERR_RESOURCE,
 	NFSERR_SERVERFAULT,
+	NFSERR_WRONGSEC,
 	0,
 };
 
@@ -1493,7 +1509,12 @@
 		else if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
 			 nd->nd_repstat == NFSERR_OPILLEGAL)
 			return (txdr_unsigned(nd->nd_repstat));
-		else
+		else if ((nd->nd_flag & ND_NFSV41) != 0) {
+			if (nd->nd_repstat == EOPNOTSUPP)
+				nd->nd_repstat = NFSERR_NOTSUPP;
+			nd->nd_repstat = nfsrv_isannfserr(nd->nd_repstat);
+			return (txdr_unsigned(nd->nd_repstat));
+		} else
 		    errp = defaulterrp = nfsrv_v4errmap[nd->nd_procnum];
 		while (*++errp)
 			if (*errp == nd->nd_repstat)
@@ -1500,12 +1521,29 @@
 				return (txdr_unsigned(nd->nd_repstat));
 		return (txdr_unsigned(*defaulterrp));
 	}
-	if (nd->nd_repstat <= ELAST)
+	if (nd->nd_repstat <= NFSERR_REMOTE)
 		return (txdr_unsigned(nfsrv_v2errmap[nd->nd_repstat - 1]));
 	return (txdr_unsigned(NFSERR_IO));
 }
 
 /*
+ * Check to see if the error is a valid NFS one. If not, replace it with
+ * NFSERR_IO.
+ */
+static u_int32_t
+nfsrv_isannfserr(u_int32_t errval)
+{
+
+	if (errval == NFSERR_OK)
+		return (errval);
+	if (errval >= NFSERR_BADHANDLE && errval <= NFSERR_DELEGREVOKED)
+		return (errval);
+	if (errval > 0 && errval <= NFSERR_REMOTE)
+		return (nfsrv_v2errmap[errval - 1]);
+	return (NFSERR_IO);
+}
+
+/*
  * Check to see if setting a uid/gid is permitted when creating a new
  * file object. (Called when uid and/or gid is specified in the
  * settable attributes for V4.
@@ -1520,8 +1558,10 @@
 	 */
 	if (NFSVNO_NOTSETUID(nvap) && NFSVNO_NOTSETGID(nvap))
 		goto out;
-	if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid == nfsrv_defaultuid)
-	    || (NFSVNO_ISSETGID(nvap) && nvap->na_gid == nfsrv_defaultgid)) {
+	if ((NFSVNO_ISSETUID(nvap) && nvap->na_uid == nfsrv_defaultuid &&
+           enable_nobodycheck == 1)
+	    || (NFSVNO_ISSETGID(nvap) && nvap->na_gid == nfsrv_defaultgid &&
+           enable_nogroupcheck == 1)) {
 		error = NFSERR_BADOWNER;
 		goto out;
 	}
@@ -1987,47 +2027,6 @@
 	return (error);
 }
 
-/*
- * Check the tcp socket sequence number has been acknowledged.
- */
-int
-nfsrv_checksockseqnum(struct socket *so, tcp_seq tcpseqval)
-{
-	tcp_seq maxseq, unaseq;
-	int error, ret;
-
-	error = nfsrv_getsocksndseq(so, &maxseq, &unaseq);
-	if (error)
-		return (0);
-	ret = SEQ_GEQ(unaseq, tcpseqval);
-	return (ret);
-}
-
-/*
- * Get the tcp sequence number to be acknowledged.
- */
-int
-nfsrv_getsockseqnum(struct socket *so, tcp_seq *tcpseqp)
-{
-	tcp_seq maxseq, unaseq;
-	u_int sbcc;
-	int error;
-
-	sbcc = so->so_snd.sb_cc;
-	error = nfsrv_getsocksndseq(so, &maxseq, &unaseq);
-	if (error)
-		return (0);
-	/*
-	 * Set the seq# to a value that will
-	 * be at least the end of the reply.
-	 * When this sequence# is acknowledged
-	 * by the client, the client has received
-	 * the reply.
-	 */
-	*tcpseqp = sbcc + maxseq;
-	return (1);
-}
-
 void
 nfsd_init(void)
 {
@@ -2042,10 +2041,20 @@
 	 * Initialize client queues. Don't free/reinitialize
 	 * them when nfsds are restarted.
 	 */
-	for (i = 0; i < NFSCLIENTHASHSIZE; i++)
+	nfsclienthash = malloc(sizeof(struct nfsclienthashhead) *
+	    nfsrv_clienthashsize, M_NFSDCLIENT, M_WAITOK | M_ZERO);
+	for (i = 0; i < nfsrv_clienthashsize; i++)
 		LIST_INIT(&nfsclienthash[i]);
-	for (i = 0; i < NFSLOCKHASHSIZE; i++)
+	nfslockhash = malloc(sizeof(struct nfslockhashhead) *
+	    nfsrv_lockhashsize, M_NFSDLOCKFILE, M_WAITOK | M_ZERO);
+	for (i = 0; i < nfsrv_lockhashsize; i++)
 		LIST_INIT(&nfslockhash[i]);
+	nfssessionhash = malloc(sizeof(struct nfssessionhash) *
+	    nfsrv_sessionhashsize, M_NFSDSESSION, M_WAITOK | M_ZERO);
+	for (i = 0; i < nfsrv_sessionhashsize; i++) {
+		mtx_init(&nfssessionhash[i].mtx, "nfssm", NULL, MTX_DEF);
+		LIST_INIT(&nfssessionhash[i].list);
+	}
 
 	/* and the v2 pubfh should be all zeros */
 	NFSBZERO(nfs_v2pubfh, NFSX_V2FH);
@@ -2073,3 +2082,42 @@
 	return (1);
 }
 
+/*
+ * Parse the first part of an NFSv4 compound to find out what the minor
+ * version# is.
+ */
+void
+nfsd_getminorvers(struct nfsrv_descript *nd, u_char *tag, u_char **tagstrp,
+    int *taglenp, u_int32_t *minversp)
+{
+	uint32_t *tl;
+	int error = 0, taglen = -1;
+	u_char *tagstr = NULL;
+
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	taglen = fxdr_unsigned(int, *tl);
+	if (taglen < 0 || taglen > NFSV4_OPAQUELIMIT) {
+		error = EBADRPC;
+		goto nfsmout;
+	}
+	if (taglen <= NFSV4_SMALLSTR)
+		tagstr = tag;
+	else
+		tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
+	error = nfsrv_mtostr(nd, tagstr, taglen);
+	if (error != 0)
+		goto nfsmout;
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	*minversp = fxdr_unsigned(u_int32_t, *tl);
+	*tagstrp = tagstr;
+	if (*minversp == NFSV41_MINORVERSION)
+		nd->nd_flag |= ND_NFSV41;
+nfsmout:
+	if (error != 0) {
+		if (tagstr != NULL && taglen > NFSV4_SMALLSTR)
+			free(tagstr, M_TEMP);
+		taglen = -1;
+	}
+	*taglenp = taglen;
+}
+

Modified: trunk/sys/fs/nullfs/null.h
===================================================================
--- trunk/sys/fs/nullfs/null.h	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nullfs/null.h	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,7 +32,7 @@
  *
  *	@(#)null.h	8.3 (Berkeley) 8/20/94
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nullfs/null.h 250505 2013-05-11 11:17:44Z kib $
  */
 
 #ifndef	FS_NULL_H

Modified: trunk/sys/fs/nullfs/null_subr.c
===================================================================
--- trunk/sys/fs/nullfs/null_subr.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nullfs/null_subr.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -31,7 +32,7 @@
  *
  *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nullfs/null_subr.c 250505 2013-05-11 11:17:44Z kib $
  */
 
 #include <sys/param.h>
@@ -235,10 +236,6 @@
 	 * duplicates later, when adding new vnode to hash.
 	 * Note that duplicate can only appear in hash if the lowervp is
 	 * locked LK_SHARED.
-	 *
-	 * Do the MALLOC before the getnewvnode since doing so afterward
-	 * might cause a bogus v_data pointer to get dereferenced
-	 * elsewhere if MALLOC should block.
 	 */
 	xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
 

Modified: trunk/sys/fs/nullfs/null_vfsops.c
===================================================================
--- trunk/sys/fs/nullfs/null_vfsops.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nullfs/null_vfsops.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1992, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
@@ -32,7 +33,7 @@
  *	@(#)null_vfsops.c	8.2 (Berkeley) 1/21/94
  *
  * @(#)lofs_vfsops.c	1.2 (Berkeley) 6/18/92
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nullfs/null_vfsops.c 309530 2016-12-04 13:56:15Z kib $
  */
 
 /*
@@ -188,18 +189,21 @@
 	}
 
 	xmp->nullm_flags |= NULLM_CACHE;
-	if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0)
+	if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0 ||
+	    (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) != 0)
 		xmp->nullm_flags &= ~NULLM_CACHE;
 
 	MNT_ILOCK(mp);
 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
 		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
-		    (MNTK_MPSAFE | MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
+		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
 		    MNTK_EXTENDED_SHARED);
 	}
 	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT;
+	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
+	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
 	MNT_IUNLOCK(mp);
-	mp->mnt_data =  xmp;
+	mp->mnt_data = xmp;
 	vfs_getnewfsid(mp);
 	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
 		MNT_ILOCK(xmp->nullm_vfs);
@@ -312,7 +316,8 @@
 
 	/* now copy across the "interesting" information and fake the rest */
 	sbp->f_type = mstat.f_type;
-	sbp->f_flags = mstat.f_flags;
+	sbp->f_flags = (sbp->f_flags & (MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID |
+	    MNT_UNION | MNT_NOSYMFOLLOW)) | (mstat.f_flags & ~MNT_ROOTFS);
 	sbp->f_bsize = mstat.f_bsize;
 	sbp->f_iosize = mstat.f_iosize;
 	sbp->f_blocks = mstat.f_blocks;

Modified: trunk/sys/fs/nullfs/null_vnops.c
===================================================================
--- trunk/sys/fs/nullfs/null_vnops.c	2018-05-27 22:18:11 UTC (rev 10025)
+++ trunk/sys/fs/nullfs/null_vnops.c	2018-05-27 22:18:25 UTC (rev 10026)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -36,7 +37,7 @@
  *	...and...
  *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/fs/nullfs/null_vnops.c 295970 2016-02-24 13:48:40Z kib $
  */
 
 /*
@@ -361,9 +362,11 @@
 	struct vnode *dvp = ap->a_dvp;
 	int flags = cnp->cn_flags;
 	struct vnode *vp, *ldvp, *lvp;
+	struct mount *mp;
 	int error;
 
-	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	mp = dvp->v_mount;
+	if ((flags & ISLASTCN) != 0 && (mp->mnt_flag & MNT_RDONLY) != 0 &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 	/*
@@ -372,9 +375,47 @@
 	 */
 	ldvp = NULLVPTOLOWERVP(dvp);
 	vp = lvp = NULL;
+	KASSERT((ldvp->v_vflag & VV_ROOT) == 0 ||
+	    ((dvp->v_vflag & VV_ROOT) != 0 && (flags & ISDOTDOT) == 0),
+	    ("ldvp %p fl %#x dvp %p fl %#x flags %#x", ldvp, ldvp->v_vflag,
+	     dvp, dvp->v_vflag, flags));
+
+	/*
+	 * Hold ldvp.  The reference on it, owned by dvp, is lost in
+	 * case of dvp reclamation, and we need ldvp to move our lock
+	 * from ldvp to dvp.
+	 */
+	vhold(ldvp);
+
 	error = VOP_LOOKUP(ldvp, &lvp, cnp);
-	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
-	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+
+	/*
+	 * VOP_LOOKUP() on lower vnode may unlock ldvp, which allows
+	 * dvp to be reclaimed due to shared v_vnlock.  Check for the
+	 * doomed state and return error.
+	 */
+	if ((error == 0 || error == EJUSTRETURN) &&
+	    (dvp->v_iflag & VI_DOOMED) != 0) {
+		error = ENOENT;
+		if (lvp != NULL)
+			vput(lvp);
+
+		/*
+		 * If vgone() did reclaimed dvp before curthread
+		 * relocked ldvp, the locks of dvp and ldpv are no
+		 * longer shared.  In this case, relock of ldvp in
+		 * lower fs VOP_LOOKUP() does not restore the locking
+		 * state of dvp.  Compensate for this by unlocking
+		 * ldvp and locking dvp, which is also correct if the
+		 * locks are still shared.
+		 */
+		VOP_UNLOCK(ldvp, 0);
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+	}
+	vdrop(ldvp);
+
+	if (error == EJUSTRETURN && (flags & ISLASTCN) != 0 &&
+	    (mp->mnt_flag & MNT_RDONLY) != 0 &&
 	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
 		error = EROFS;
 
@@ -384,7 +425,7 @@
 			VREF(dvp);
 			vrele(lvp);
 		} else {
-			error = null_nodeget(dvp->v_mount, lvp, &vp);
+			error = null_nodeget(mp, lvp, &vp);
 			if (error == 0)
 				*ap->a_vpp = vp;
 		}
@@ -528,14 +569,16 @@
 null_remove(struct vop_remove_args *ap)
 {
 	int retval, vreleit;
-	struct vnode *lvp;
+	struct vnode *lvp, *vp;
 
-	if (vrefcnt(ap->a_vp) > 1) {
-		lvp = NULLVPTOLOWERVP(ap->a_vp);
+	vp = ap->a_vp;
+	if (vrefcnt(vp) > 1) {
+		lvp = NULLVPTOLOWERVP(vp);
 		VREF(lvp);
 		vreleit = 1;
 	} else
 		vreleit = 0;
+	VTONULL(vp)->null_flags |= NULLV_DROP;
 	retval = null_bypass(&ap->a_gen);
 	if (vreleit != 0)
 		vrele(lvp);
@@ -554,6 +597,7 @@
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *tvp = ap->a_tvp;
+	struct null_node *tnn;
 
 	/* Check for cross-device rename. */
 	if ((fvp->v_mount != tdvp->v_mount) ||
@@ -568,10 +612,22 @@
 		vrele(fvp);
 		return (EXDEV);
 	}
-	
+
+	if (tvp != NULL) {
+		tnn = VTONULL(tvp);
+		tnn->null_flags |= NULLV_DROP;
+	}
 	return (null_bypass((struct vop_generic_args *)ap));
 }
 
+static int
+null_rmdir(struct vop_rmdir_args *ap)
+{
+
+	VTONULL(ap->a_vp)->null_flags |= NULLV_DROP;
+	return (null_bypass(&ap->a_gen));
+}
+
 /*
  * We need to process our own vnode lock and then clear the
  * interlock flag as it applies only to our vnode, not the
@@ -712,7 +768,7 @@
 		 * the lower vnodes.
 		 */
 		vp->v_object = NULL;
-		vrecycle(vp, curthread);
+		vrecycle(vp);
 	}
 	return (0);
 }
@@ -733,7 +789,7 @@
 	lowervp = xp->null_lowervp;
 
 	KASSERT(lowervp != NULL && vp->v_vnlock != &vp->v_lock,
-	    ("Reclaiming inclomplete null vnode %p", vp));
+	    ("Reclaiming incomplete null vnode %p", vp));
 
 	null_hashrem(xp);
 	/*
@@ -853,15 +909,6 @@
 	return (error);
 }
 
-static int
-null_link(struct vop_link_args *ap)
-{
-
-	if (ap->a_tdvp->v_mount != ap->a_vp->v_mount)
-		return (EXDEV);
-	return (null_bypass((struct vop_generic_args *)ap));
-}
-
 /*
  * Global vfs data structures
  */
@@ -875,7 +922,6 @@
 	.vop_getwritemount =	null_getwritemount,
 	.vop_inactive =		null_inactive,
 	.vop_islocked =		vop_stdislocked,
-	.vop_link =		null_link,
 	.vop_lock1 =		null_lock,
 	.vop_lookup =		null_lookup,
 	.vop_open =		null_open,
@@ -883,6 +929,7 @@
 	.vop_reclaim =		null_reclaim,
 	.vop_remove =		null_remove,
 	.vop_rename =		null_rename,
+	.vop_rmdir =		null_rmdir,
 	.vop_setattr =		null_setattr,
 	.vop_strategy =		VOP_EOPNOTSUPP,
 	.vop_unlock =		null_unlock,



More information about the Midnightbsd-cvs mailing list