[Midnightbsd-cvs] src [9948] trunk/sys/kern: sync with freebsd

laffer1 at midnightbsd.org
Fri May 25 16:58:04 EDT 2018


Revision: 9948
          http://svnweb.midnightbsd.org/src/?rev=9948
Author:   laffer1
Date:     2018-05-25 16:58:03 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd

Modified Paths:
--------------
    trunk/sys/kern/imgact_elf64.c
    trunk/sys/kern/kern_context.c
    trunk/sys/kern/kern_ctf.c
    trunk/sys/kern/kern_prot.c
    trunk/sys/kern/kern_racct.c
    trunk/sys/kern/kern_rctl.c
    trunk/sys/kern/kern_rmlock.c
    trunk/sys/kern/kern_rwlock.c
    trunk/sys/kern/kern_sdt.c
    trunk/sys/kern/kern_sema.c
    trunk/sys/kern/kern_sharedpage.c

Modified: trunk/sys/kern/imgact_elf64.c
===================================================================
--- trunk/sys/kern/imgact_elf64.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/imgact_elf64.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002 Doug Rabson
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/imgact_elf64.c 116182 2003-06-11 00:56:59Z obrien $");
 
 #define __ELF_WORD_SIZE 64
 #include <kern/imgact_elf.c>

Modified: trunk/sys/kern/kern_context.c
===================================================================
--- trunk/sys/kern/kern_context.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_context.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002 Daniel M. Eischen <deischen at freebsd.org>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_context.c 225617 2011-09-16 13:58:51Z kmacy $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>

Modified: trunk/sys/kern/kern_ctf.c
===================================================================
--- trunk/sys/kern/kern_ctf.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_ctf.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008 John Birrell <jb at freebsd.org>
  * All rights reserved.
@@ -23,7 +24,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_ctf.c 279196 2015-02-23 01:24:10Z markj $
  */
 
 /*
@@ -68,8 +69,6 @@
 	int flags;
 	int i;
 	int nbytes;
-	ssize_t resid;
-	int vfslocked;
 	size_t sz;
 	struct nameidata nd;
 	struct thread *td = curthread;
@@ -114,23 +113,19 @@
 	 */
 	ef->ctfcnt = -1;
 
-	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, lf->pathname, td);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, lf->pathname, td);
 	flags = FREAD;
 	error = vn_open(&nd, &flags, 0, NULL);
 	if (error)
 		return (error);
-	vfslocked = NDHASGIANT(&nd);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 
 	/* Allocate memory for the FLF header. */
-	if ((hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK)) == NULL) {
-		error = ENOMEM;
-		goto out;
-	}
+	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
 
 	/* Read the ELF header. */
 	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, hdr, sizeof(*hdr),
-	    0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
+	    0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL,
 	    td)) != 0)
 		goto out;
 
@@ -148,15 +143,12 @@
 	}
 
 	/* Allocate memory for all the section headers */
-	if ((shdr = malloc(nbytes, M_LINKER, M_WAITOK)) == NULL) {
-		error = ENOMEM;
-		goto out;
-	}
+	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
 
 	/* Read all the section headers */
 	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shdr, nbytes,
 	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
-	    &resid, td)) != 0)
+	    NULL, td)) != 0)
 		goto out;
 
 	/*
@@ -173,17 +165,12 @@
 	}
 
 	/* Allocate memory to buffer the section header strings. */
-	if ((shstrtab = malloc(shdr[hdr->e_shstrndx].sh_size, M_LINKER,
-	    M_WAITOK)) == NULL) {
-		error = ENOMEM;
-		goto out;
-	}
+	shstrtab = malloc(shdr[hdr->e_shstrndx].sh_size, M_LINKER, M_WAITOK);
 
 	/* Read the section header strings. */
 	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, shstrtab,
 	    shdr[hdr->e_shstrndx].sh_size, shdr[hdr->e_shstrndx].sh_offset,
-	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
-	    td)) != 0)
+	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL, td)) != 0)
 		goto out;
 
 	/* Search for the section containing the CTF data. */
@@ -202,7 +189,7 @@
 	/* Read the CTF header. */
 	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, ctf_hdr, sizeof(ctf_hdr),
 	    shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
-	    NOCRED, &resid, td)) != 0)
+	    NOCRED, NULL, td)) != 0)
 		goto out;
 
 	/* Check the CTF magic number. (XXX check for big endian!) */
@@ -240,10 +227,7 @@
 		 * Allocate memory for the compressed CTF data, including
 		 * the header (which isn't compressed).
 		 */
-		if ((raw = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK)) == NULL) {
-			error = ENOMEM;
-			goto out;
-		}
+		raw = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK);
 	} else {
 		/*
 		 * The CTF data is not compressed, so the ELF section
@@ -256,10 +240,7 @@
 	 * Allocate memory to buffer the CTF data in it's decompressed
 	 * form.
 	 */
-	if ((ctftab = malloc(sz, M_LINKER, M_WAITOK)) == NULL) {
-		error = ENOMEM;
-		goto out;
-	}
+	ctftab = malloc(sz, M_LINKER, M_WAITOK);
 
 	/*
 	 * Read the CTF data into the raw buffer if compressed, or
@@ -267,7 +248,7 @@
 	 */
 	if ((error = vn_rdwr(UIO_READ, nd.ni_vp, raw == NULL ? ctftab : raw,
 	    shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED,
-	    td->td_ucred, NOCRED, &resid, td)) != 0)
+	    td->td_ucred, NOCRED, NULL, td)) != 0)
 		goto out;
 
 	/* Check if decompression is required. */
@@ -295,7 +276,9 @@
 		zs.next_in = ((uint8_t *) raw) + sizeof(ctf_hdr);
 		zs.avail_out = sz - sizeof(ctf_hdr);
 		zs.next_out = ((uint8_t *) ctftab) + sizeof(ctf_hdr);
-		if ((ret = inflate(&zs, Z_FINISH)) != Z_STREAM_END) {
+		ret = inflate(&zs, Z_FINISH);
+		inflateEnd(&zs);
+		if (ret != Z_STREAM_END) {
 			printf("%s(%d): zlib inflate returned %d\n", __func__, __LINE__, ret);
 			error = EIO;
 			goto out;
@@ -323,7 +306,6 @@
 out:
 	VOP_UNLOCK(nd.ni_vp, 0);
 	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
-	VFS_UNLOCK_GIANT(vfslocked);
 
 	if (hdr != NULL)
 		free(hdr, M_LINKER);
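
Most of the kern_ctf.c delta follows two properties of the underlying APIs: malloc(9) called with M_WAITOK can never return NULL, so the ENOMEM fallbacks above are dead code, and vn_rdwr(9) accepts a NULL residual-count pointer when the caller does not need the short-read count, which is why the unused `resid` local goes away. The remaining functional change is the inflateEnd() call, which releases the zlib stream state even when inflate() fails. A minimal sketch of the resulting allocate-and-read pattern (the helper name and the M_TEMP malloc type are illustrative, not code from this commit):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <sys/vnode.h>

/*
 * Hypothetical helper: read "len" bytes from the start of an already
 * opened and locked vnode into a freshly allocated buffer.
 */
static int
read_blob(struct vnode *vp, struct thread *td, int len, void **bufp)
{
	void *buf;
	int error;

	/* M_WAITOK allocations sleep until they succeed; no NULL check needed. */
	buf = malloc(len, M_TEMP, M_WAITOK);

	/*
	 * A NULL residual-count pointer tells vn_rdwr() the caller does not
	 * care how many bytes were left unread.
	 */
	error = vn_rdwr(UIO_READ, vp, buf, len, 0, UIO_SYSSPACE,
	    IO_NODELOCKED, td->td_ucred, NOCRED, NULL, td);
	if (error != 0) {
		free(buf, M_TEMP);
		return (error);
	}
	*bufp = buf;
	return (0);
}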

Modified: trunk/sys/kern/kern_prot.c
===================================================================
--- trunk/sys/kern/kern_prot.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_prot.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
  *	The Regents of the University of California.
@@ -42,7 +43,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_prot.c 303846 2016-08-08 18:31:28Z bdrewery $");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
@@ -73,7 +74,7 @@
 
 #ifdef REGRESSION
 FEATURE(regression,
-    "Kernel support for interfaces nessesary for regression testing (SECURITY RISK!)");
+    "Kernel support for interfaces necessary for regression testing (SECURITY RISK!)");
 #endif
 
 #if defined(INET) || defined(INET6)
@@ -147,7 +148,7 @@
 	return (0);
 }
 
-/* Get an arbitary pid's process group id */
+/* Get an arbitrary pid's process group id */
 #ifndef _SYS_SYSPROTO_H_
 struct getpgid_args {
 	pid_t	pid;
@@ -178,7 +179,7 @@
 }
 
 /*
- * Get an arbitary pid's session id.
+ * Get an arbitrary pid's session id.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getsid_args {
@@ -582,7 +583,7 @@
 		change_euid(newcred, uip);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 #ifdef RACCT
 	racct_proc_ucred_changed(p, oldcred, newcred);
@@ -641,7 +642,7 @@
 		change_euid(newcred, euip);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	uifree(euip);
 	crfree(oldcred);
@@ -741,7 +742,7 @@
 		change_egid(newcred, gid);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	crfree(oldcred);
 	return (0);
@@ -787,7 +788,7 @@
 		change_egid(newcred, egid);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	crfree(oldcred);
 	return (0);
@@ -860,7 +861,7 @@
 		crsetgroups_locked(newcred, ngrp, groups);
 	}
 	setsugid(p);
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	crfree(oldcred);
 	return (0);
@@ -923,7 +924,7 @@
 		change_svuid(newcred, newcred->cr_uid);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 #ifdef RACCT
 	racct_proc_ucred_changed(p, oldcred, newcred);
@@ -990,7 +991,7 @@
 		change_svgid(newcred, newcred->cr_groups[0]);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	crfree(oldcred);
 	return (0);
@@ -1064,7 +1065,7 @@
 		change_svuid(newcred, suid);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 #ifdef RACCT
 	racct_proc_ucred_changed(p, oldcred, newcred);
@@ -1143,7 +1144,7 @@
 		change_svgid(newcred, sgid);
 		setsugid(p);
 	}
-	p->p_ucred = newcred;
+	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	crfree(oldcred);
 	return (0);
@@ -1710,6 +1711,13 @@
 	if ((p->p_flag & P_INEXEC) != 0)
 		return (EBUSY);
 
+	/* Denied explicitly */
+	if ((p->p_flag2 & P2_NOTRACE) != 0) {
+		error = priv_check(td, PRIV_DEBUG_DENIED);
+		if (error != 0)
+			return (error);
+	}
+
 	return (0);
 }
 
@@ -1949,7 +1957,44 @@
 		crfree(cred);
 }
 
+/*
+ * Set initial process credentials.
+ * Callers are responsible for providing the reference for provided credentials.
+ */
+void
+proc_set_cred_init(struct proc *p, struct ucred *newcred)
+{
+
+	p->p_ucred = newcred;
+}
+
+/*
+ * Change process credentials.
+ * Callers are responsible for providing the reference for passed credentials
+ * and for freeing old ones.
+ *
+ * Process has to be locked except when it does not have credentials (as it
+ * should not be visible just yet) or when newcred is NULL (as this can be
+ * only used when the process is about to be freed, at which point it should
+ * not be visible anymore).
+ */
 struct ucred *
+proc_set_cred(struct proc *p, struct ucred *newcred)
+{
+	struct ucred *oldcred;
+
+	MPASS(p->p_ucred != NULL);
+	if (newcred == NULL)
+		MPASS(p->p_state == PRS_ZOMBIE);
+	else
+		PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	oldcred = p->p_ucred;
+	p->p_ucred = newcred;
+	return (oldcred);
+}
+
+struct ucred *
 crcopysafe(struct proc *p, struct ucred *cr)
 {
 	struct ucred *oldcred;
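
All of the `p->p_ucred = newcred` assignments above become proc_set_cred(), which centralizes the swap and asserts the locking rules spelled out in its comment. The caller-side sequence is unchanged; a condensed, hypothetical example of the pattern the set*id() paths use (permission checks and error handling elided):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/ucred.h>

/* Hypothetical example, not part of this commit. */
static void
example_replace_cred(struct proc *p)
{
	struct ucred *newcred, *oldcred;

	newcred = crget();			/* fresh reference; may sleep */
	PROC_LOCK(p);
	oldcred = crcopysafe(p, newcred);	/* copy the live credentials into newcred */
	/* ... adjust newcred here, e.g. via change_euid()/change_egid() ... */
	proc_set_cred(p, newcred);		/* install; asserts the proc lock is held */
	PROC_UNLOCK(p);
	crfree(oldcred);			/* drop the reference returned by crcopysafe() */
}

proc_set_cred() also returns the old credentials, so a caller that did not keep the pointer from crcopysafe() can free whatever it hands back.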

Modified: trunk/sys/kern/kern_racct.c
===================================================================
--- trunk/sys/kern/kern_racct.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_racct.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 The FreeBSD Foundation
  * All rights reserved.
@@ -26,11 +27,11 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_racct.c 314348 2017-02-27 17:18:07Z avg $
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_racct.c 314348 2017-02-27 17:18:07Z avg $");
 
 #include "opt_kdtrace.h"
 #include "opt_sched.h"
@@ -71,8 +72,16 @@
  * Do not block processes that have their %cpu usage <= pcpu_threshold.
  */
 static int pcpu_threshold = 1;
+#ifdef RACCT_DEFAULT_TO_DISABLED
+int racct_enable = 0;
+#else
+int racct_enable = 1;
+#endif
 
 SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting");
+TUNABLE_INT("kern.racct.enable", &racct_enable);
+SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable,
+    0, "Enable RACCT/RCTL");
 SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
     0, "Processes with higher %cpu usage than this value can be throttled.");
 
@@ -96,30 +105,32 @@
 		uint64_t amount);
 
 SDT_PROVIDER_DEFINE(racct);
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
-    "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
+SDT_PROBE_DEFINE3(racct, , rusage, add,
     "struct proc *", "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
-    "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
-    "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
-    "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
+SDT_PROBE_DEFINE3(racct, , rusage, add__failure,
     "struct proc *", "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
-    "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
-    "int", "uint64_t");
-SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
-SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
+SDT_PROBE_DEFINE3(racct, , rusage, add__cred,
+    "struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, add__force,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, set,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, set__failure,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, sub,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, sub__cred,
+    "struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE1(racct, , racct, create,
     "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
+SDT_PROBE_DEFINE1(racct, , racct, destroy,
+    "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, join,
     "struct racct *", "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
-    "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, join__failure,
+    "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, leave,
+    "struct racct *", "struct racct *");
 
 int racct_types[] = {
 	[RACCT_CPU] =
@@ -314,6 +325,8 @@
 	fixpt_t p_pctcpu;
 	struct thread *td;
 
+	ASSERT_RACCT_ENABLED();
+
 	/*
 	 * If the process is swapped out, we count its %cpu usage as zero.
 	 * This behaviour is consistent with the userland ps(1) tool.
@@ -378,6 +391,7 @@
 {
 	int i;
 
+	ASSERT_RACCT_ENABLED();
 	mtx_assert(&racct_lock, MA_OWNED);
 
 	/*
@@ -399,6 +413,7 @@
 {
 	int i;
 
+	ASSERT_RACCT_ENABLED();
 	mtx_assert(&racct_lock, MA_OWNED);
 
 	/*
@@ -432,8 +447,11 @@
 racct_create(struct racct **racctp)
 {
 
-	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
+	if (!racct_enable)
+		return;
 
+	SDT_PROBE1(racct, , racct, create, racctp);
+
 	KASSERT(*racctp == NULL, ("racct already allocated"));
 
 	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
@@ -445,8 +463,10 @@
 	int i;
 	struct racct *racct;
 
-	SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
+	ASSERT_RACCT_ENABLED();
 
+	SDT_PROBE1(racct, , racct, destroy, racctp);
+
 	mtx_assert(&racct_lock, MA_OWNED);
 	KASSERT(racctp != NULL, ("NULL racctp"));
 	KASSERT(*racctp != NULL, ("NULL racct"));
@@ -471,6 +491,9 @@
 racct_destroy(struct racct **racct)
 {
 
+	if (!racct_enable)
+		return;
+
 	mtx_lock(&racct_lock);
 	racct_destroy_locked(racct);
 	mtx_unlock(&racct_lock);
@@ -482,10 +505,11 @@
  * may be less than zero.
  */
 static void
-racct_alloc_resource(struct racct *racct, int resource,
+racct_adjust_resource(struct racct *racct, int resource,
     uint64_t amount)
 {
 
+	ASSERT_RACCT_ENABLED();
 	mtx_assert(&racct_lock, MA_OWNED);
 	KASSERT(racct != NULL, ("NULL racct"));
 
@@ -498,16 +522,16 @@
 	
 	/*
 	 * There are some cases where the racct %cpu resource would grow
-	 * beyond 100%.
-	 * For example in racct_proc_exit() we add the process %cpu usage
-	 * to the ucred racct containers.  If too many processes terminated
-	 * in a short time span, the ucred %cpu resource could grow too much.
-	 * Also, the 4BSD scheduler sometimes returns for a thread more than
-	 * 100% cpu usage.  So we set a boundary here to 100%.
+	 * beyond 100% per core.  For example in racct_proc_exit() we add
+	 * the process %cpu usage to the ucred racct containers.  If too
+	 * many processes terminated in a short time span, the ucred %cpu
+	 * resource could grow too much.  Also, the 4BSD scheduler sometimes
+	 * returns for a thread more than 100% cpu usage. So we set a sane
+	 * boundary here to 100% * the maximum number of CPUs.
 	 */
 	if ((resource == RACCT_PCTCPU) &&
-	    (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000))
-		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000;
+	    (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU))
+		racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU;
 }
 
 static int
@@ -517,8 +541,10 @@
 	int error;
 #endif
 
-	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
+	ASSERT_RACCT_ENABLED();
 
+	SDT_PROBE3(racct, , rusage, add, p, resource, amount);
+
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
@@ -527,12 +553,11 @@
 #ifdef RCTL
 	error = rctl_enforce(p, resource, amount);
 	if (error && RACCT_IS_DENIABLE(resource)) {
-		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
-		    amount, 0, 0);
+		SDT_PROBE3(racct, , rusage, add__failure, p, resource, amount);
 		return (error);
 	}
 #endif
-	racct_alloc_resource(p->p_racct, resource, amount);
+	racct_adjust_resource(p->p_racct, resource, amount);
 	racct_add_cred_locked(p->p_ucred, resource, amount);
 
 	return (0);
@@ -547,6 +572,9 @@
 {
 	int error;
 
+	if (!racct_enable)
+		return (0);
+
 	mtx_lock(&racct_lock);
 	error = racct_add_locked(p, resource, amount);
 	mtx_unlock(&racct_lock);
@@ -558,14 +586,15 @@
 {
 	struct prison *pr;
 
-	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
-	    0, 0);
+	ASSERT_RACCT_ENABLED();
 
-	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
+	SDT_PROBE3(racct, , rusage, add__cred, cred, resource, amount);
+
+	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
-		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
+		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
 		    amount);
-	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
+	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount);
 }
 
 /*
@@ -578,6 +607,9 @@
 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
 {
 
+	if (!racct_enable)
+		return;
+
 	mtx_lock(&racct_lock);
 	racct_add_cred_locked(cred, resource, amount);
 	mtx_unlock(&racct_lock);
@@ -591,8 +623,11 @@
 racct_add_force(struct proc *p, int resource, uint64_t amount)
 {
 
-	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
+	if (!racct_enable)
+		return;
 
+	SDT_PROBE3(racct, , rusage, add__force, p, resource, amount);
+
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
@@ -599,7 +634,7 @@
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	mtx_lock(&racct_lock);
-	racct_alloc_resource(p->p_racct, resource, amount);
+	racct_adjust_resource(p->p_racct, resource, amount);
 	mtx_unlock(&racct_lock);
 	racct_add_cred(p->p_ucred, resource, amount);
 }
@@ -613,8 +648,10 @@
 	int error;
 #endif
 
-	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+	ASSERT_RACCT_ENABLED();
 
+	SDT_PROBE3(racct, , rusage, set, p, resource, amount);
+
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
@@ -645,13 +682,13 @@
 	if (diff_proc > 0) {
 		error = rctl_enforce(p, resource, diff_proc);
 		if (error && RACCT_IS_DENIABLE(resource)) {
-			SDT_PROBE(racct, kernel, rusage, set_failure, p,
-			    resource, amount, 0, 0);
+			SDT_PROBE3(racct, , rusage, set__failure, p, resource,
+			    amount);
 			return (error);
 		}
 	}
 #endif
-	racct_alloc_resource(p->p_racct, resource, diff_proc);
+	racct_adjust_resource(p->p_racct, resource, diff_proc);
 	if (diff_cred > 0)
 		racct_add_cred_locked(p->p_ucred, resource, diff_cred);
 	else if (diff_cred < 0)
@@ -672,6 +709,9 @@
 {
 	int error;
 
+	if (!racct_enable)
+		return (0);
+
 	mtx_lock(&racct_lock);
 	error = racct_set_locked(p, resource, amount);
 	mtx_unlock(&racct_lock);
@@ -684,8 +724,10 @@
 	int64_t old_amount, decayed_amount;
 	int64_t diff_proc, diff_cred;
 
-	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+	ASSERT_RACCT_ENABLED();
 
+	SDT_PROBE3(racct, , rusage, set, p, resource, amount);
+
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
@@ -708,7 +750,7 @@
 	} else
 		diff_cred = diff_proc;
 
-	racct_alloc_resource(p->p_racct, resource, diff_proc);
+	racct_adjust_resource(p->p_racct, resource, diff_proc);
 	if (diff_cred > 0)
 		racct_add_cred_locked(p->p_ucred, resource, diff_cred);
 	else if (diff_cred < 0)
@@ -718,6 +760,10 @@
 void
 racct_set_force(struct proc *p, int resource, uint64_t amount)
 {
+
+	if (!racct_enable)
+		return;
+
 	mtx_lock(&racct_lock);
 	racct_set_force_locked(p, resource, amount);
 	mtx_unlock(&racct_lock);
@@ -733,6 +779,9 @@
 racct_get_limit(struct proc *p, int resource)
 {
 
+	if (!racct_enable)
+		return (UINT64_MAX);
+
 #ifdef RCTL
 	return (rctl_get_limit(p, resource));
 #else
@@ -750,6 +799,9 @@
 racct_get_available(struct proc *p, int resource)
 {
 
+	if (!racct_enable)
+		return (UINT64_MAX);
+
 #ifdef RCTL
 	return (rctl_get_available(p, resource));
 #else
@@ -766,6 +818,8 @@
 racct_pcpu_available(struct proc *p)
 {
 
+	ASSERT_RACCT_ENABLED();
+
 #ifdef RCTL
 	return (rctl_pcpu_available(p));
 #else
@@ -780,8 +834,11 @@
 racct_sub(struct proc *p, int resource, uint64_t amount)
 {
 
-	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
+	if (!racct_enable)
+		return;
 
+	SDT_PROBE3(racct, , rusage, sub, p, resource, amount);
+
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
@@ -795,7 +852,7 @@
 	     "than allocated %jd for %s (pid %d)", __func__, amount, resource,
 	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
 
-	racct_alloc_resource(p->p_racct, resource, -amount);
+	racct_adjust_resource(p->p_racct, resource, -amount);
 	racct_sub_cred_locked(p->p_ucred, resource, amount);
 	mtx_unlock(&racct_lock);
 }
@@ -805,9 +862,10 @@
 {
 	struct prison *pr;
 
-	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
-	    0, 0);
+	ASSERT_RACCT_ENABLED();
 
+	SDT_PROBE3(racct, , rusage, sub__cred, cred, resource, amount);
+
 #ifdef notyet
 	KASSERT(RACCT_CAN_DROP(resource),
 	    ("%s: called for resource %d which can not drop", __func__,
@@ -814,11 +872,11 @@
 	     resource));
 #endif
 
-	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
+	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
-		racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
+		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
 		    -amount);
-	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
+	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount);
 }
 
 /*
@@ -828,6 +886,9 @@
 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
 {
 
+	if (!racct_enable)
+		return;
+
 	mtx_lock(&racct_lock);
 	racct_sub_cred_locked(cred, resource, amount);
 	mtx_unlock(&racct_lock);
@@ -841,6 +902,9 @@
 {
 	int i, error = 0;
 
+	if (!racct_enable)
+		return (0);
+
 	/*
 	 * Create racct for the child process.
 	 */
@@ -897,6 +961,9 @@
 {
 
 #ifdef RCTL
+	if (!racct_enable)
+		return;
+
 	PROC_LOCK(child);
 	mtx_lock(&racct_lock);
 	rctl_enforce(child, RACCT_NPROC, 0);
@@ -914,6 +981,9 @@
 	struct timeval wallclock;
 	uint64_t pct_estimate, pct;
 
+	if (!racct_enable)
+		return;
+
 	PROC_LOCK(p);
 	/*
 	 * We don't need to calculate rux, proc_reap() has already done this.
@@ -939,10 +1009,13 @@
 	racct_set_locked(p, RACCT_CPU, runtime);
 	racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);
 
+	KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0,
+	    ("process reaped with %ju allocated for RSS\n",
+	    p->p_racct->r_resources[RACCT_RSS]));
 	for (i = 0; i <= RACCT_MAX; i++) {
 		if (p->p_racct->r_resources[i] == 0)
 			continue;
-	    	if (!RACCT_IS_RECLAIMABLE(i))
+		if (!RACCT_IS_RECLAIMABLE(i))
 			continue;
 		racct_set_locked(p, i, 0);
 	}
@@ -968,6 +1041,9 @@
 	struct loginclass *oldlc, *newlc;
 	struct prison *oldpr, *newpr, *pr;
 
+	if (!racct_enable)
+		return;
+
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 
 	newuip = newcred->cr_ruidinfo;
@@ -1005,6 +1081,8 @@
 racct_move(struct racct *dest, struct racct *src)
 {
 
+	ASSERT_RACCT_ENABLED();
+
 	mtx_lock(&racct_lock);
 
 	racct_add_racct(dest, src);
@@ -1021,6 +1099,7 @@
 	int cpuid;
 #endif
 
+	ASSERT_RACCT_ENABLED();
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
@@ -1066,6 +1145,9 @@
 static void
 racct_proc_wakeup(struct proc *p)
 {
+
+	ASSERT_RACCT_ENABLED();
+
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (p->p_throttled) {
@@ -1080,6 +1162,8 @@
 	int resource;
 	int64_t r_old, r_new;
 
+	ASSERT_RACCT_ENABLED();
+
 	resource = *(int *)res;
 	r_old = racct->r_resources[resource];
 
@@ -1096,6 +1180,9 @@
 static void
 racct_decay(int resource)
 {
+
+	ASSERT_RACCT_ENABLED();
+
 	ui_racct_foreach(racct_decay_resource, &resource, NULL);
 	loginclass_racct_foreach(racct_decay_resource, &resource, NULL);
 	prison_racct_foreach(racct_decay_resource, &resource, NULL);
@@ -1110,6 +1197,8 @@
 	uint64_t runtime;
 	uint64_t pct, pct_estimate;
 
+	ASSERT_RACCT_ENABLED();
+
 	for (;;) {
 		racct_decay(RACCT_PCTCPU);
 
@@ -1130,11 +1219,11 @@
 
 			microuptime(&wallclock);
 			timevalsub(&wallclock, &p->p_stats->p_start);
-			PROC_SLOCK(p);
+			PROC_STATLOCK(p);
 			FOREACH_THREAD_IN_PROC(p, td)
 				ruxagg(p, td);
 			runtime = cputick2usec(p->p_rux.rux_runtime);
-			PROC_SUNLOCK(p);
+			PROC_STATUNLOCK(p);
 #ifdef notyet
 			KASSERT(runtime >= p->p_prev_runtime,
 			    ("runtime < p_prev_runtime"));
@@ -1189,11 +1278,22 @@
 	racctd,
 	NULL
 };
-SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
 
 static void
+racctd_init(void)
+{
+	if (!racct_enable)
+		return;
+
+	kproc_start(&racctd_kp);
+}
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);
+
+static void
 racct_init(void)
 {
+	if (!racct_enable)
+		return;
 
 	racct_zone = uma_zcreate("racct", sizeof(struct racct),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
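
The kern.racct.enable changes follow one convention throughout the file: public racct_*() entry points bail out early when accounting is disabled, while internal helpers assert that they are only reached with it enabled. A sketch of that split (the ASSERT_RACCT_ENABLED() body shown here is an assumption about what the macro in <sys/racct.h> expands to, and racct_lock stands in for the file-static accounting mutex; neither is text from this commit):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>

extern int racct_enable;		/* set from the kern.racct.enable loader tunable */
static struct mtx racct_lock;		/* placeholder for the accounting mutex */

/* Assumed expansion; the real definition lives in <sys/racct.h>. */
#define	EXAMPLE_ASSERT_RACCT_ENABLED()					\
	KASSERT(racct_enable != 0, ("%s: RACCT not enabled", __func__))

static void
racct_example_locked(struct proc *p, int resource, uint64_t amount)
{

	EXAMPLE_ASSERT_RACCT_ENABLED();	/* callers already checked racct_enable */
	mtx_assert(&racct_lock, MA_OWNED);
	/* ... adjust p->p_racct->r_resources[resource] by amount here ... */
}

void
racct_example(struct proc *p, int resource, uint64_t amount)
{

	if (!racct_enable)		/* cheap early return on the common path */
		return;
	mtx_lock(&racct_lock);
	racct_example_locked(p, resource, amount);
	mtx_unlock(&racct_lock);
}

Because the sysctl is registered CTLFLAG_RDTUN, the knob is a boot-time switch: setting kern.racct.enable=0 in loader.conf disables accounting, and it cannot be flipped on a running system.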

Modified: trunk/sys/kern/kern_rctl.c
===================================================================
--- trunk/sys/kern/kern_rctl.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_rctl.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 The FreeBSD Foundation
  * All rights reserved.
@@ -26,11 +27,11 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_rctl.c 302234 2016-06-27 21:50:30Z bdrewery $
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rctl.c 302234 2016-06-27 21:50:30Z bdrewery $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -225,6 +226,7 @@
 	int64_t available = INT64_MAX;
 	struct ucred *cred = p->p_ucred;
 
+	ASSERT_RACCT_ENABLED();
 	rw_assert(&rctl_lock, RA_LOCKED);
 
 	resource = rule->rr_resource;
@@ -264,6 +266,8 @@
 {
 	int64_t available;
 
+	ASSERT_RACCT_ENABLED();
+
 	rw_assert(&rctl_lock, RA_LOCKED);
 
 	available = rctl_available_resource(p, rule);
@@ -283,6 +287,8 @@
 	struct rctl_rule_link *link;
 	int64_t available, minavailable, limit;
 
+	ASSERT_RACCT_ENABLED();
+
 	minavailable = INT64_MAX;
 	limit = 0;
 
@@ -305,7 +311,7 @@
 
 	/*
 	 * Return slightly less than actual value of the available
-	 * %cpu resource.  This makes %cpu throttling more agressive
+	 * %cpu resource.  This makes %cpu throttling more aggressive
 	 * and lets us act sooner than the limits are already exceeded.
 	 */
 	if (limit != 0) {
@@ -334,6 +340,8 @@
 	static int curtime = 0;
 	static struct timeval lasttime;
 
+	ASSERT_RACCT_ENABLED();
+
 	rw_rlock(&rctl_lock);
 
 	/*
@@ -457,6 +465,8 @@
 	struct rctl_rule_link *link;
 	uint64_t amount = UINT64_MAX;
 
+	ASSERT_RACCT_ENABLED();
+
 	rw_rlock(&rctl_lock);
 
 	/*
@@ -487,6 +497,8 @@
 
 	minavailable = INT64_MAX;
 
+	ASSERT_RACCT_ENABLED();
+
 	rw_rlock(&rctl_lock);
 
 	/*
@@ -521,6 +533,8 @@
 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
 {
 
+	ASSERT_RACCT_ENABLED();
+
 	if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
 		if (rule->rr_subject_type != filter->rr_subject_type)
 			return (0);
@@ -635,6 +649,7 @@
 {
 	struct rctl_rule_link *link;
 
+	ASSERT_RACCT_ENABLED();
 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
 
 	rctl_rule_acquire(rule);
@@ -652,6 +667,7 @@
 {
 	struct rctl_rule_link *link;
 
+	ASSERT_RACCT_ENABLED();
 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
 	rw_assert(&rctl_lock, RA_WLOCKED);
 
@@ -678,6 +694,7 @@
 	int removed = 0;
 	struct rctl_rule_link *link, *linktmp;
 
+	ASSERT_RACCT_ENABLED();
 	rw_assert(&rctl_lock, RA_WLOCKED);
 
 	LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
@@ -696,6 +713,8 @@
 rctl_rule_acquire_subject(struct rctl_rule *rule)
 {
 
+	ASSERT_RACCT_ENABLED();
+
 	switch (rule->rr_subject_type) {
 	case RCTL_SUBJECT_TYPE_UNDEFINED:
 	case RCTL_SUBJECT_TYPE_PROCESS:
@@ -722,6 +741,8 @@
 rctl_rule_release_subject(struct rctl_rule *rule)
 {
 
+	ASSERT_RACCT_ENABLED();
+
 	switch (rule->rr_subject_type) {
 	case RCTL_SUBJECT_TYPE_UNDEFINED:
 	case RCTL_SUBJECT_TYPE_PROCESS:
@@ -749,6 +770,8 @@
 {
 	struct rctl_rule *rule;
 
+	ASSERT_RACCT_ENABLED();
+
 	rule = uma_zalloc(rctl_rule_zone, flags);
 	if (rule == NULL)
 		return (NULL);
@@ -771,6 +794,8 @@
 {
 	struct rctl_rule *copy;
 
+	ASSERT_RACCT_ENABLED();
+
 	copy = uma_zalloc(rctl_rule_zone, flags);
 	if (copy == NULL)
 		return (NULL);
@@ -793,6 +818,7 @@
 rctl_rule_acquire(struct rctl_rule *rule)
 {
 
+	ASSERT_RACCT_ENABLED();
 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 
 	refcount_acquire(&rule->rr_refcount);
@@ -805,6 +831,7 @@
 	
 	rule = (struct rctl_rule *)context;
 
+	ASSERT_RACCT_ENABLED();
 	KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
 	
 	/*
@@ -819,6 +846,7 @@
 rctl_rule_release(struct rctl_rule *rule)
 {
 
+	ASSERT_RACCT_ENABLED();
 	KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
 
 	if (refcount_release(&rule->rr_refcount)) {
@@ -838,6 +866,8 @@
 rctl_rule_fully_specified(const struct rctl_rule *rule)
 {
 
+	ASSERT_RACCT_ENABLED();
+
 	switch (rule->rr_subject_type) {
 	case RCTL_SUBJECT_TYPE_UNDEFINED:
 		return (0);
@@ -882,6 +912,8 @@
 	struct rctl_rule *rule;
 	id_t id;
 
+	ASSERT_RACCT_ENABLED();
+
 	rule = rctl_rule_alloc(M_WAITOK);
 
 	subjectstr = strsep(&rulestr, ":");
@@ -1008,6 +1040,7 @@
 	struct rctl_rule *rule2;
 	int match;
 
+	ASSERT_RACCT_ENABLED();
 	KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
 
 	/*
@@ -1118,6 +1151,8 @@
 	struct rctl_rule *filter = (struct rctl_rule *)arg2;
 	int found = 0;
 
+	ASSERT_RACCT_ENABLED();
+
 	rw_wlock(&rctl_lock);
 	found += rctl_racct_remove_rules(racct, filter);
 	rw_wunlock(&rctl_lock);
@@ -1134,6 +1169,8 @@
 	int found = 0;
 	struct proc *p;
 
+	ASSERT_RACCT_ENABLED();
+
 	if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
 	    filter->rr_subject.rs_proc != NULL) {
 		p = filter->rr_subject.rs_proc;
@@ -1172,6 +1209,8 @@
 {
 	int64_t amount;
 
+	ASSERT_RACCT_ENABLED();
+
 	sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
 
 	switch (rule->rr_subject_type) {
@@ -1231,6 +1270,8 @@
 	int error;
 	char *str;
 
+	ASSERT_RACCT_ENABLED();
+
 	if (inbuflen <= 0)
 		return (EINVAL);
 	if (inbuflen > RCTL_MAX_INBUFLEN)
@@ -1256,6 +1297,8 @@
 {
 	int error;
 
+	ASSERT_RACCT_ENABLED();
+
 	if (outputsbuf == NULL)
 		return (0);
 
@@ -1277,6 +1320,8 @@
 	int64_t amount;
 	struct sbuf *sb;
 
+	ASSERT_RACCT_ENABLED();
+
 	sb = sbuf_new_auto();
 	for (i = 0; i <= RACCT_MAX; i++) {
 		if (sloppy == 0 && RACCT_IS_SLOPPY(i))
@@ -1302,6 +1347,9 @@
 	struct loginclass *lc;
 	struct prison_racct *prr;
 
+	if (!racct_enable)
+		return (ENOSYS);
+
 	error = priv_check(td, PRIV_RCTL_GET_RACCT);
 	if (error != 0)
 		return (error);
@@ -1372,6 +1420,8 @@
 	struct rctl_rule_link *link;
 	struct sbuf *sb = (struct sbuf *)arg3;
 
+	ASSERT_RACCT_ENABLED();
+
 	rw_rlock(&rctl_lock);
 	LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
 		if (!rctl_rule_matches(link->rrl_rule, filter))
@@ -1393,6 +1443,9 @@
 	struct rctl_rule_link *link;
 	struct proc *p;
 
+	if (!racct_enable)
+		return (ENOSYS);
+
 	error = priv_check(td, PRIV_RCTL_GET_RULES);
 	if (error != 0)
 		return (error);
@@ -1467,6 +1520,9 @@
 	struct rctl_rule *filter;
 	struct rctl_rule_link *link;
 
+	if (!racct_enable)
+		return (ENOSYS);
+
 	error = priv_check(td, PRIV_RCTL_GET_LIMITS);
 	if (error != 0)
 		return (error);
@@ -1538,6 +1594,9 @@
 	struct rctl_rule *rule;
 	char *inputstr;
 
+	if (!racct_enable)
+		return (ENOSYS);
+
 	error = priv_check(td, PRIV_RCTL_ADD_RULE);
 	if (error != 0)
 		return (error);
@@ -1580,6 +1639,9 @@
 	struct rctl_rule *filter;
 	char *inputstr;
 
+	if (!racct_enable)
+		return (ENOSYS);
+
 	error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
 	if (error != 0)
 		return (error);
@@ -1616,6 +1678,8 @@
 	struct prison_racct *newprr;
 	LIST_HEAD(, rctl_rule_link) newrules;
 
+	ASSERT_RACCT_ENABLED();
+
 	newuip = newcred->cr_ruidinfo;
 	newlc = newcred->cr_loginclass;
 	newprr = newcred->cr_prison->pr_prison_racct;
@@ -1756,6 +1820,7 @@
 
 	LIST_INIT(&child->p_racct->r_rule_links);
 
+	ASSERT_RACCT_ENABLED();
 	KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
 
 	rw_wlock(&rctl_lock);
@@ -1809,6 +1874,8 @@
 {
 	struct rctl_rule_link *link;
 
+	ASSERT_RACCT_ENABLED();
+
 	rw_wlock(&rctl_lock);
 	while (!LIST_EMPTY(&racct->r_rule_links)) {
 		link = LIST_FIRST(&racct->r_rule_links);
@@ -1823,6 +1890,9 @@
 rctl_init(void)
 {
 
+	if (!racct_enable)
+		return;
+
 	rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
 	    sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
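
On the RCTL side the same switch is visible from userland: with accounting disabled, each rctl(2)-family syscall now returns ENOSYS instead of operating on empty rule lists. A hypothetical userland check (the rule string and uid are arbitrary, and the rctl_add_rule(2) wrapper is assumed to behave as its manual page documents):

#include <sys/types.h>
#include <sys/rctl.h>

#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *rule = "user:1001:vmemoryuse:deny=1g";

	if (rctl_add_rule(rule, strlen(rule) + 1, NULL, 0) != 0) {
		if (errno == ENOSYS)
			fprintf(stderr, "RACCT/RCTL unavailable "
			    "(kernel option off or kern.racct.enable=0)\n");
		else
			perror("rctl_add_rule");
		return (1);
	}
	printf("rule added: %s\n", rule);
	return (0);
}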

Modified: trunk/sys/kern/kern_rmlock.c
===================================================================
--- trunk/sys/kern/kern_rmlock.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_rmlock.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2007 Stephan Uphoff <ups at FreeBSD.org>
  * All rights reserved.
@@ -32,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rmlock.c 323870 2017-09-21 19:24:11Z marius $");
 
 #include "opt_ddb.h"
 #include "opt_kdtrace.h"
@@ -41,6 +42,7 @@
 #include <sys/systm.h>
 
 #include <sys/kernel.h>
+#include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -56,35 +58,53 @@
 #include <ddb/ddb.h>
 #endif
 
+/*
+ * A cookie to mark destroyed rmlocks.  This is stored in the head of
+ * rm_activeReaders.
+ */
+#define	RM_DESTROYED	((void *)0xdead)
+
+#define	rm_destroyed(rm)						\
+	(LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED)
+
 #define RMPF_ONQUEUE	1
 #define RMPF_SIGNAL	2
 
-/*
- * To support usage of rmlock in CVs and msleep yet another list for the
- * priority tracker would be needed.  Using this lock for cv and msleep also
- * does not seem very useful
- */
+#ifndef INVARIANTS
+#define	_rm_assert(c, what, file, line)
+#endif
 
-static __inline void compiler_memory_barrier(void) {
-	__asm __volatile("":::"memory");
-}
-
-static void	assert_rm(struct lock_object *lock, int what);
-static void	lock_rm(struct lock_object *lock, int how);
+static void	assert_rm(const struct lock_object *lock, int what);
+#ifdef DDB
+static void	db_show_rm(const struct lock_object *lock);
+#endif
+static void	lock_rm(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
-static int	owner_rm(struct lock_object *lock, struct thread **owner);
+static int	owner_rm(const struct lock_object *lock, struct thread **owner);
 #endif
-static int	unlock_rm(struct lock_object *lock);
+static uintptr_t unlock_rm(struct lock_object *lock);
 
 struct lock_class lock_class_rm = {
 	.lc_name = "rm",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
 	.lc_assert = assert_rm,
-#if 0
 #ifdef DDB
-	.lc_ddb_show = db_show_rwlock,
+	.lc_ddb_show = db_show_rm,
 #endif
+	.lc_lock = lock_rm,
+	.lc_unlock = unlock_rm,
+#ifdef KDTRACE_HOOKS
+	.lc_owner = owner_rm,
 #endif
+};
+
+struct lock_class lock_class_rm_sleepable = {
+	.lc_name = "sleepable rm",
+	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE,
+	.lc_assert = assert_rm,
+#ifdef DDB
+	.lc_ddb_show = db_show_rm,
+#endif
 	.lc_lock = lock_rm,
 	.lc_unlock = unlock_rm,
 #ifdef KDTRACE_HOOKS
@@ -93,32 +113,79 @@
 };
 
 static void
-assert_rm(struct lock_object *lock, int what)
+assert_rm(const struct lock_object *lock, int what)
 {
 
-	panic("assert_rm called");
+	rm_assert((const struct rmlock *)lock, what);
 }
 
 static void
-lock_rm(struct lock_object *lock, int how)
+lock_rm(struct lock_object *lock, uintptr_t how)
 {
+	struct rmlock *rm;
+	struct rm_priotracker *tracker;
 
-	panic("lock_rm called");
+	rm = (struct rmlock *)lock;
+	if (how == 0)
+		rm_wlock(rm);
+	else {
+		tracker = (struct rm_priotracker *)how;
+		rm_rlock(rm, tracker);
+	}
 }
 
-static int
+static uintptr_t
 unlock_rm(struct lock_object *lock)
 {
+	struct thread *td;
+	struct pcpu *pc;
+	struct rmlock *rm;
+	struct rm_queue *queue;
+	struct rm_priotracker *tracker;
+	uintptr_t how;
 
-	panic("unlock_rm called");
+	rm = (struct rmlock *)lock;
+	tracker = NULL;
+	how = 0;
+	rm_assert(rm, RA_LOCKED | RA_NOTRECURSED);
+	if (rm_wowned(rm))
+		rm_wunlock(rm);
+	else {
+		/*
+		 * Find the right rm_priotracker structure for curthread.
+		 * The guarantee about its uniqueness is given by the fact
+		 * we already asserted the lock wasn't recursively acquired.
+		 */
+		critical_enter();
+		td = curthread;
+		pc = pcpu_find(curcpu);
+		for (queue = pc->pc_rm_queue.rmq_next;
+		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
+			tracker = (struct rm_priotracker *)queue;
+				if ((tracker->rmp_rmlock == rm) &&
+				    (tracker->rmp_thread == td)) {
+					how = (uintptr_t)tracker;
+					break;
+				}
+		}
+		KASSERT(tracker != NULL,
+		    ("rm_priotracker is non-NULL when lock held in read mode"));
+		critical_exit();
+		rm_runlock(rm, tracker);
+	}
+	return (how);
 }
 
 #ifdef KDTRACE_HOOKS
 static int
-owner_rm(struct lock_object *lock, struct thread **owner)
+owner_rm(const struct lock_object *lock, struct thread **owner)
 {
+	const struct rmlock *rm;
+	struct lock_class *lc;
 
-	panic("owner_rm called");
+	rm = (const struct rmlock *)lock;
+	lc = LOCK_CLASS(&rm->rm_wlock_object);
+	return (lc->lc_owner(&rm->rm_wlock_object, owner));
 }
 #endif
 
@@ -149,6 +216,28 @@
 	pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue;
 }
 
+/*
+ * Return a count of the number of trackers the thread 'td' already
+ * has on this CPU for the lock 'rm'.
+ */
+static int
+rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm,
+    const struct thread *td)
+{
+	struct rm_queue *queue;
+	struct rm_priotracker *tracker;
+	int count;
+
+	count = 0;
+	for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
+	    queue = queue->rmq_next) {
+		tracker = (struct rm_priotracker *)queue;
+		if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td))
+			count++;
+	}
+	return (count);
+}
+
 static void inline
 rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker)
 {
@@ -186,12 +275,11 @@
 	}
 }
 
-CTASSERT((RM_SLEEPABLE & LO_CLASSFLAGS) == RM_SLEEPABLE);
-
 void
 rm_init_flags(struct rmlock *rm, const char *name, int opts)
 {
-	int liflags;
+	struct lock_class *lc;
+	int liflags, xflags;
 
 	liflags = 0;
 	if (!(opts & RM_NOWITNESS))
@@ -198,14 +286,23 @@
 		liflags |= LO_WITNESS;
 	if (opts & RM_RECURSE)
 		liflags |= LO_RECURSABLE;
+	if (opts & RM_NEW)
+		liflags |= LO_NEW;
 	rm->rm_writecpus = all_cpus;
 	LIST_INIT(&rm->rm_activeReaders);
 	if (opts & RM_SLEEPABLE) {
-		liflags |= RM_SLEEPABLE;
-		sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", SX_RECURSE);
-	} else
-		mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", MTX_NOWITNESS);
-	lock_init(&rm->lock_object, &lock_class_rm, name, NULL, liflags);
+		liflags |= LO_SLEEPABLE;
+		lc = &lock_class_rm_sleepable;
+		xflags = (opts & RM_NEW ? SX_NEW : 0);
+		sx_init_flags(&rm->rm_lock_sx, "rmlock_sx",
+		    xflags | SX_NOWITNESS);
+	} else {
+		lc = &lock_class_rm;
+		xflags = (opts & RM_NEW ? MTX_NEW : 0);
+		mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx",
+		    xflags | MTX_NOWITNESS);
+	}
+	lock_init(&rm->lock_object, lc, name, NULL, liflags);
 }
 
 void
@@ -219,7 +316,9 @@
 rm_destroy(struct rmlock *rm)
 {
 
-	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+	rm_assert(rm, RA_UNLOCKED);
+	LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED;
+	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		sx_destroy(&rm->rm_lock_sx);
 	else
 		mtx_destroy(&rm->rm_lock_mtx);
@@ -227,10 +326,10 @@
 }
 
 int
-rm_wowned(struct rmlock *rm)
+rm_wowned(const struct rmlock *rm)
 {
 
-	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		return (sx_xlocked(&rm->rm_lock_sx));
 	else
 		return (mtx_owned(&rm->rm_lock_mtx));
@@ -256,8 +355,6 @@
 _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
 {
 	struct pcpu *pc;
-	struct rm_queue *queue;
-	struct rm_priotracker *atracker;
 
 	critical_enter();
 	pc = pcpu_find(curcpu);
@@ -280,7 +377,7 @@
 	}
 
 	/*
-	 * We allow readers to aquire a lock even if a writer is blocked if
+	 * We allow readers to acquire a lock even if a writer is blocked if
 	 * the lock is recursive and the reader already holds the lock.
 	 */
 	if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) {
@@ -288,20 +385,15 @@
 		 * Just grant the lock if this thread already has a tracker
 		 * for this lock on the per-cpu queue.
 		 */
-		for (queue = pc->pc_rm_queue.rmq_next;
-		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
-			atracker = (struct rm_priotracker *)queue;
-			if ((atracker->rmp_rmlock == rm) &&
-			    (atracker->rmp_thread == tracker->rmp_thread)) {
-				mtx_lock_spin(&rm_spinlock);
-				LIST_INSERT_HEAD(&rm->rm_activeReaders,
-				    tracker, rmp_qentry);
-				tracker->rmp_flags = RMPF_ONQUEUE;
-				mtx_unlock_spin(&rm_spinlock);
-				rm_tracker_add(pc, tracker);
-				critical_exit();
-				return (1);
-			}
+		if (rm_trackers_present(pc, rm, curthread) != 0) {
+			mtx_lock_spin(&rm_spinlock);
+			LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
+			    rmp_qentry);
+			tracker->rmp_flags = RMPF_ONQUEUE;
+			mtx_unlock_spin(&rm_spinlock);
+			rm_tracker_add(pc, tracker);
+			critical_exit();
+			return (1);
 		}
 	}
 
@@ -309,7 +401,7 @@
 	critical_exit();
 
 	if (trylock) {
-		if (rm->lock_object.lo_flags & RM_SLEEPABLE) {
+		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
 			if (!sx_try_xlock(&rm->rm_lock_sx))
 				return (0);
 		} else {
@@ -317,9 +409,11 @@
 				return (0);
 		}
 	} else {
-		if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+		if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
+			THREAD_SLEEPING_OK();
 			sx_xlock(&rm->rm_lock_sx);
-		else
+			THREAD_NO_SLEEPING();
+		} else
 			mtx_lock(&rm->rm_lock_mtx);
 	}
 
@@ -330,7 +424,7 @@
 	sched_pin();
 	critical_exit();
 
-	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		sx_xunlock(&rm->rm_lock_sx);
 	else
 		mtx_unlock(&rm->rm_lock_mtx);
@@ -351,9 +445,12 @@
 	tracker->rmp_thread = td;
 	tracker->rmp_rmlock = rm;
 
+	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
+		THREAD_NO_SLEEPING();
+
 	td->td_critnest++;	/* critical_enter(); */
 
-	compiler_memory_barrier();
+	__compiler_membar();
 
 	pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */
 
@@ -361,7 +458,7 @@
 
 	sched_pin();
 
-	compiler_memory_barrier();
+	__compiler_membar();
 
 	td->td_critnest--;
 
@@ -425,6 +522,9 @@
 	td->td_critnest--;
 	sched_unpin();
 
+	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
+		THREAD_SLEEPING_OK();
+
 	if (0 == (td->td_owepreempt | tracker->rmp_flags))
 		return;
 
@@ -441,7 +541,7 @@
 	if (SCHEDULER_STOPPED())
 		return;
 
-	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		sx_xlock(&rm->rm_lock_sx);
 	else
 		mtx_lock(&rm->rm_lock_mtx);
@@ -484,20 +584,28 @@
 _rm_wunlock(struct rmlock *rm)
 {
 
-	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+	if (rm->lock_object.lo_flags & LO_SLEEPABLE)
 		sx_xunlock(&rm->rm_lock_sx);
 	else
 		mtx_unlock(&rm->rm_lock_mtx);
 }
 
-#ifdef LOCK_DEBUG
+#if LOCK_DEBUG > 0
 
-void _rm_wlock_debug(struct rmlock *rm, const char *file, int line)
+void
+_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
 {
 
 	if (SCHEDULER_STOPPED())
 		return;
 
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+	    ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d",
+	    curthread, rm->lock_object.lo_name, file, line));
+	KASSERT(!rm_destroyed(rm),
+	    ("rm_wlock() of destroyed rmlock @ %s:%d", file, line));
+	_rm_assert(rm, RA_UNLOCKED, file, line);
+
 	WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line, NULL);
 
@@ -505,11 +613,7 @@
 
 	LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line);
 
-	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
-		WITNESS_LOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE,
-		    file, line);	
-	else
-		WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
+	WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
 
 	curthread->td_locks++;
 
@@ -522,14 +626,13 @@
 	if (SCHEDULER_STOPPED())
 		return;
 
-	curthread->td_locks--;
-	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
-		WITNESS_UNLOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE,
-		    file, line);
-	else
-		WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
+	KASSERT(!rm_destroyed(rm),
+	    ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line));
+	_rm_assert(rm, RA_WLOCKED, file, line);
+	WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line);
 	_rm_wunlock(rm);
+	curthread->td_locks--;
 }
 
 int
@@ -540,20 +643,43 @@
 	if (SCHEDULER_STOPPED())
 		return (1);
 
-	if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE))
-		WITNESS_CHECKORDER(&rm->rm_lock_sx.lock_object, LOP_NEWORDER,
-		    file, line, NULL);
-	WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line, NULL);
+#ifdef INVARIANTS
+	if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) {
+		critical_enter();
+		KASSERT(rm_trackers_present(pcpu_find(curcpu), rm,
+		    curthread) == 0,
+		    ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n",
+		    rm->lock_object.lo_name, file, line));
+		critical_exit();
+	}
+#endif
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+	    ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d",
+	    curthread, rm->lock_object.lo_name, file, line));
+	KASSERT(!rm_destroyed(rm),
+	    ("rm_rlock() of destroyed rmlock @ %s:%d", file, line));
+	if (!trylock) {
+		KASSERT(!rm_wowned(rm),
+		    ("rm_rlock: wlock already held for %s @ %s:%d",
+		    rm->lock_object.lo_name, file, line));
+		WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line,
+		    NULL);
+	}
 
 	if (_rm_rlock(rm, tracker, trylock)) {
-		LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, line);
-
+		if (trylock)
+			LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file,
+			    line);
+		else
+			LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file,
+			    line);
 		WITNESS_LOCK(&rm->lock_object, 0, file, line);
 
 		curthread->td_locks++;
 
 		return (1);
-	}
+	} else if (trylock)
+		LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line);
 
 	return (0);
 }
@@ -566,10 +692,13 @@
 	if (SCHEDULER_STOPPED())
 		return;
 
-	curthread->td_locks--;
+	KASSERT(!rm_destroyed(rm),
+	    ("rm_runlock() of destroyed rmlock @ %s:%d", file, line));
+	_rm_assert(rm, RA_RLOCKED, file, line);
 	WITNESS_UNLOCK(&rm->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line);
 	_rm_runlock(rm, tracker);
+	curthread->td_locks--;
 }
 
 #else
@@ -609,3 +738,130 @@
 }
 
 #endif
+
+#ifdef INVARIANT_SUPPORT
+#ifndef INVARIANTS
+#undef _rm_assert
+#endif
+
+/*
+ * Note that this does not need to use witness_assert() for read lock
+ * assertions since an exact count of read locks held by this thread
+ * is computable.
+ */
+void
+_rm_assert(const struct rmlock *rm, int what, const char *file, int line)
+{
+	int count;
+
+	if (panicstr != NULL)
+		return;
+	switch (what) {
+	case RA_LOCKED:
+	case RA_LOCKED | RA_RECURSED:
+	case RA_LOCKED | RA_NOTRECURSED:
+	case RA_RLOCKED:
+	case RA_RLOCKED | RA_RECURSED:
+	case RA_RLOCKED | RA_NOTRECURSED:
+		/*
+		 * Handle the write-locked case.  Unlike other
+		 * primitives, writers can never recurse.
+		 */
+		if (rm_wowned(rm)) {
+			if (what & RA_RLOCKED)
+				panic("Lock %s exclusively locked @ %s:%d\n",
+				    rm->lock_object.lo_name, file, line);
+			if (what & RA_RECURSED)
+				panic("Lock %s not recursed @ %s:%d\n",
+				    rm->lock_object.lo_name, file, line);
+			break;
+		}
+
+		critical_enter();
+		count = rm_trackers_present(pcpu_find(curcpu), rm, curthread);
+		critical_exit();
+
+		if (count == 0)
+			panic("Lock %s not %slocked @ %s:%d\n",
+			    rm->lock_object.lo_name, (what & RA_RLOCKED) ?
+			    "read " : "", file, line);
+		if (count > 1) {
+			if (what & RA_NOTRECURSED)
+				panic("Lock %s recursed @ %s:%d\n",
+				    rm->lock_object.lo_name, file, line);
+		} else if (what & RA_RECURSED)
+			panic("Lock %s not recursed @ %s:%d\n",
+			    rm->lock_object.lo_name, file, line);
+		break;
+	case RA_WLOCKED:
+		if (!rm_wowned(rm))
+			panic("Lock %s not exclusively locked @ %s:%d\n",
+			    rm->lock_object.lo_name, file, line);
+		break;
+	case RA_UNLOCKED:
+		if (rm_wowned(rm))
+			panic("Lock %s exclusively locked @ %s:%d\n",
+			    rm->lock_object.lo_name, file, line);
+
+		critical_enter();
+		count = rm_trackers_present(pcpu_find(curcpu), rm, curthread);
+		critical_exit();
+
+		if (count != 0)
+			panic("Lock %s read locked @ %s:%d\n",
+			    rm->lock_object.lo_name, file, line);
+		break;
+	default:
+		panic("Unknown rm lock assertion: %d @ %s:%d", what, file,
+		    line);
+	}
+}
+#endif /* INVARIANT_SUPPORT */
+
+#ifdef DDB
+static void
+print_tracker(struct rm_priotracker *tr)
+{
+	struct thread *td;
+
+	td = tr->rmp_thread;
+	db_printf("   thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid,
+	    td->td_proc->p_pid, td->td_name);
+	if (tr->rmp_flags & RMPF_ONQUEUE) {
+		db_printf("ONQUEUE");
+		if (tr->rmp_flags & RMPF_SIGNAL)
+			db_printf(",SIGNAL");
+	} else
+		db_printf("0");
+	db_printf("}\n");
+}
+
+static void
+db_show_rm(const struct lock_object *lock)
+{
+	struct rm_priotracker *tr;
+	struct rm_queue *queue;
+	const struct rmlock *rm;
+	struct lock_class *lc;
+	struct pcpu *pc;
+
+	rm = (const struct rmlock *)lock;
+	db_printf(" writecpus: ");
+	ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus));
+	db_printf("\n");
+	db_printf(" per-CPU readers:\n");
+	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
+		for (queue = pc->pc_rm_queue.rmq_next;
+		    queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
+			tr = (struct rm_priotracker *)queue;
+			if (tr->rmp_rmlock == rm)
+				print_tracker(tr);
+		}
+	db_printf(" active readers:\n");
+	LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry)
+		print_tracker(tr);
+	lc = LOCK_CLASS(&rm->rm_wlock_object);
+	db_printf("Backing write-lock (%s):\n", lc->lc_name);
+	lc->lc_ddb_show(&rm->rm_wlock_object);
+}
+#endif
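
kern_rmlock.c picks up the bulk of this sync: lock_rm() and unlock_rm() used to panic and are now functional, with unlock_rm() returning the reader's rm_priotracker address as the `how` cookie so lock_rm() can reacquire the lock in the same mode, plus rm_assert(), a DDB show routine, and a separate lock class for sleepable rmlocks. For context, this is the consumer API the new code has to round-trip through that cookie (a sketch, not code from this commit):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

static struct rmlock example_rm;

static void
example_init(void)
{

	rm_init_flags(&example_rm, "example rm", RM_RECURSE);
}

static void
example_read_path(void)
{
	struct rm_priotracker tracker;	/* per-acquisition state, lives on the stack */

	rm_rlock(&example_rm, &tracker);
	/* Read-side section: no atomic or bus-locked ops on the fast path. */
	rm_runlock(&example_rm, &tracker);
}

static void
example_write_path(void)
{

	rm_wlock(&example_rm);		/* waits for every active reader to drain */
	/* ... modify the data protected by example_rm ... */
	rm_wunlock(&example_rm);
}

The rm_priotracker the reader supplies is exactly what unlock_rm() now hands back as the cookie, which is how lock_rm() can later restore the lock in the same (read or write) mode.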

Modified: trunk/sys/kern/kern_rwlock.c
===================================================================
--- trunk/sys/kern/kern_rwlock.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_rwlock.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2006 John Baldwin <jhb at FreeBSD.org>
  * All rights reserved.
@@ -10,9 +11,6 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of any co-contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -32,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rwlock.c 323870 2017-09-21 19:24:11Z marius $");
 
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
@@ -40,6 +38,7 @@
 #include "opt_no_adaptive_rwlocks.h"
 
 #include <sys/param.h>
+#include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
@@ -46,6 +45,8 @@
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/turnstile.h>
@@ -61,22 +62,23 @@
 PMC_SOFT_DECLARE( , , lock, failed);
 #endif
 
-#ifdef ADAPTIVE_RWLOCKS
-#define	ROWNER_RETRIES	10
-#define	ROWNER_LOOPS	10000
-#endif
+/*
+ * Return the rwlock address when the lock cookie address is provided.
+ * This functionality assumes that struct rwlock* have a member named rw_lock.
+ */
+#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
 
 #ifdef DDB
 #include <ddb/ddb.h>
 
-static void	db_show_rwlock(struct lock_object *lock);
+static void	db_show_rwlock(const struct lock_object *lock);
 #endif
-static void	assert_rw(struct lock_object *lock, int what);
-static void	lock_rw(struct lock_object *lock, int how);
+static void	assert_rw(const struct lock_object *lock, int what);
+static void	lock_rw(struct lock_object *lock, uintptr_t how);
 #ifdef KDTRACE_HOOKS
-static int	owner_rw(struct lock_object *lock, struct thread **owner);
+static int	owner_rw(const struct lock_object *lock, struct thread **owner);
 #endif
-static int	unlock_rw(struct lock_object *lock);
+static uintptr_t unlock_rw(struct lock_object *lock);
 
 struct lock_class lock_class_rw = {
 	.lc_name = "rw",
@@ -92,6 +94,42 @@
 #endif
 };
 
+#ifdef ADAPTIVE_RWLOCKS
+static int rowner_retries = 10;
+static int rowner_loops = 10000;
+static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
+    "rwlock debugging");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
+
+static struct lock_delay_config rw_delay = {
+	.initial	= 1000,
+	.step		= 500,
+	.min		= 100,
+	.max		= 5000,
+};
+
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial,
+    0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step,
+    0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min,
+    0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
+    0, "");
+
+static void
+rw_delay_sysinit(void *dummy)
+{
+
+	rw_delay.initial = mp_ncpus * 25;
+	rw_delay.step = (mp_ncpus * 25) / 2;
+	rw_delay.min = mp_ncpus * 5;
+	rw_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(rw_delay_sysinit);
+#endif
+
 /*
  * Return a pointer to the owning thread if the lock is write-locked or
  * NULL if the lock is unlocked or read-locked.
@@ -119,29 +157,29 @@
 #define	rw_owner(rw)		rw_wowner(rw)
 
 #ifndef INVARIANTS
-#define	_rw_assert(rw, what, file, line)
+#define	__rw_assert(c, what, file, line)
 #endif
 
 void
-assert_rw(struct lock_object *lock, int what)
+assert_rw(const struct lock_object *lock, int what)
 {
 
-	rw_assert((struct rwlock *)lock, what);
+	rw_assert((const struct rwlock *)lock, what);
 }
 
 void
-lock_rw(struct lock_object *lock, int how)
+lock_rw(struct lock_object *lock, uintptr_t how)
 {
 	struct rwlock *rw;
 
 	rw = (struct rwlock *)lock;
 	if (how)
+		rw_rlock(rw);
+	else
 		rw_wlock(rw);
-	else
-		rw_rlock(rw);
 }
 
-int
+uintptr_t
 unlock_rw(struct lock_object *lock)
 {
 	struct rwlock *rw;
@@ -150,18 +188,18 @@
 	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
 	if (rw->rw_lock & RW_LOCK_READ) {
 		rw_runlock(rw);
-		return (0);
+		return (1);
 	} else {
 		rw_wunlock(rw);
-		return (1);
+		return (0);
 	}
 }
 
 #ifdef KDTRACE_HOOKS
 int
-owner_rw(struct lock_object *lock, struct thread **owner)
+owner_rw(const struct lock_object *lock, struct thread **owner)
 {
-	struct rwlock *rw = (struct rwlock *)lock;
+	const struct rwlock *rw = (const struct rwlock *)lock;
 	uintptr_t x = rw->rw_lock;
 
 	*owner = rw_wowner(rw);
@@ -171,12 +209,15 @@
 #endif
 
 void
-rw_init_flags(struct rwlock *rw, const char *name, int opts)
+_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
 {
+	struct rwlock *rw;
 	int flags;
 
+	rw = rwlock2rw(c);
+
 	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
-	    RW_RECURSE)) == 0);
+	    RW_RECURSE | RW_NEW)) == 0);
 	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
 	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
 	    &rw->rw_lock));
@@ -192,16 +233,21 @@
 		flags |= LO_RECURSABLE;
 	if (opts & RW_QUIET)
 		flags |= LO_QUIET;
+	if (opts & RW_NEW)
+		flags |= LO_NEW;
 
+	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
 	rw->rw_lock = RW_UNLOCKED;
 	rw->rw_recurse = 0;
-	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
 }
 
 void
-rw_destroy(struct rwlock *rw)
+_rw_destroy(volatile uintptr_t *c)
 {
+	struct rwlock *rw;
 
+	rw = rwlock2rw(c);
+
 	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
 	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
 	rw->rw_lock = RW_DESTROYED;
@@ -213,7 +259,7 @@
 {
 	struct rw_args *args = arg;
 
-	rw_init(args->ra_rw, args->ra_desc);
+	rw_init((struct rwlock *)args->ra_rw, args->ra_desc);
 }
 
 void
@@ -221,22 +267,30 @@
 {
 	struct rw_args_flags *args = arg;
 
-	rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
+	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
+	    args->ra_flags);
 }
 
 int
-rw_wowned(struct rwlock *rw)
+_rw_wowned(const volatile uintptr_t *c)
 {
 
-	return (rw_wowner(rw) == curthread);
+	return (rw_wowner(rwlock2rw(c)) == curthread);
 }
 
 void
-_rw_wlock(struct rwlock *rw, const char *file, int line)
+_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 
 	if (SCHEDULER_STOPPED())
 		return;
+
+	rw = rwlock2rw(c);
+
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
+	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
@@ -248,13 +302,19 @@
 }
 
 int
-_rw_try_wlock(struct rwlock *rw, const char *file, int line)
+__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 	int rval;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
+	rw = rwlock2rw(c);
+
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
+	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
 
@@ -270,6 +330,9 @@
 	if (rval) {
 		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
+		if (!rw_recursed(rw))
+			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE,
+			    rw, 0, 0, file, line);
 		curthread->td_locks++;
 	}
 	return (rval);
@@ -276,15 +339,18 @@
 }
 
 void
-_rw_wunlock(struct rwlock *rw, const char *file, int line)
+_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 
 	if (SCHEDULER_STOPPED())
 		return;
+
+	rw = rwlock2rw(c);
+
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
-	_rw_assert(rw, RA_WLOCKED, file, line);
-	curthread->td_locks--;
+	__rw_assert(c, RA_WLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
 	    line);
@@ -291,6 +357,7 @@
 	if (!rw_recursed(rw))
 		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
 	__rw_wunlock(rw, curthread, file, line);
+	curthread->td_locks--;
 }
 /*
  * Determines whether a new reader can acquire a lock.  Succeeds if the
@@ -305,8 +372,9 @@
     RW_LOCK_READ)
 
 void
-_rw_rlock(struct rwlock *rw, const char *file, int line)
+__rw_rlock(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 	struct turnstile *ts;
 #ifdef ADAPTIVE_RWLOCKS
 	volatile struct thread *owner;
@@ -318,26 +386,41 @@
 	int contested = 0;
 #endif
 	uintptr_t v;
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+	struct lock_delay_arg lda;
+#endif
 #ifdef KDTRACE_HOOKS
-	uint64_t spin_cnt = 0;
-	uint64_t sleep_cnt = 0;
+	uintptr_t state;
+	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
+	int64_t all_time = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
+#if defined(ADAPTIVE_RWLOCKS)
+	lock_delay_arg_init(&lda, &rw_delay);
+#elif defined(KDTRACE_HOOKS)
+	lock_delay_arg_init(&lda, NULL);
+#endif
+	rw = rwlock2rw(c);
+
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
+	    curthread, rw->lock_object.lo_name, file, line));
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
 	KASSERT(rw_wowner(rw) != curthread,
-	    ("%s (%s): wlock already held @ %s:%d", __func__,
+	    ("rw_rlock: wlock already held for %s @ %s:%d",
 	    rw->lock_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
 
-	for (;;) {
 #ifdef KDTRACE_HOOKS
-		spin_cnt++;
+	all_time -= lockstat_nsecs(&rw->lock_object);
+	state = rw->rw_lock;
 #endif
+	for (;;) {
 		/*
 		 * Handle the easy case.  If no other thread has a write
 		 * lock, then try to bump up the count of read locks.  Note
@@ -366,6 +449,9 @@
 			}
 			continue;
 		}
+#ifdef KDTRACE_HOOKS
+		lda.spin_cnt++;
+#endif
 #ifdef HWPMC_HOOKS
 		PMC_SOFT_CALL( , , lock, failed);
 #endif
@@ -385,24 +471,33 @@
 					CTR3(KTR_LOCK,
 					    "%s: spinning on %p held by %p",
 					    __func__, rw, owner);
+				KTR_STATE1(KTR_SCHED, "thread",
+				    sched_tdname(curthread), "spinning",
+				    "lockname:\"%s\"", rw->lock_object.lo_name);
 				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
-				    owner && TD_IS_RUNNING(owner)) {
-					cpu_spinwait();
-#ifdef KDTRACE_HOOKS
-					spin_cnt++;
-#endif
-				}
+				    owner && TD_IS_RUNNING(owner))
+					lock_delay(&lda);
+				KTR_STATE0(KTR_SCHED, "thread",
+				    sched_tdname(curthread), "running");
 				continue;
 			}
-		} else if (spintries < ROWNER_RETRIES) {
+		} else if (spintries < rowner_retries) {
 			spintries++;
-			for (i = 0; i < ROWNER_LOOPS; i++) {
+			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+			    "spinning", "lockname:\"%s\"",
+			    rw->lock_object.lo_name);
+			for (i = 0; i < rowner_loops; i++) {
 				v = rw->rw_lock;
 				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
 					break;
 				cpu_spinwait();
 			}
-			if (i != ROWNER_LOOPS)
+#ifdef KDTRACE_HOOKS
+			lda.spin_cnt += rowner_loops - i;
+#endif
+			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+			    "running");
+			if (i != rowner_loops)
 				continue;
 		}
 #endif
@@ -472,11 +567,11 @@
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
-		sleep_time -= lockstat_nsecs();
+		sleep_time -= lockstat_nsecs(&rw->lock_object);
 #endif
 		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
 #ifdef KDTRACE_HOOKS
-		sleep_time += lockstat_nsecs();
+		sleep_time += lockstat_nsecs(&rw->lock_object);
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -483,7 +578,19 @@
 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 			    __func__, rw);
 	}
+#ifdef KDTRACE_HOOKS
+	all_time += lockstat_nsecs(&rw->lock_object);
+	if (sleep_time)
+		LOCKSTAT_RECORD4(LS_RW_RLOCK_BLOCK, rw, sleep_time,
+		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
+	/* Record only the loops spinning and not sleeping. */
+	if (lda.spin_cnt > sleep_cnt)
+		LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time,
+		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+#endif
 	/*
 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
 	 * however.  turnstiles don't like owners changing between calls to
@@ -495,26 +602,23 @@
 	WITNESS_LOCK(&rw->lock_object, 0, file, line);
 	curthread->td_locks++;
 	curthread->td_rw_rlocks++;
-#ifdef KDTRACE_HOOKS
-	if (sleep_time)
-		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);
-
-	/*
-	 * Record only the loops spinning and not sleeping. 
-	 */
-	if (spin_cnt > sleep_cnt)
-		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
-#endif
 }
 
 int
-_rw_try_rlock(struct rwlock *rw, const char *file, int line)
+__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 	uintptr_t x;
 
 	if (SCHEDULER_STOPPED())
 		return (1);
 
+	rw = rwlock2rw(c);
+
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
+	    curthread, rw->lock_object.lo_name, file, line));
+
 	for (;;) {
 		x = rw->rw_lock;
 		KASSERT(rw->rw_lock != RW_DESTROYED,
@@ -525,6 +629,8 @@
 			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
 			    line);
 			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
+			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE,
+			    rw, 0, 0, file, line);
 			curthread->td_locks++;
 			curthread->td_rw_rlocks++;
 			return (1);
@@ -536,8 +642,9 @@
 }
 
 void
-_rw_runlock(struct rwlock *rw, const char *file, int line)
+_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 	struct turnstile *ts;
 	uintptr_t x, v, queue;
 
@@ -544,11 +651,11 @@
 	if (SCHEDULER_STOPPED())
 		return;
 
+	rw = rwlock2rw(c);
+
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
-	_rw_assert(rw, RA_RLOCKED, file, line);
-	curthread->td_locks--;
-	curthread->td_rw_rlocks--;
+	__rw_assert(c, RA_RLOCKED, file, line);
 	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
 
@@ -642,6 +749,8 @@
 		break;
 	}
 	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
+	curthread->td_locks--;
+	curthread->td_rw_rlocks--;
 }
 
 /*
@@ -650,8 +759,10 @@
  * read or write lock.
  */
 void
-_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
+__rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
+    int line)
 {
+	struct rwlock *rw;
 	struct turnstile *ts;
 #ifdef ADAPTIVE_RWLOCKS
 	volatile struct thread *owner;
@@ -663,15 +774,26 @@
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+	struct lock_delay_arg lda;
+#endif
 #ifdef KDTRACE_HOOKS
-	uint64_t spin_cnt = 0;
-	uint64_t sleep_cnt = 0;
+	uintptr_t state;
+	u_int sleep_cnt = 0;
 	int64_t sleep_time = 0;
+	int64_t all_time = 0;
 #endif
 
 	if (SCHEDULER_STOPPED())
 		return;
 
+#if defined(ADAPTIVE_RWLOCKS)
+	lock_delay_arg_init(&lda, &rw_delay);
+#elif defined(KDTRACE_HOOKS)
+	lock_delay_arg_init(&lda, NULL);
+#endif
+	rw = rwlock2rw(c);
+
 	if (rw_wlocked(rw)) {
 		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
 		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
@@ -686,10 +808,16 @@
 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
 		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
 
-	while (!_rw_write_lock(rw, tid)) {
 #ifdef KDTRACE_HOOKS
-		spin_cnt++;
+	all_time -= lockstat_nsecs(&rw->lock_object);
+	state = rw->rw_lock;
 #endif
+	for (;;) {
+		if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid))
+			break;
+#ifdef KDTRACE_HOOKS
+		lda.spin_cnt++;
+#endif
 #ifdef HWPMC_HOOKS
 		PMC_SOFT_CALL( , , lock, failed);
 #endif
@@ -707,17 +835,18 @@
 			if (LOCK_LOG_TEST(&rw->lock_object, 0))
 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
 				    __func__, rw, owner);
+			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+			    "spinning", "lockname:\"%s\"",
+			    rw->lock_object.lo_name);
 			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
-			    TD_IS_RUNNING(owner)) {
-				cpu_spinwait();
-#ifdef KDTRACE_HOOKS
-				spin_cnt++;
-#endif
-			}
+			    TD_IS_RUNNING(owner))
+				lock_delay(&lda);
+			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+			    "running");
 			continue;
 		}
 		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
-		    spintries < ROWNER_RETRIES) {
+		    spintries < rowner_retries) {
 			if (!(v & RW_LOCK_WRITE_SPINNER)) {
 				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
 				    v | RW_LOCK_WRITE_SPINNER)) {
@@ -725,15 +854,20 @@
 				}
 			}
 			spintries++;
-			for (i = 0; i < ROWNER_LOOPS; i++) {
+			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+			    "spinning", "lockname:\"%s\"",
+			    rw->lock_object.lo_name);
+			for (i = 0; i < rowner_loops; i++) {
 				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
 					break;
 				cpu_spinwait();
 			}
+			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+			    "running");
 #ifdef KDTRACE_HOOKS
-			spin_cnt += ROWNER_LOOPS - i;
+			lda.spin_cnt += rowner_loops - i;
 #endif
-			if (i != ROWNER_LOOPS)
+			if (i != rowner_loops)
 				continue;
 		}
 #endif
@@ -799,11 +933,11 @@
 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 			    rw);
 #ifdef KDTRACE_HOOKS
-		sleep_time -= lockstat_nsecs();
+		sleep_time -= lockstat_nsecs(&rw->lock_object);
 #endif
 		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
 #ifdef KDTRACE_HOOKS
-		sleep_time += lockstat_nsecs();
+		sleep_time += lockstat_nsecs(&rw->lock_object);
 		sleep_cnt++;
 #endif
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -813,18 +947,21 @@
 		spintries = 0;
 #endif
 	}
-	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
-	    waittime, file, line);
 #ifdef KDTRACE_HOOKS
+	all_time += lockstat_nsecs(&rw->lock_object);
 	if (sleep_time)
-		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);
+		LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time,
+		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 
-	/*
-	 * Record only the loops spinning and not sleeping.
-	 */ 
-	if (spin_cnt > sleep_cnt)
-		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
+	/* Record only the loops spinning and not sleeping. */
+	if (lda.spin_cnt > sleep_cnt)
+		LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time,
+		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
+		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 #endif
+	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
+	    waittime, file, line);
 }
 
 /*
@@ -833,8 +970,10 @@
  * least one thread is waiting on this lock.
  */
 void
-_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
+__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
+    int line)
 {
+	struct rwlock *rw;
 	struct turnstile *ts;
 	uintptr_t v;
 	int queue;
@@ -842,6 +981,8 @@
 	if (SCHEDULER_STOPPED())
 		return;
 
+	rw = rwlock2rw(c);
+
 	if (rw_wlocked(rw) && rw_recursed(rw)) {
 		rw->rw_recurse--;
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -898,8 +1039,9 @@
  * lock.  Returns true if the upgrade succeeded and false otherwise.
  */
 int
-_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
+__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 	uintptr_t v, x, tid;
 	struct turnstile *ts;
 	int success;
@@ -907,9 +1049,11 @@
 	if (SCHEDULER_STOPPED())
 		return (1);
 
+	rw = rwlock2rw(c);
+
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
-	_rw_assert(rw, RA_RLOCKED, file, line);
+	__rw_assert(c, RA_RLOCKED, file, line);
 
 	/*
 	 * Attempt to switch from one reader to a writer.  If there
@@ -971,8 +1115,9 @@
  * Downgrade a write lock into a single read lock.
  */
 void
-_rw_downgrade(struct rwlock *rw, const char *file, int line)
+__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
 {
+	struct rwlock *rw;
 	struct turnstile *ts;
 	uintptr_t tid, v;
 	int rwait, wwait;
@@ -980,9 +1125,11 @@
 	if (SCHEDULER_STOPPED())
 		return;
 
+	rw = rwlock2rw(c);
+
 	KASSERT(rw->rw_lock != RW_DESTROYED,
 	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
-	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
+	__rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line);
 #ifndef INVARIANTS
 	if (rw_recursed(rw))
 		panic("downgrade of a recursed lock");
@@ -1036,7 +1183,7 @@
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
-#undef _rw_assert
+#undef __rw_assert
 #endif
 
 /*
@@ -1045,16 +1192,22 @@
  * thread owns an rlock.
  */
 void
-_rw_assert(struct rwlock *rw, int what, const char *file, int line)
+__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 {
+	const struct rwlock *rw;
 
 	if (panicstr != NULL)
 		return;
+
+	rw = rwlock2rw(c);
+
 	switch (what) {
 	case RA_LOCKED:
 	case RA_LOCKED | RA_RECURSED:
 	case RA_LOCKED | RA_NOTRECURSED:
 	case RA_RLOCKED:
+	case RA_RLOCKED | RA_RECURSED:
+	case RA_RLOCKED | RA_NOTRECURSED:
 #ifdef WITNESS
 		witness_assert(&rw->lock_object, what, file, line);
 #else
@@ -1064,13 +1217,13 @@
 		 * has a lock at all, fail.
 		 */
 		if (rw->rw_lock == RW_UNLOCKED ||
-		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
+		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
 		    rw_wowner(rw) != curthread)))
 			panic("Lock %s not %slocked @ %s:%d\n",
-			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
+			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
 			    "read " : "", file, line);
 
-		if (!(rw->rw_lock & RW_LOCK_READ)) {
+		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
 			if (rw_recursed(rw)) {
 				if (what & RA_NOTRECURSED)
 					panic("Lock %s recursed @ %s:%d\n",
@@ -1118,12 +1271,12 @@
 
 #ifdef DDB
 void
-db_show_rwlock(struct lock_object *lock)
+db_show_rwlock(const struct lock_object *lock)
 {
-	struct rwlock *rw;
+	const struct rwlock *rw;
 	struct thread *td;
 
-	rw = (struct rwlock *)lock;
+	rw = (const struct rwlock *)lock;
 
 	db_printf(" state: ");
 	if (rw->rw_lock == RW_UNLOCKED)
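
Note on the kern_rwlock.c changes above: the public rw_*() entry points now
take a "lock cookie" (a volatile uintptr_t * pointing at the rw_lock word)
and recover the enclosing struct rwlock with __containerof() through the new
rwlock2rw() macro. Below is a minimal userspace sketch of that container-of
pattern; the struct, field, and function names are illustrative only and are
not the kernel's:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for the kernel's __containerof(): recover the address of the
 * enclosing structure from the address of one of its members.
 */
#define	containerof(ptr, type, member)					\
	((type *)(void *)((char *)(uintptr_t)(ptr) - offsetof(type, member)))

/* Hypothetical lock layout; the word the cookie points at is lock_word. */
struct my_rwlock {
	const char		*name;
	volatile uintptr_t	lock_word;
};

/*
 * Entry point in the cookie style: callers pass &lock->lock_word and the
 * implementation recovers the whole structure, as rwlock2rw() does.
 */
static void
my_rw_describe(volatile uintptr_t *c)
{
	struct my_rwlock *rw = containerof(c, struct my_rwlock, lock_word);

	printf("lock \"%s\", state %#lx\n", rw->name, (unsigned long)*c);
}

int
main(void)
{
	struct my_rwlock lk = { .name = "demo", .lock_word = 0 };

	my_rw_describe(&lk.lock_word);
	return (0);
}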

Modified: trunk/sys/kern/kern_sdt.c
===================================================================
--- trunk/sys/kern/kern_sdt.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_sdt.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright 2006-2008 John Birrell <jb at FreeBSD.org>
  *
@@ -22,318 +23,34 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $MidnightBSD$
- *
- * Backend for the Statically Defined Tracing (SDT) kernel support. This is
- * required to allow a module to load even though DTrace kernel support may
- * not be present. A module may be built with SDT probes in it which are
- * registered and deregistered via SYSINIT/SYSUNINIT.
- *
+ * $FreeBSD: stable/10/sys/kern/kern_sdt.c 263283 2014-03-18 00:55:19Z markj $
  */
 
 #include "opt_kdtrace.h"
 
-#include <sys/cdefs.h>
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/linker.h>
-#include <sys/lock.h>
-#include <sys/proc.h>
-#include <sys/sx.h>
+#include <sys/kdb.h>
 #include <sys/sdt.h>
 
-/*
- * This is the list of statically defined tracing providers.
- */
-static TAILQ_HEAD(sdt_provider_list_head, sdt_provider) sdt_provider_list;
+SDT_PROVIDER_DEFINE(sdt);
 
 /*
- * Mutex to serialise access to the SDT provider list.
+ * Hook for the DTrace probe function. The SDT provider will set this to
+ * dtrace_probe() when it loads.
  */
-static struct sx sdt_sx;
-
-/*
- * Hook for the DTrace probe function. The 'sdt' provider will set this
- * to dtrace_probe when it loads.
- */
 sdt_probe_func_t sdt_probe_func = sdt_probe_stub;
 
-static sdt_provider_listall_func_t sdt_provider_register_func = NULL;
-static sdt_provider_listall_func_t sdt_provider_deregister_func = NULL;
-static sdt_probe_listall_func_t sdt_probe_register_func = NULL;
-
-static void *sdt_provider_register_arg;
-static void *sdt_provider_deregister_arg;
-static void *sdt_probe_register_arg;
-
-static int sdt_provider_listall_locked(sdt_provider_listall_func_t, void *);
-
 /*
  * This is a stub for probe calls in case kernel DTrace support isn't
- * compiled in. It should never get called because there is no DTrace
- * support to enable it.
+ * enabled. It should never get called because there is no DTrace support
+ * to enable it.
  */
 void
 sdt_probe_stub(uint32_t id, uintptr_t arg0, uintptr_t arg1,
     uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
 {
-	printf("sdt_probe_stub: Why did this get called?\n");
-}
 
-/*
- * Called from SYSINIT to register a provider.
- */
-void
-sdt_provider_register(void *arg)
-{
-	struct sdt_provider *prov = arg;
-
-	sx_xlock(&sdt_sx);
-
-	TAILQ_INSERT_TAIL(&sdt_provider_list, prov, prov_entry);
-
-	TAILQ_INIT(&prov->probe_list);
-
-	if (sdt_provider_register_func != NULL)
-		sdt_provider_register_func(prov, sdt_provider_register_arg);
-
-	sx_xunlock(&sdt_sx);
+	printf("sdt_probe_stub: unexpectedly called\n");
+	kdb_backtrace();
 }
-
-/*
- * Called from SYSUNINIT to de-register a provider.
- */
-void
-sdt_provider_deregister(void *arg)
-{
-	struct sdt_provider *prov = arg;
-
-	sx_xlock(&sdt_sx);
-
-	TAILQ_REMOVE(&sdt_provider_list, prov, prov_entry);
-
-	if (sdt_provider_deregister_func != NULL)
-		sdt_provider_deregister_func(prov, sdt_provider_deregister_arg);
-
-	sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSINIT to register a statically defined trace probe.
- */
-void
-sdt_probe_register(void *arg)
-{
-	struct sdt_probe *probe = arg;
-
-	/*
-	 * Check the reference structure version. Only version 1 is
-	 * supported at the moment.
-	 */
-	if (probe->version != sizeof(struct sdt_probe)) {
-		printf("%s:%s:%s has version %d when %d required\n", probe->mod, probe->func, probe->name, probe->version, (int) sizeof(struct sdt_probe));
-		return;
-	}
-
-	sx_xlock(&sdt_sx);
-
-	TAILQ_INSERT_TAIL(&probe->prov->probe_list, probe, probe_entry);
-
-	TAILQ_INIT(&probe->argtype_list);
-
-	probe->state = SDT_INIT;
-
-	if (sdt_probe_register_func != NULL)
-		sdt_probe_register_func(probe, sdt_provider_register_arg);
-
-	sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSUNINIT to de-register a statically defined trace probe.
- */
-void
-sdt_probe_deregister(void *arg)
-{
-	struct sdt_probe *probe = arg;
-
-	sx_xlock(&sdt_sx);
-
-	if (probe->state == SDT_INIT) {
-		TAILQ_REMOVE(&probe->prov->probe_list, probe, probe_entry);
-		probe->state = SDT_UNINIT;
-	}
-
-	sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSINIT to register a statically defined trace probe argument.
- */
-void
-sdt_argtype_register(void *arg)
-{
-	struct sdt_argtype *argtype = arg;
-
-	sx_xlock(&sdt_sx);
-
-	TAILQ_INSERT_TAIL(&argtype->probe->argtype_list, argtype, argtype_entry);
-
-	argtype->probe->n_args++;
-
-	sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSUNINIT to de-register a statically defined trace probe argument.
- */
-void
-sdt_argtype_deregister(void *arg)
-{
-	struct sdt_argtype *argtype = arg;
-
-	sx_xlock(&sdt_sx);
-
-	TAILQ_REMOVE(&argtype->probe->argtype_list, argtype, argtype_entry);
-
-	sx_xunlock(&sdt_sx);
-}
-
-static void
-sdt_init(void *arg)
-{ 
-	sx_init_flags(&sdt_sx, "Statically Defined Tracing", SX_NOWITNESS);
-
-	TAILQ_INIT(&sdt_provider_list);
-}
-
-SYSINIT(sdt, SI_SUB_KDTRACE, SI_ORDER_FIRST, sdt_init, NULL);
-
-static void
-sdt_uninit(void *arg)
-{ 
-	sx_destroy(&sdt_sx);
-}
-
-SYSUNINIT(sdt, SI_SUB_KDTRACE, SI_ORDER_FIRST, sdt_uninit, NULL);
-
-/*
- * List statically defined tracing providers.
- */
-int
-sdt_provider_listall(sdt_provider_listall_func_t callback_func, void *arg)
-{
-	int error;
-
-	sx_xlock(&sdt_sx);
-	error = sdt_provider_listall_locked(callback_func, arg);
-	sx_xunlock(&sdt_sx);
-
-	return (error);
-}
-
-static int
-sdt_provider_listall_locked(sdt_provider_listall_func_t callback_func,
-    void *arg)
-{
-	int error = 0;
-	struct sdt_provider *prov;
-
-	sx_assert(&sdt_sx, SX_XLOCKED);
-
-	TAILQ_FOREACH(prov, &sdt_provider_list, prov_entry) {
-		if ((error = callback_func(prov, arg)) != 0)
-			break;
-	}
-
-	return (error);
-}
-
-/*
- * List statically defined tracing probes.
- */
-int
-sdt_probe_listall(struct sdt_provider *prov, 
-    sdt_probe_listall_func_t callback_func,void *arg)
-{
-	int error = 0;
-	int locked;
-	struct sdt_probe *probe;
-
-	locked = sx_xlocked(&sdt_sx);
-	if (!locked)
-		sx_xlock(&sdt_sx);
-
-	TAILQ_FOREACH(probe, &prov->probe_list, probe_entry) {
-		if ((error = callback_func(probe, arg)) != 0)
-			break;
-	}
-
-	if (!locked)
-		sx_xunlock(&sdt_sx);
-
-	return (error);
-}
-
-/*
- * List statically defined tracing probe arguments.
- */
-int
-sdt_argtype_listall(struct sdt_probe *probe, 
-    sdt_argtype_listall_func_t callback_func,void *arg)
-{
-	int error = 0;
-	int locked;
-	struct sdt_argtype *argtype;
-
-	locked = sx_xlocked(&sdt_sx);
-	if (!locked)
-		sx_xlock(&sdt_sx);
-
-	TAILQ_FOREACH(argtype, &probe->argtype_list, argtype_entry) {
-		if ((error = callback_func(argtype, arg)) != 0)
-			break;
-	}
-
-	if (!locked)
-		sx_xunlock(&sdt_sx);
-
-	return (error);
-}
-
-void sdt_register_callbacks(sdt_provider_listall_func_t register_prov, 
-    void *reg_prov_arg, sdt_provider_listall_func_t deregister_prov, 
-    void *dereg_prov_arg, sdt_probe_listall_func_t register_probe, 
-    void * reg_probe_arg)
-{
-
-	sx_xlock(&sdt_sx);
-	sdt_provider_register_func = register_prov;
-	sdt_provider_deregister_func = deregister_prov;
-	sdt_probe_register_func = register_probe;
-
-	sdt_provider_register_arg = reg_prov_arg;
-	sdt_provider_deregister_arg = dereg_prov_arg;
-	sdt_probe_register_arg = reg_probe_arg;
-
-	sdt_provider_listall_locked(register_prov, reg_prov_arg);
-	sx_xunlock(&sdt_sx);
-}
-
-void sdt_deregister_callbacks(void)
-{
-
-	sx_xlock(&sdt_sx);
-	sdt_provider_listall_locked(sdt_provider_deregister_func, 
-	    sdt_provider_deregister_arg);
-
-	sdt_provider_register_func = NULL;
-	sdt_provider_deregister_func = NULL;
-	sdt_probe_register_func = NULL;
-
-	sdt_provider_register_arg = NULL;
-	sdt_provider_deregister_arg = NULL;
-	sdt_probe_register_arg = NULL;
-	sx_xunlock(&sdt_sx);
-}
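
Note on the rewritten kern_sdt.c above: the old provider/probe registry is
gone, and what remains is a probe hook. sdt_probe_func starts out pointing at
sdt_probe_stub(), and the DTrace SDT provider replaces it with dtrace_probe()
when it loads. A small userspace sketch of that stub-then-hook pattern
follows; all names here are made up for illustration:

#include <stdint.h>
#include <stdio.h>

typedef void (*probe_func_t)(uint32_t id, uintptr_t arg0);

/* Default hook: only ever fires if no tracing backend replaced it. */
static void
probe_stub(uint32_t id, uintptr_t arg0)
{
	fprintf(stderr, "probe_stub: probe %u (arg %#lx) fired with no backend\n",
	    (unsigned)id, (unsigned long)arg0);
}

/* Global hook, analogous to sdt_probe_func; starts at the stub. */
static probe_func_t probe_func = probe_stub;

/* Pretend backend, analogous to dtrace_probe(), installed at load time. */
static void
backend_probe(uint32_t id, uintptr_t arg0)
{
	printf("backend: probe %u, arg %#lx\n", (unsigned)id,
	    (unsigned long)arg0);
}

int
main(void)
{
	probe_func(1, 0xabc);		/* handled by the stub */
	probe_func = backend_probe;	/* the "backend" loads */
	probe_func(2, 0xdef);		/* now handled by the backend */
	return (0);
}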

Modified: trunk/sys/kern/kern_sema.c
===================================================================
--- trunk/sys/kern/kern_sema.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_sema.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (C) 2001 Jason Evans <jasone at freebsd.org>.  All rights reserved.
  *
@@ -34,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_sema.c 139804 2005-01-06 23:35:40Z imp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>

Modified: trunk/sys/kern/kern_sharedpage.c
===================================================================
--- trunk/sys/kern/kern_sharedpage.c	2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_sharedpage.c	2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010, 2012 Konstantin Belousov <kib at FreeBSD.org>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_sharedpage.c 254649 2013-08-22 07:39:53Z kib $");
 
 #include "opt_compat.h"
 #include "opt_vm.h"
@@ -34,7 +35,7 @@
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
-#include <sys/mutex.h>
+#include <sys/rwlock.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vdso.h>
@@ -107,12 +108,11 @@
 	sx_init(&shared_page_alloc_sx, "shpsx");
 	shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
 	    VM_PROT_DEFAULT, 0, NULL);
-	VM_OBJECT_LOCK(shared_page_obj);
-	m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY |
-	    VM_ALLOC_ZERO);
+	VM_OBJECT_WLOCK(shared_page_obj);
+	m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO);
 	m->valid = VM_PAGE_BITS_ALL;
-	VM_OBJECT_UNLOCK(shared_page_obj);
-	addr = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	VM_OBJECT_WUNLOCK(shared_page_obj);
+	addr = kva_alloc(PAGE_SIZE);
 	pmap_qenter(addr, &m, 1);
 	shared_page_mapping = (char *)addr;
 }
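
Note on the kern_sharedpage.c hunk above: it tracks two stable/10 VM API
changes. The VM object lock is now the rwlock-based VM_OBJECT_WLOCK()/
VM_OBJECT_WUNLOCK() pair (hence the <sys/rwlock.h> include), and the kernel
virtual address for the mapping comes from kva_alloc() rather than the
retired kmem_alloc_nofault(). Condensed from the patched code as a reading
aid only (kernel context assumed; not a standalone example):

	shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
	    VM_PROT_DEFAULT, 0, NULL);
	VM_OBJECT_WLOCK(shared_page_obj);	/* write-lock the object */
	m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO);
	m->valid = VM_PAGE_BITS_ALL;		/* page is zeroed and valid */
	VM_OBJECT_WUNLOCK(shared_page_obj);
	addr = kva_alloc(PAGE_SIZE);		/* reserve KVA for one page */
	pmap_qenter(addr, &m, 1);		/* enter the mapping */
	shared_page_mapping = (char *)addr;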


