[Midnightbsd-cvs] src [9948] trunk/sys/kern: sync with freebsd
laffer1 at midnightbsd.org
Fri May 25 16:58:04 EDT 2018
Revision: 9948
http://svnweb.midnightbsd.org/src/?rev=9948
Author: laffer1
Date: 2018-05-25 16:58:03 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd
Modified Paths:
--------------
trunk/sys/kern/imgact_elf64.c
trunk/sys/kern/kern_context.c
trunk/sys/kern/kern_ctf.c
trunk/sys/kern/kern_prot.c
trunk/sys/kern/kern_racct.c
trunk/sys/kern/kern_rctl.c
trunk/sys/kern/kern_rmlock.c
trunk/sys/kern/kern_rwlock.c
trunk/sys/kern/kern_sdt.c
trunk/sys/kern/kern_sema.c
trunk/sys/kern/kern_sharedpage.c
Modified: trunk/sys/kern/imgact_elf64.c
===================================================================
--- trunk/sys/kern/imgact_elf64.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/imgact_elf64.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Doug Rabson
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/imgact_elf64.c 116182 2003-06-11 00:56:59Z obrien $");
#define __ELF_WORD_SIZE 64
#include <kern/imgact_elf.c>
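The imgact_elf64.c shim is a one-liner by design: sys/kern/imgact_elf.c is written once in terms of __ELF_WORD_SIZE, and this file (like its imgact_elf32.c counterpart) only sets the word size and includes the shared implementation, so the 32-bit and 64-bit ELF image activators are built from the same source. Below is a minimal userland sketch of the same compile-twice idea; the names (MAKE_LOADER, elf32_fits, elf64_fits) are invented for illustration and are not kernel identifiers.

#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative only: one implementation, instantiated per word size,
 * analogous to building imgact_elf.c with __ELF_WORD_SIZE 32 and 64.
 */
#define MAKE_LOADER(bits, addr_t)					\
	static int							\
	elf##bits##_fits(uint64_t vaddr)				\
	{								\
		/* Reject addresses that do not fit in addr_t. */	\
		return (vaddr == (addr_t)vaddr);			\
	}

MAKE_LOADER(32, uint32_t)	/* stands in for __ELF_WORD_SIZE 32 */
MAKE_LOADER(64, uint64_t)	/* stands in for __ELF_WORD_SIZE 64 */

int
main(void)
{

	printf("0x100000000: fits 32-bit %d, fits 64-bit %d\n",
	    elf32_fits(0x100000000ULL), elf64_fits(0x100000000ULL));
	return (0);
}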
Modified: trunk/sys/kern/kern_context.c
===================================================================
--- trunk/sys/kern/kern_context.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_context.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Daniel M. Eischen <deischen at freebsd.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_context.c 225617 2011-09-16 13:58:51Z kmacy $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/kern/kern_ctf.c
===================================================================
--- trunk/sys/kern/kern_ctf.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_ctf.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008 John Birrell <jb at freebsd.org>
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_ctf.c 279196 2015-02-23 01:24:10Z markj $
*/
/*
@@ -68,8 +69,6 @@
int flags;
int i;
int nbytes;
- ssize_t resid;
- int vfslocked;
size_t sz;
struct nameidata nd;
struct thread *td = curthread;
@@ -114,23 +113,19 @@
*/
ef->ctfcnt = -1;
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, lf->pathname, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, lf->pathname, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error)
return (error);
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
/* Allocate memory for the FLF header. */
- if ((hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK)) == NULL) {
- error = ENOMEM;
- goto out;
- }
+ hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
/* Read the ELF header. */
if ((error = vn_rdwr(UIO_READ, nd.ni_vp, hdr, sizeof(*hdr),
- 0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
+ 0, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL,
td)) != 0)
goto out;
@@ -148,15 +143,12 @@
}
/* Allocate memory for all the section headers */
- if ((shdr = malloc(nbytes, M_LINKER, M_WAITOK)) == NULL) {
- error = ENOMEM;
- goto out;
- }
+ shdr = malloc(nbytes, M_LINKER, M_WAITOK);
/* Read all the section headers */
if ((error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)shdr, nbytes,
hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
- &resid, td)) != 0)
+ NULL, td)) != 0)
goto out;
/*
@@ -173,17 +165,12 @@
}
/* Allocate memory to buffer the section header strings. */
- if ((shstrtab = malloc(shdr[hdr->e_shstrndx].sh_size, M_LINKER,
- M_WAITOK)) == NULL) {
- error = ENOMEM;
- goto out;
- }
+ shstrtab = malloc(shdr[hdr->e_shstrndx].sh_size, M_LINKER, M_WAITOK);
/* Read the section header strings. */
if ((error = vn_rdwr(UIO_READ, nd.ni_vp, shstrtab,
shdr[hdr->e_shstrndx].sh_size, shdr[hdr->e_shstrndx].sh_offset,
- UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, &resid,
- td)) != 0)
+ UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED, NULL, td)) != 0)
goto out;
/* Search for the section containing the CTF data. */
@@ -202,7 +189,7 @@
/* Read the CTF header. */
if ((error = vn_rdwr(UIO_READ, nd.ni_vp, ctf_hdr, sizeof(ctf_hdr),
shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
- NOCRED, &resid, td)) != 0)
+ NOCRED, NULL, td)) != 0)
goto out;
/* Check the CTF magic number. (XXX check for big endian!) */
@@ -240,10 +227,7 @@
* Allocate memory for the compressed CTF data, including
* the header (which isn't compressed).
*/
- if ((raw = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK)) == NULL) {
- error = ENOMEM;
- goto out;
- }
+ raw = malloc(shdr[i].sh_size, M_LINKER, M_WAITOK);
} else {
/*
* The CTF data is not compressed, so the ELF section
@@ -256,10 +240,7 @@
* Allocate memory to buffer the CTF data in it's decompressed
* form.
*/
- if ((ctftab = malloc(sz, M_LINKER, M_WAITOK)) == NULL) {
- error = ENOMEM;
- goto out;
- }
+ ctftab = malloc(sz, M_LINKER, M_WAITOK);
/*
* Read the CTF data into the raw buffer if compressed, or
@@ -267,7 +248,7 @@
*/
if ((error = vn_rdwr(UIO_READ, nd.ni_vp, raw == NULL ? ctftab : raw,
shdr[i].sh_size, shdr[i].sh_offset, UIO_SYSSPACE, IO_NODELOCKED,
- td->td_ucred, NOCRED, &resid, td)) != 0)
+ td->td_ucred, NOCRED, NULL, td)) != 0)
goto out;
/* Check if decompression is required. */
@@ -295,7 +276,9 @@
zs.next_in = ((uint8_t *) raw) + sizeof(ctf_hdr);
zs.avail_out = sz - sizeof(ctf_hdr);
zs.next_out = ((uint8_t *) ctftab) + sizeof(ctf_hdr);
- if ((ret = inflate(&zs, Z_FINISH)) != Z_STREAM_END) {
+ ret = inflate(&zs, Z_FINISH);
+ inflateEnd(&zs);
+ if (ret != Z_STREAM_END) {
printf("%s(%d): zlib inflate returned %d\n", __func__, __LINE__, ret);
error = EIO;
goto out;
@@ -323,7 +306,6 @@
out:
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
if (hdr != NULL)
free(hdr, M_LINKER);
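Several things change in kern_ctf.c: the Giant/MPSAFE VFS scaffolding (NDHASGIANT, VFS_UNLOCK_GIANT) is removed, the NULL checks after malloc(M_WAITOK) are dropped because M_WAITOK allocations sleep rather than fail, vn_rdwr() is passed NULL since the residual byte count was never consulted, and inflateEnd() is now called so the zlib stream state is released whether or not decompression succeeded. A small userland sketch of that last pattern follows, assuming ordinary zlib and illustrative buffer handling.

#include <stdio.h>
#include <string.h>
#include <zlib.h>

/*
 * Sketch: pair every inflate() with inflateEnd(), even on failure,
 * so the stream's internal state is not leaked.
 */
static int
decompress(unsigned char *in, size_t inlen, unsigned char *out, size_t outlen)
{
	z_stream zs;
	int ret;

	memset(&zs, 0, sizeof(zs));
	if (inflateInit(&zs) != Z_OK)
		return (-1);
	zs.next_in = in;
	zs.avail_in = inlen;
	zs.next_out = out;
	zs.avail_out = outlen;

	ret = inflate(&zs, Z_FINISH);
	inflateEnd(&zs);		/* release state on all paths */
	if (ret != Z_STREAM_END) {
		fprintf(stderr, "inflate returned %d\n", ret);
		return (-1);
	}
	return (0);
}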
Modified: trunk/sys/kern/kern_prot.c
===================================================================
--- trunk/sys/kern/kern_prot.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_prot.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
* The Regents of the University of California.
@@ -42,7 +43,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_prot.c 303846 2016-08-08 18:31:28Z bdrewery $");
#include "opt_compat.h"
#include "opt_inet.h"
@@ -73,7 +74,7 @@
#ifdef REGRESSION
FEATURE(regression,
- "Kernel support for interfaces nessesary for regression testing (SECURITY RISK!)");
+ "Kernel support for interfaces necessary for regression testing (SECURITY RISK!)");
#endif
#if defined(INET) || defined(INET6)
@@ -147,7 +148,7 @@
return (0);
}
-/* Get an arbitary pid's process group id */
+/* Get an arbitrary pid's process group id */
#ifndef _SYS_SYSPROTO_H_
struct getpgid_args {
pid_t pid;
@@ -178,7 +179,7 @@
}
/*
- * Get an arbitary pid's session id.
+ * Get an arbitrary pid's session id.
*/
#ifndef _SYS_SYSPROTO_H_
struct getsid_args {
@@ -582,7 +583,7 @@
change_euid(newcred, uip);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
@@ -641,7 +642,7 @@
change_euid(newcred, euip);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
uifree(euip);
crfree(oldcred);
@@ -741,7 +742,7 @@
change_egid(newcred, gid);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
crfree(oldcred);
return (0);
@@ -787,7 +788,7 @@
change_egid(newcred, egid);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
crfree(oldcred);
return (0);
@@ -860,7 +861,7 @@
crsetgroups_locked(newcred, ngrp, groups);
}
setsugid(p);
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
crfree(oldcred);
return (0);
@@ -923,7 +924,7 @@
change_svuid(newcred, newcred->cr_uid);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
@@ -990,7 +991,7 @@
change_svgid(newcred, newcred->cr_groups[0]);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
crfree(oldcred);
return (0);
@@ -1064,7 +1065,7 @@
change_svuid(newcred, suid);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
@@ -1143,7 +1144,7 @@
change_svgid(newcred, sgid);
setsugid(p);
}
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
crfree(oldcred);
return (0);
@@ -1710,6 +1711,13 @@
if ((p->p_flag & P_INEXEC) != 0)
return (EBUSY);
+ /* Denied explicitely */
+ if ((p->p_flag2 & P2_NOTRACE) != 0) {
+ error = priv_check(td, PRIV_DEBUG_DENIED);
+ if (error != 0)
+ return (error);
+ }
+
return (0);
}
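The new check in p_candebug() refuses debugger attachment to a process marked P2_NOTRACE unless the debugging thread holds the PRIV_DEBUG_DENIED privilege. On FreeBSD the flag is normally requested from userland through procctl(2)'s PROC_TRACE_CTL command; whether that interface is wired up on this branch is an assumption, so treat the sketch below as illustrative.

#include <sys/types.h>
#include <sys/wait.h>
#include <sys/procctl.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	int arg = PROC_TRACE_CTL_DISABLE;

	/* Ask the kernel to set P2_NOTRACE on this process. */
	if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &arg) == -1)
		err(1, "procctl");
	/* From here on, debugger attachment and core dumps are denied. */
	return (0);
}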
@@ -1949,7 +1957,44 @@
crfree(cred);
}
+/*
+ * Set initial process credentials.
+ * Callers are responsible for providing the reference for provided credentials.
+ */
+void
+proc_set_cred_init(struct proc *p, struct ucred *newcred)
+{
+
+ p->p_ucred = newcred;
+}
+
+/*
+ * Change process credentials.
+ * Callers are responsible for providing the reference for passed credentials
+ * and for freeing old ones.
+ *
+ * Process has to be locked except when it does not have credentials (as it
+ * should not be visible just yet) or when newcred is NULL (as this can be
+ * only used when the process is about to be freed, at which point it should
+ * not be visible anymore).
+ */
struct ucred *
+proc_set_cred(struct proc *p, struct ucred *newcred)
+{
+ struct ucred *oldcred;
+
+ MPASS(p->p_ucred != NULL);
+ if (newcred == NULL)
+ MPASS(p->p_state == PRS_ZOMBIE);
+ else
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ oldcred = p->p_ucred;
+ p->p_ucred = newcred;
+ return (oldcred);
+}
+
+struct ucred *
crcopysafe(struct proc *p, struct ucred *cr)
{
struct ucred *oldcred;
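All of the direct p->p_ucred assignments above now go through proc_set_cred(), which centralizes the locking and lifetime assertions and hands back the old credential, while proc_set_cred_init() covers the case of a process that has no credential installed yet. Callers keep the existing crcopysafe() dance. A condensed, kernel-style sketch of that calling convention follows; it is modeled on the setuid()-style code above, is not compilable on its own, and elides error handling.

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/ucred.h>

static int
example_change_euid(struct proc *p, uid_t euid)
{
	struct ucred *newcred, *oldcred;
	struct uidinfo *euip;

	newcred = crget();			/* fresh credential, may sleep */
	euip = uifind(euid);
	PROC_LOCK(p);
	oldcred = crcopysafe(p, newcred);	/* snapshot p_ucred into newcred */
	change_euid(newcred, euip);		/* edit the private copy */
	setsugid(p);
	proc_set_cred(p, newcred);		/* install the edited credential */
	PROC_UNLOCK(p);
	uifree(euip);
	crfree(oldcred);			/* drop the replaced credential */
	return (0);
}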
Modified: trunk/sys/kern/kern_racct.c
===================================================================
--- trunk/sys/kern/kern_racct.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_racct.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
@@ -26,11 +27,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_racct.c 314348 2017-02-27 17:18:07Z avg $
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_racct.c 314348 2017-02-27 17:18:07Z avg $");
#include "opt_kdtrace.h"
#include "opt_sched.h"
@@ -71,8 +72,16 @@
* Do not block processes that have their %cpu usage <= pcpu_threshold.
*/
static int pcpu_threshold = 1;
+#ifdef RACCT_DEFAULT_TO_DISABLED
+int racct_enable = 0;
+#else
+int racct_enable = 1;
+#endif
SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting");
+TUNABLE_INT("kern.racct.enable", &racct_enable);
+SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable,
+ 0, "Enable RACCT/RCTL");
SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
0, "Processes with higher %cpu usage than this value can be throttled.");
@@ -96,30 +105,32 @@
uint64_t amount);
SDT_PROVIDER_DEFINE(racct);
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
- "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
+SDT_PROBE_DEFINE3(racct, , rusage, add,
"struct proc *", "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
- "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
- "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
- "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
+SDT_PROBE_DEFINE3(racct, , rusage, add__failure,
"struct proc *", "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
- "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
- "int", "uint64_t");
-SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
-SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
+SDT_PROBE_DEFINE3(racct, , rusage, add__cred,
+ "struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, add__force,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, set,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, set__failure,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, sub,
+ "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, sub__cred,
+ "struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE1(racct, , racct, create,
"struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
+SDT_PROBE_DEFINE1(racct, , racct, destroy,
+ "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, join,
"struct racct *", "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
- "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, join__failure,
+ "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, leave,
+ "struct racct *", "struct racct *");
int racct_types[] = {
[RACCT_CPU] =
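The DTrace probe definitions above switch to the newer SDT idiom: the explicit translated-name argument is gone, the module field is left blank for the framework to fill in, and probes are fired with the fixed-arity SDT_PROBEn() macros rather than the old five-argument SDT_PROBE(). Double underscores in a probe's C name become dashes in DTrace, so add__failure is reachable as the add-failure probe. A minimal kernel-style sketch of the idiom, using an invented "example" provider, is shown below.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sdt.h>

SDT_PROVIDER_DEFINE(example);
SDT_PROBE_DEFINE2(example, , alloc, alloc__failure, "size_t", "int");

static void
example_alloc_failed(size_t size, int error)
{

	/* Fires the example:::alloc-failure probe with two arguments. */
	SDT_PROBE2(example, , alloc, alloc__failure, size, error);
}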
@@ -314,6 +325,8 @@
fixpt_t p_pctcpu;
struct thread *td;
+ ASSERT_RACCT_ENABLED();
+
/*
* If the process is swapped out, we count its %cpu usage as zero.
* This behaviour is consistent with the userland ps(1) tool.
@@ -378,6 +391,7 @@
{
int i;
+ ASSERT_RACCT_ENABLED();
mtx_assert(&racct_lock, MA_OWNED);
/*
@@ -399,6 +413,7 @@
{
int i;
+ ASSERT_RACCT_ENABLED();
mtx_assert(&racct_lock, MA_OWNED);
/*
@@ -432,8 +447,11 @@
racct_create(struct racct **racctp)
{
- SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
+ if (!racct_enable)
+ return;
+ SDT_PROBE1(racct, , racct, create, racctp);
+
KASSERT(*racctp == NULL, ("racct already allocated"));
*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
@@ -445,8 +463,10 @@
int i;
struct racct *racct;
- SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
+ ASSERT_RACCT_ENABLED();
+ SDT_PROBE1(racct, , racct, destroy, racctp);
+
mtx_assert(&racct_lock, MA_OWNED);
KASSERT(racctp != NULL, ("NULL racctp"));
KASSERT(*racctp != NULL, ("NULL racct"));
@@ -471,6 +491,9 @@
racct_destroy(struct racct **racct)
{
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_destroy_locked(racct);
mtx_unlock(&racct_lock);
@@ -482,10 +505,11 @@
* may be less than zero.
*/
static void
-racct_alloc_resource(struct racct *racct, int resource,
+racct_adjust_resource(struct racct *racct, int resource,
uint64_t amount)
{
+ ASSERT_RACCT_ENABLED();
mtx_assert(&racct_lock, MA_OWNED);
KASSERT(racct != NULL, ("NULL racct"));
@@ -498,16 +522,16 @@
/*
* There are some cases where the racct %cpu resource would grow
- * beyond 100%.
- * For example in racct_proc_exit() we add the process %cpu usage
- * to the ucred racct containers. If too many processes terminated
- * in a short time span, the ucred %cpu resource could grow too much.
- * Also, the 4BSD scheduler sometimes returns for a thread more than
- * 100% cpu usage. So we set a boundary here to 100%.
+ * beyond 100% per core. For example in racct_proc_exit() we add
+ * the process %cpu usage to the ucred racct containers. If too
+ * many processes terminated in a short time span, the ucred %cpu
+ * resource could grow too much. Also, the 4BSD scheduler sometimes
+ * returns for a thread more than 100% cpu usage. So we set a sane
+ * boundary here to 100% * the maxumum number of CPUs.
*/
if ((resource == RACCT_PCTCPU) &&
- (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000))
- racct->r_resources[RACCT_PCTCPU] = 100 * 1000000;
+ (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU))
+ racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU;
}
static int
@@ -517,8 +541,10 @@
int error;
#endif
- SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
+ ASSERT_RACCT_ENABLED();
+ SDT_PROBE3(racct, , rusage, add, p, resource, amount);
+
/*
* We need proc lock to dereference p->p_ucred.
*/
@@ -527,12 +553,11 @@
#ifdef RCTL
error = rctl_enforce(p, resource, amount);
if (error && RACCT_IS_DENIABLE(resource)) {
- SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
- amount, 0, 0);
+ SDT_PROBE3(racct, , rusage, add__failure, p, resource, amount);
return (error);
}
#endif
- racct_alloc_resource(p->p_racct, resource, amount);
+ racct_adjust_resource(p->p_racct, resource, amount);
racct_add_cred_locked(p->p_ucred, resource, amount);
return (0);
@@ -547,6 +572,9 @@
{
int error;
+ if (!racct_enable)
+ return (0);
+
mtx_lock(&racct_lock);
error = racct_add_locked(p, resource, amount);
mtx_unlock(&racct_lock);
@@ -558,14 +586,15 @@
{
struct prison *pr;
- SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
- 0, 0);
+ ASSERT_RACCT_ENABLED();
- racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
+ SDT_PROBE3(racct, , rusage, add__cred, cred, resource, amount);
+
+ racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
- racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
+ racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
amount);
- racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
+ racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount);
}
/*
@@ -578,6 +607,9 @@
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_add_cred_locked(cred, resource, amount);
mtx_unlock(&racct_lock);
@@ -591,8 +623,11 @@
racct_add_force(struct proc *p, int resource, uint64_t amount)
{
- SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
+ if (!racct_enable)
+ return;
+ SDT_PROBE3(racct, , rusage, add__force, p, resource, amount);
+
/*
* We need proc lock to dereference p->p_ucred.
*/
@@ -599,7 +634,7 @@
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_lock(&racct_lock);
- racct_alloc_resource(p->p_racct, resource, amount);
+ racct_adjust_resource(p->p_racct, resource, amount);
mtx_unlock(&racct_lock);
racct_add_cred(p->p_ucred, resource, amount);
}
@@ -613,8 +648,10 @@
int error;
#endif
- SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+ ASSERT_RACCT_ENABLED();
+ SDT_PROBE3(racct, , rusage, set, p, resource, amount);
+
/*
* We need proc lock to dereference p->p_ucred.
*/
@@ -645,13 +682,13 @@
if (diff_proc > 0) {
error = rctl_enforce(p, resource, diff_proc);
if (error && RACCT_IS_DENIABLE(resource)) {
- SDT_PROBE(racct, kernel, rusage, set_failure, p,
- resource, amount, 0, 0);
+ SDT_PROBE3(racct, , rusage, set__failure, p, resource,
+ amount);
return (error);
}
}
#endif
- racct_alloc_resource(p->p_racct, resource, diff_proc);
+ racct_adjust_resource(p->p_racct, resource, diff_proc);
if (diff_cred > 0)
racct_add_cred_locked(p->p_ucred, resource, diff_cred);
else if (diff_cred < 0)
@@ -672,6 +709,9 @@
{
int error;
+ if (!racct_enable)
+ return (0);
+
mtx_lock(&racct_lock);
error = racct_set_locked(p, resource, amount);
mtx_unlock(&racct_lock);
@@ -684,8 +724,10 @@
int64_t old_amount, decayed_amount;
int64_t diff_proc, diff_cred;
- SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
+ ASSERT_RACCT_ENABLED();
+ SDT_PROBE3(racct, , rusage, set, p, resource, amount);
+
/*
* We need proc lock to dereference p->p_ucred.
*/
@@ -708,7 +750,7 @@
} else
diff_cred = diff_proc;
- racct_alloc_resource(p->p_racct, resource, diff_proc);
+ racct_adjust_resource(p->p_racct, resource, diff_proc);
if (diff_cred > 0)
racct_add_cred_locked(p->p_ucred, resource, diff_cred);
else if (diff_cred < 0)
@@ -718,6 +760,10 @@
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{
+
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_set_force_locked(p, resource, amount);
mtx_unlock(&racct_lock);
@@ -733,6 +779,9 @@
racct_get_limit(struct proc *p, int resource)
{
+ if (!racct_enable)
+ return (UINT64_MAX);
+
#ifdef RCTL
return (rctl_get_limit(p, resource));
#else
@@ -750,6 +799,9 @@
racct_get_available(struct proc *p, int resource)
{
+ if (!racct_enable)
+ return (UINT64_MAX);
+
#ifdef RCTL
return (rctl_get_available(p, resource));
#else
@@ -766,6 +818,8 @@
racct_pcpu_available(struct proc *p)
{
+ ASSERT_RACCT_ENABLED();
+
#ifdef RCTL
return (rctl_pcpu_available(p));
#else
@@ -780,8 +834,11 @@
racct_sub(struct proc *p, int resource, uint64_t amount)
{
- SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
+ if (!racct_enable)
+ return;
+ SDT_PROBE3(racct, , rusage, sub, p, resource, amount);
+
/*
* We need proc lock to dereference p->p_ucred.
*/
@@ -795,7 +852,7 @@
"than allocated %jd for %s (pid %d)", __func__, amount, resource,
(intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
- racct_alloc_resource(p->p_racct, resource, -amount);
+ racct_adjust_resource(p->p_racct, resource, -amount);
racct_sub_cred_locked(p->p_ucred, resource, amount);
mtx_unlock(&racct_lock);
}
@@ -805,9 +862,10 @@
{
struct prison *pr;
- SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
- 0, 0);
+ ASSERT_RACCT_ENABLED();
+ SDT_PROBE3(racct, , rusage, sub__cred, cred, resource, amount);
+
#ifdef notyet
KASSERT(RACCT_CAN_DROP(resource),
("%s: called for resource %d which can not drop", __func__,
@@ -814,11 +872,11 @@
resource));
#endif
- racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
+ racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
- racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
+ racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
-amount);
- racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
+ racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount);
}
/*
@@ -828,6 +886,9 @@
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{
+ if (!racct_enable)
+ return;
+
mtx_lock(&racct_lock);
racct_sub_cred_locked(cred, resource, amount);
mtx_unlock(&racct_lock);
@@ -841,6 +902,9 @@
{
int i, error = 0;
+ if (!racct_enable)
+ return (0);
+
/*
* Create racct for the child process.
*/
@@ -897,6 +961,9 @@
{
#ifdef RCTL
+ if (!racct_enable)
+ return;
+
PROC_LOCK(child);
mtx_lock(&racct_lock);
rctl_enforce(child, RACCT_NPROC, 0);
@@ -914,6 +981,9 @@
struct timeval wallclock;
uint64_t pct_estimate, pct;
+ if (!racct_enable)
+ return;
+
PROC_LOCK(p);
/*
* We don't need to calculate rux, proc_reap() has already done this.
@@ -939,10 +1009,13 @@
racct_set_locked(p, RACCT_CPU, runtime);
racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);
+ KASSERT(p->p_racct->r_resources[RACCT_RSS] == 0,
+ ("process reaped with %ju allocated for RSS\n",
+ p->p_racct->r_resources[RACCT_RSS]));
for (i = 0; i <= RACCT_MAX; i++) {
if (p->p_racct->r_resources[i] == 0)
continue;
- if (!RACCT_IS_RECLAIMABLE(i))
+ if (!RACCT_IS_RECLAIMABLE(i))
continue;
racct_set_locked(p, i, 0);
}
@@ -968,6 +1041,9 @@
struct loginclass *oldlc, *newlc;
struct prison *oldpr, *newpr, *pr;
+ if (!racct_enable)
+ return;
+
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
newuip = newcred->cr_ruidinfo;
@@ -1005,6 +1081,8 @@
racct_move(struct racct *dest, struct racct *src)
{
+ ASSERT_RACCT_ENABLED();
+
mtx_lock(&racct_lock);
racct_add_racct(dest, src);
@@ -1021,6 +1099,7 @@
int cpuid;
#endif
+ ASSERT_RACCT_ENABLED();
PROC_LOCK_ASSERT(p, MA_OWNED);
/*
@@ -1066,6 +1145,9 @@
static void
racct_proc_wakeup(struct proc *p)
{
+
+ ASSERT_RACCT_ENABLED();
+
PROC_LOCK_ASSERT(p, MA_OWNED);
if (p->p_throttled) {
@@ -1080,6 +1162,8 @@
int resource;
int64_t r_old, r_new;
+ ASSERT_RACCT_ENABLED();
+
resource = *(int *)res;
r_old = racct->r_resources[resource];
@@ -1096,6 +1180,9 @@
static void
racct_decay(int resource)
{
+
+ ASSERT_RACCT_ENABLED();
+
ui_racct_foreach(racct_decay_resource, &resource, NULL);
loginclass_racct_foreach(racct_decay_resource, &resource, NULL);
prison_racct_foreach(racct_decay_resource, &resource, NULL);
@@ -1110,6 +1197,8 @@
uint64_t runtime;
uint64_t pct, pct_estimate;
+ ASSERT_RACCT_ENABLED();
+
for (;;) {
racct_decay(RACCT_PCTCPU);
@@ -1130,11 +1219,11 @@
microuptime(&wallclock);
timevalsub(&wallclock, &p->p_stats->p_start);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
FOREACH_THREAD_IN_PROC(p, td)
ruxagg(p, td);
runtime = cputick2usec(p->p_rux.rux_runtime);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
#ifdef notyet
KASSERT(runtime >= p->p_prev_runtime,
("runtime < p_prev_runtime"));
@@ -1189,11 +1278,22 @@
racctd,
NULL
};
-SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
static void
+racctd_init(void)
+{
+ if (!racct_enable)
+ return;
+
+ kproc_start(&racctd_kp);
+}
+SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);
+
+static void
racct_init(void)
{
+ if (!racct_enable)
+ return;
racct_zone = uma_zcreate("racct", sizeof(struct racct),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
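The net effect in kern_racct.c is a kern.racct.enable knob: a loader tunable exposed as a read-only (RDTUN) sysctl, defaulting to on unless the kernel is built with RACCT_DEFAULT_TO_DISABLED, and settable at boot via /boot/loader.conf. Public racct_*() entry points return early when accounting is off, internal helpers assert ASSERT_RACCT_ENABLED(), and the racctd kthread is only started when the facility is enabled, replacing the unconditional SYSINIT. A kernel-style sketch of the guard pattern follows; the function names are invented and the real ASSERT_RACCT_ENABLED() macro lives in sys/racct.h.

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/racct.h>

extern int racct_enable;	/* set from the kern.racct.enable tunable */

static void
example_racct_helper(struct proc *p)
{

	ASSERT_RACCT_ENABLED();		/* callers must have checked */
	/* ... adjust the per-process resource counters ... */
}

void
example_racct_entry(struct proc *p)
{

	if (!racct_enable)		/* accounting compiled in but off */
		return;
	example_racct_helper(p);
}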
Modified: trunk/sys/kern/kern_rctl.c
===================================================================
--- trunk/sys/kern/kern_rctl.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_rctl.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
@@ -26,11 +27,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_rctl.c 302234 2016-06-27 21:50:30Z bdrewery $
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rctl.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include <sys/param.h>
#include <sys/bus.h>
@@ -225,6 +226,7 @@
int64_t available = INT64_MAX;
struct ucred *cred = p->p_ucred;
+ ASSERT_RACCT_ENABLED();
rw_assert(&rctl_lock, RA_LOCKED);
resource = rule->rr_resource;
@@ -264,6 +266,8 @@
{
int64_t available;
+ ASSERT_RACCT_ENABLED();
+
rw_assert(&rctl_lock, RA_LOCKED);
available = rctl_available_resource(p, rule);
@@ -283,6 +287,8 @@
struct rctl_rule_link *link;
int64_t available, minavailable, limit;
+ ASSERT_RACCT_ENABLED();
+
minavailable = INT64_MAX;
limit = 0;
@@ -305,7 +311,7 @@
/*
* Return slightly less than actual value of the available
- * %cpu resource. This makes %cpu throttling more agressive
+ * %cpu resource. This makes %cpu throttling more aggressive
* and lets us act sooner than the limits are already exceeded.
*/
if (limit != 0) {
@@ -334,6 +340,8 @@
static int curtime = 0;
static struct timeval lasttime;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
/*
@@ -457,6 +465,8 @@
struct rctl_rule_link *link;
uint64_t amount = UINT64_MAX;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
/*
@@ -487,6 +497,8 @@
minavailable = INT64_MAX;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
/*
@@ -521,6 +533,8 @@
rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
{
+ ASSERT_RACCT_ENABLED();
+
if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
if (rule->rr_subject_type != filter->rr_subject_type)
return (0);
@@ -635,6 +649,7 @@
{
struct rctl_rule_link *link;
+ ASSERT_RACCT_ENABLED();
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
rctl_rule_acquire(rule);
@@ -652,6 +667,7 @@
{
struct rctl_rule_link *link;
+ ASSERT_RACCT_ENABLED();
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
rw_assert(&rctl_lock, RA_WLOCKED);
@@ -678,6 +694,7 @@
int removed = 0;
struct rctl_rule_link *link, *linktmp;
+ ASSERT_RACCT_ENABLED();
rw_assert(&rctl_lock, RA_WLOCKED);
LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
@@ -696,6 +713,8 @@
rctl_rule_acquire_subject(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
+
switch (rule->rr_subject_type) {
case RCTL_SUBJECT_TYPE_UNDEFINED:
case RCTL_SUBJECT_TYPE_PROCESS:
@@ -722,6 +741,8 @@
rctl_rule_release_subject(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
+
switch (rule->rr_subject_type) {
case RCTL_SUBJECT_TYPE_UNDEFINED:
case RCTL_SUBJECT_TYPE_PROCESS:
@@ -749,6 +770,8 @@
{
struct rctl_rule *rule;
+ ASSERT_RACCT_ENABLED();
+
rule = uma_zalloc(rctl_rule_zone, flags);
if (rule == NULL)
return (NULL);
@@ -771,6 +794,8 @@
{
struct rctl_rule *copy;
+ ASSERT_RACCT_ENABLED();
+
copy = uma_zalloc(rctl_rule_zone, flags);
if (copy == NULL)
return (NULL);
@@ -793,6 +818,7 @@
rctl_rule_acquire(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
refcount_acquire(&rule->rr_refcount);
@@ -805,6 +831,7 @@
rule = (struct rctl_rule *)context;
+ ASSERT_RACCT_ENABLED();
KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
/*
@@ -819,6 +846,7 @@
rctl_rule_release(struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
if (refcount_release(&rule->rr_refcount)) {
@@ -838,6 +866,8 @@
rctl_rule_fully_specified(const struct rctl_rule *rule)
{
+ ASSERT_RACCT_ENABLED();
+
switch (rule->rr_subject_type) {
case RCTL_SUBJECT_TYPE_UNDEFINED:
return (0);
@@ -882,6 +912,8 @@
struct rctl_rule *rule;
id_t id;
+ ASSERT_RACCT_ENABLED();
+
rule = rctl_rule_alloc(M_WAITOK);
subjectstr = strsep(&rulestr, ":");
@@ -1008,6 +1040,7 @@
struct rctl_rule *rule2;
int match;
+ ASSERT_RACCT_ENABLED();
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
/*
@@ -1118,6 +1151,8 @@
struct rctl_rule *filter = (struct rctl_rule *)arg2;
int found = 0;
+ ASSERT_RACCT_ENABLED();
+
rw_wlock(&rctl_lock);
found += rctl_racct_remove_rules(racct, filter);
rw_wunlock(&rctl_lock);
@@ -1134,6 +1169,8 @@
int found = 0;
struct proc *p;
+ ASSERT_RACCT_ENABLED();
+
if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
filter->rr_subject.rs_proc != NULL) {
p = filter->rr_subject.rs_proc;
@@ -1172,6 +1209,8 @@
{
int64_t amount;
+ ASSERT_RACCT_ENABLED();
+
sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
switch (rule->rr_subject_type) {
@@ -1231,6 +1270,8 @@
int error;
char *str;
+ ASSERT_RACCT_ENABLED();
+
if (inbuflen <= 0)
return (EINVAL);
if (inbuflen > RCTL_MAX_INBUFLEN)
@@ -1256,6 +1297,8 @@
{
int error;
+ ASSERT_RACCT_ENABLED();
+
if (outputsbuf == NULL)
return (0);
@@ -1277,6 +1320,8 @@
int64_t amount;
struct sbuf *sb;
+ ASSERT_RACCT_ENABLED();
+
sb = sbuf_new_auto();
for (i = 0; i <= RACCT_MAX; i++) {
if (sloppy == 0 && RACCT_IS_SLOPPY(i))
@@ -1302,6 +1347,9 @@
struct loginclass *lc;
struct prison_racct *prr;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_GET_RACCT);
if (error != 0)
return (error);
@@ -1372,6 +1420,8 @@
struct rctl_rule_link *link;
struct sbuf *sb = (struct sbuf *)arg3;
+ ASSERT_RACCT_ENABLED();
+
rw_rlock(&rctl_lock);
LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
if (!rctl_rule_matches(link->rrl_rule, filter))
@@ -1393,6 +1443,9 @@
struct rctl_rule_link *link;
struct proc *p;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_GET_RULES);
if (error != 0)
return (error);
@@ -1467,6 +1520,9 @@
struct rctl_rule *filter;
struct rctl_rule_link *link;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_GET_LIMITS);
if (error != 0)
return (error);
@@ -1538,6 +1594,9 @@
struct rctl_rule *rule;
char *inputstr;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_ADD_RULE);
if (error != 0)
return (error);
@@ -1580,6 +1639,9 @@
struct rctl_rule *filter;
char *inputstr;
+ if (!racct_enable)
+ return (ENOSYS);
+
error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
if (error != 0)
return (error);
@@ -1616,6 +1678,8 @@
struct prison_racct *newprr;
LIST_HEAD(, rctl_rule_link) newrules;
+ ASSERT_RACCT_ENABLED();
+
newuip = newcred->cr_ruidinfo;
newlc = newcred->cr_loginclass;
newprr = newcred->cr_prison->pr_prison_racct;
@@ -1756,6 +1820,7 @@
LIST_INIT(&child->p_racct->r_rule_links);
+ ASSERT_RACCT_ENABLED();
KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
rw_wlock(&rctl_lock);
@@ -1809,6 +1874,8 @@
{
struct rctl_rule_link *link;
+ ASSERT_RACCT_ENABLED();
+
rw_wlock(&rctl_lock);
while (!LIST_EMPTY(&racct->r_rule_links)) {
link = LIST_FIRST(&racct->r_rule_links);
@@ -1823,6 +1890,9 @@
rctl_init(void)
{
+ if (!racct_enable)
+ return;
+
rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
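With the racct_enable checks in place, every RCTL system call handler now fails with ENOSYS when resource accounting is disabled, giving userland an unambiguous error instead of silently matching nothing. Below is a userland sketch of probing for that condition; it assumes the rctl_get_racct(2) libc wrapper and the "process:<pid>" filter syntax used by rctl(8).

#include <sys/types.h>
#include <sys/rctl.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char filter[32], out[1024];

	snprintf(filter, sizeof(filter), "process:%d", (int)getpid());
	if (rctl_get_racct(filter, strlen(filter) + 1, out,
	    sizeof(out)) == -1) {
		if (errno == ENOSYS)
			fprintf(stderr,
			    "RACCT/RCTL is disabled (kern.racct.enable)\n");
		else
			perror("rctl_get_racct");
		return (1);
	}
	printf("%s\n", out);
	return (0);
}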
Modified: trunk/sys/kern/kern_rmlock.c
===================================================================
--- trunk/sys/kern/kern_rmlock.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_rmlock.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007 Stephan Uphoff <ups at FreeBSD.org>
* All rights reserved.
@@ -32,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rmlock.c 323870 2017-09-21 19:24:11Z marius $");
#include "opt_ddb.h"
#include "opt_kdtrace.h"
@@ -41,6 +42,7 @@
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -56,35 +58,53 @@
#include <ddb/ddb.h>
#endif
+/*
+ * A cookie to mark destroyed rmlocks. This is stored in the head of
+ * rm_activeReaders.
+ */
+#define RM_DESTROYED ((void *)0xdead)
+
+#define rm_destroyed(rm) \
+ (LIST_FIRST(&(rm)->rm_activeReaders) == RM_DESTROYED)
+
#define RMPF_ONQUEUE 1
#define RMPF_SIGNAL 2
-/*
- * To support usage of rmlock in CVs and msleep yet another list for the
- * priority tracker would be needed. Using this lock for cv and msleep also
- * does not seem very useful
- */
+#ifndef INVARIANTS
+#define _rm_assert(c, what, file, line)
+#endif
-static __inline void compiler_memory_barrier(void) {
- __asm __volatile("":::"memory");
-}
-
-static void assert_rm(struct lock_object *lock, int what);
-static void lock_rm(struct lock_object *lock, int how);
+static void assert_rm(const struct lock_object *lock, int what);
+#ifdef DDB
+static void db_show_rm(const struct lock_object *lock);
+#endif
+static void lock_rm(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
-static int owner_rm(struct lock_object *lock, struct thread **owner);
+static int owner_rm(const struct lock_object *lock, struct thread **owner);
#endif
-static int unlock_rm(struct lock_object *lock);
+static uintptr_t unlock_rm(struct lock_object *lock);
struct lock_class lock_class_rm = {
.lc_name = "rm",
.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
.lc_assert = assert_rm,
-#if 0
#ifdef DDB
- .lc_ddb_show = db_show_rwlock,
+ .lc_ddb_show = db_show_rm,
#endif
+ .lc_lock = lock_rm,
+ .lc_unlock = unlock_rm,
+#ifdef KDTRACE_HOOKS
+ .lc_owner = owner_rm,
#endif
+};
+
+struct lock_class lock_class_rm_sleepable = {
+ .lc_name = "sleepable rm",
+ .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE,
+ .lc_assert = assert_rm,
+#ifdef DDB
+ .lc_ddb_show = db_show_rm,
+#endif
.lc_lock = lock_rm,
.lc_unlock = unlock_rm,
#ifdef KDTRACE_HOOKS
@@ -93,32 +113,79 @@
};
static void
-assert_rm(struct lock_object *lock, int what)
+assert_rm(const struct lock_object *lock, int what)
{
- panic("assert_rm called");
+ rm_assert((const struct rmlock *)lock, what);
}
static void
-lock_rm(struct lock_object *lock, int how)
+lock_rm(struct lock_object *lock, uintptr_t how)
{
+ struct rmlock *rm;
+ struct rm_priotracker *tracker;
- panic("lock_rm called");
+ rm = (struct rmlock *)lock;
+ if (how == 0)
+ rm_wlock(rm);
+ else {
+ tracker = (struct rm_priotracker *)how;
+ rm_rlock(rm, tracker);
+ }
}
-static int
+static uintptr_t
unlock_rm(struct lock_object *lock)
{
+ struct thread *td;
+ struct pcpu *pc;
+ struct rmlock *rm;
+ struct rm_queue *queue;
+ struct rm_priotracker *tracker;
+ uintptr_t how;
- panic("unlock_rm called");
+ rm = (struct rmlock *)lock;
+ tracker = NULL;
+ how = 0;
+ rm_assert(rm, RA_LOCKED | RA_NOTRECURSED);
+ if (rm_wowned(rm))
+ rm_wunlock(rm);
+ else {
+ /*
+ * Find the right rm_priotracker structure for curthread.
+ * The guarantee about its uniqueness is given by the fact
+ * we already asserted the lock wasn't recursively acquired.
+ */
+ critical_enter();
+ td = curthread;
+ pc = pcpu_find(curcpu);
+ for (queue = pc->pc_rm_queue.rmq_next;
+ queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
+ tracker = (struct rm_priotracker *)queue;
+ if ((tracker->rmp_rmlock == rm) &&
+ (tracker->rmp_thread == td)) {
+ how = (uintptr_t)tracker;
+ break;
+ }
+ }
+ KASSERT(tracker != NULL,
+ ("rm_priotracker is non-NULL when lock held in read mode"));
+ critical_exit();
+ rm_runlock(rm, tracker);
+ }
+ return (how);
}
#ifdef KDTRACE_HOOKS
static int
-owner_rm(struct lock_object *lock, struct thread **owner)
+owner_rm(const struct lock_object *lock, struct thread **owner)
{
+ const struct rmlock *rm;
+ struct lock_class *lc;
- panic("owner_rm called");
+ rm = (const struct rmlock *)lock;
+ lc = LOCK_CLASS(&rm->rm_wlock_object);
+ return (lc->lc_owner(&rm->rm_wlock_object, owner));
}
#endif
@@ -149,6 +216,28 @@
pc->pc_rm_queue.rmq_next = &tracker->rmp_cpuQueue;
}
+/*
+ * Return a count of the number of trackers the thread 'td' already
+ * has on this CPU for the lock 'rm'.
+ */
+static int
+rm_trackers_present(const struct pcpu *pc, const struct rmlock *rm,
+ const struct thread *td)
+{
+ struct rm_queue *queue;
+ struct rm_priotracker *tracker;
+ int count;
+
+ count = 0;
+ for (queue = pc->pc_rm_queue.rmq_next; queue != &pc->pc_rm_queue;
+ queue = queue->rmq_next) {
+ tracker = (struct rm_priotracker *)queue;
+ if ((tracker->rmp_rmlock == rm) && (tracker->rmp_thread == td))
+ count++;
+ }
+ return (count);
+}
+
static void inline
rm_tracker_remove(struct pcpu *pc, struct rm_priotracker *tracker)
{
@@ -186,12 +275,11 @@
}
}
-CTASSERT((RM_SLEEPABLE & LO_CLASSFLAGS) == RM_SLEEPABLE);
-
void
rm_init_flags(struct rmlock *rm, const char *name, int opts)
{
- int liflags;
+ struct lock_class *lc;
+ int liflags, xflags;
liflags = 0;
if (!(opts & RM_NOWITNESS))
@@ -198,14 +286,23 @@
liflags |= LO_WITNESS;
if (opts & RM_RECURSE)
liflags |= LO_RECURSABLE;
+ if (opts & RM_NEW)
+ liflags |= LO_NEW;
rm->rm_writecpus = all_cpus;
LIST_INIT(&rm->rm_activeReaders);
if (opts & RM_SLEEPABLE) {
- liflags |= RM_SLEEPABLE;
- sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", SX_RECURSE);
- } else
- mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", MTX_NOWITNESS);
- lock_init(&rm->lock_object, &lock_class_rm, name, NULL, liflags);
+ liflags |= LO_SLEEPABLE;
+ lc = &lock_class_rm_sleepable;
+ xflags = (opts & RM_NEW ? SX_NEW : 0);
+ sx_init_flags(&rm->rm_lock_sx, "rmlock_sx",
+ xflags | SX_NOWITNESS);
+ } else {
+ lc = &lock_class_rm;
+ xflags = (opts & RM_NEW ? MTX_NEW : 0);
+ mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx",
+ xflags | MTX_NOWITNESS);
+ }
+ lock_init(&rm->lock_object, lc, name, NULL, liflags);
}
void
@@ -219,7 +316,9 @@
rm_destroy(struct rmlock *rm)
{
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+ rm_assert(rm, RA_UNLOCKED);
+ LIST_FIRST(&rm->rm_activeReaders) = RM_DESTROYED;
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE)
sx_destroy(&rm->rm_lock_sx);
else
mtx_destroy(&rm->rm_lock_mtx);
@@ -227,10 +326,10 @@
}
int
-rm_wowned(struct rmlock *rm)
+rm_wowned(const struct rmlock *rm)
{
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE)
return (sx_xlocked(&rm->rm_lock_sx));
else
return (mtx_owned(&rm->rm_lock_mtx));
@@ -256,8 +355,6 @@
_rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
{
struct pcpu *pc;
- struct rm_queue *queue;
- struct rm_priotracker *atracker;
critical_enter();
pc = pcpu_find(curcpu);
@@ -280,7 +377,7 @@
}
/*
- * We allow readers to aquire a lock even if a writer is blocked if
+ * We allow readers to acquire a lock even if a writer is blocked if
* the lock is recursive and the reader already holds the lock.
*/
if ((rm->lock_object.lo_flags & LO_RECURSABLE) != 0) {
@@ -288,20 +385,15 @@
* Just grant the lock if this thread already has a tracker
* for this lock on the per-cpu queue.
*/
- for (queue = pc->pc_rm_queue.rmq_next;
- queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
- atracker = (struct rm_priotracker *)queue;
- if ((atracker->rmp_rmlock == rm) &&
- (atracker->rmp_thread == tracker->rmp_thread)) {
- mtx_lock_spin(&rm_spinlock);
- LIST_INSERT_HEAD(&rm->rm_activeReaders,
- tracker, rmp_qentry);
- tracker->rmp_flags = RMPF_ONQUEUE;
- mtx_unlock_spin(&rm_spinlock);
- rm_tracker_add(pc, tracker);
- critical_exit();
- return (1);
- }
+ if (rm_trackers_present(pc, rm, curthread) != 0) {
+ mtx_lock_spin(&rm_spinlock);
+ LIST_INSERT_HEAD(&rm->rm_activeReaders, tracker,
+ rmp_qentry);
+ tracker->rmp_flags = RMPF_ONQUEUE;
+ mtx_unlock_spin(&rm_spinlock);
+ rm_tracker_add(pc, tracker);
+ critical_exit();
+ return (1);
}
}
@@ -309,7 +401,7 @@
critical_exit();
if (trylock) {
- if (rm->lock_object.lo_flags & RM_SLEEPABLE) {
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
if (!sx_try_xlock(&rm->rm_lock_sx))
return (0);
} else {
@@ -317,9 +409,11 @@
return (0);
}
} else {
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE) {
+ THREAD_SLEEPING_OK();
sx_xlock(&rm->rm_lock_sx);
- else
+ THREAD_NO_SLEEPING();
+ } else
mtx_lock(&rm->rm_lock_mtx);
}
@@ -330,7 +424,7 @@
sched_pin();
critical_exit();
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE)
sx_xunlock(&rm->rm_lock_sx);
else
mtx_unlock(&rm->rm_lock_mtx);
@@ -351,9 +445,12 @@
tracker->rmp_thread = td;
tracker->rmp_rmlock = rm;
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE)
+ THREAD_NO_SLEEPING();
+
td->td_critnest++; /* critical_enter(); */
- compiler_memory_barrier();
+ __compiler_membar();
pc = cpuid_to_pcpu[td->td_oncpu]; /* pcpu_find(td->td_oncpu); */
@@ -361,7 +458,7 @@
sched_pin();
- compiler_memory_barrier();
+ __compiler_membar();
td->td_critnest--;
@@ -425,6 +522,9 @@
td->td_critnest--;
sched_unpin();
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE)
+ THREAD_SLEEPING_OK();
+
if (0 == (td->td_owepreempt | tracker->rmp_flags))
return;
@@ -441,7 +541,7 @@
if (SCHEDULER_STOPPED())
return;
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE)
sx_xlock(&rm->rm_lock_sx);
else
mtx_lock(&rm->rm_lock_mtx);
@@ -484,20 +584,28 @@
_rm_wunlock(struct rmlock *rm)
{
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
+ if (rm->lock_object.lo_flags & LO_SLEEPABLE)
sx_xunlock(&rm->rm_lock_sx);
else
mtx_unlock(&rm->rm_lock_mtx);
}
-#ifdef LOCK_DEBUG
+#if LOCK_DEBUG > 0
-void _rm_wlock_debug(struct rmlock *rm, const char *file, int line)
+void
+_rm_wlock_debug(struct rmlock *rm, const char *file, int line)
{
if (SCHEDULER_STOPPED())
return;
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("rm_wlock() by idle thread %p on rmlock %s @ %s:%d",
+ curthread, rm->lock_object.lo_name, file, line));
+ KASSERT(!rm_destroyed(rm),
+ ("rm_wlock() of destroyed rmlock @ %s:%d", file, line));
+ _rm_assert(rm, RA_UNLOCKED, file, line);
+
WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE,
file, line, NULL);
@@ -505,11 +613,7 @@
LOCK_LOG_LOCK("RMWLOCK", &rm->lock_object, 0, 0, file, line);
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
- WITNESS_LOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE,
- file, line);
- else
- WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
+ WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
curthread->td_locks++;
@@ -522,14 +626,13 @@
if (SCHEDULER_STOPPED())
return;
- curthread->td_locks--;
- if (rm->lock_object.lo_flags & RM_SLEEPABLE)
- WITNESS_UNLOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE,
- file, line);
- else
- WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
+ KASSERT(!rm_destroyed(rm),
+ ("rm_wunlock() of destroyed rmlock @ %s:%d", file, line));
+ _rm_assert(rm, RA_WLOCKED, file, line);
+ WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line);
LOCK_LOG_LOCK("RMWUNLOCK", &rm->lock_object, 0, 0, file, line);
_rm_wunlock(rm);
+ curthread->td_locks--;
}
int
@@ -540,20 +643,43 @@
if (SCHEDULER_STOPPED())
return (1);
- if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE))
- WITNESS_CHECKORDER(&rm->rm_lock_sx.lock_object, LOP_NEWORDER,
- file, line, NULL);
- WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line, NULL);
+#ifdef INVARIANTS
+ if (!(rm->lock_object.lo_flags & LO_RECURSABLE) && !trylock) {
+ critical_enter();
+ KASSERT(rm_trackers_present(pcpu_find(curcpu), rm,
+ curthread) == 0,
+ ("rm_rlock: recursed on non-recursive rmlock %s @ %s:%d\n",
+ rm->lock_object.lo_name, file, line));
+ critical_exit();
+ }
+#endif
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("rm_rlock() by idle thread %p on rmlock %s @ %s:%d",
+ curthread, rm->lock_object.lo_name, file, line));
+ KASSERT(!rm_destroyed(rm),
+ ("rm_rlock() of destroyed rmlock @ %s:%d", file, line));
+ if (!trylock) {
+ KASSERT(!rm_wowned(rm),
+ ("rm_rlock: wlock already held for %s @ %s:%d",
+ rm->lock_object.lo_name, file, line));
+ WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line,
+ NULL);
+ }
if (_rm_rlock(rm, tracker, trylock)) {
- LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file, line);
-
+ if (trylock)
+ LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 1, file,
+ line);
+ else
+ LOCK_LOG_LOCK("RMRLOCK", &rm->lock_object, 0, 0, file,
+ line);
WITNESS_LOCK(&rm->lock_object, 0, file, line);
curthread->td_locks++;
return (1);
- }
+ } else if (trylock)
+ LOCK_LOG_TRY("RMRLOCK", &rm->lock_object, 0, 0, file, line);
return (0);
}
@@ -566,10 +692,13 @@
if (SCHEDULER_STOPPED())
return;
- curthread->td_locks--;
+ KASSERT(!rm_destroyed(rm),
+ ("rm_runlock() of destroyed rmlock @ %s:%d", file, line));
+ _rm_assert(rm, RA_RLOCKED, file, line);
WITNESS_UNLOCK(&rm->lock_object, 0, file, line);
LOCK_LOG_LOCK("RMRUNLOCK", &rm->lock_object, 0, 0, file, line);
_rm_runlock(rm, tracker);
+ curthread->td_locks--;
}
#else
@@ -609,3 +738,130 @@
}
#endif
+
+#ifdef INVARIANT_SUPPORT
+#ifndef INVARIANTS
+#undef _rm_assert
+#endif
+
+/*
+ * Note that this does not need to use witness_assert() for read lock
+ * assertions since an exact count of read locks held by this thread
+ * is computable.
+ */
+void
+_rm_assert(const struct rmlock *rm, int what, const char *file, int line)
+{
+ int count;
+
+ if (panicstr != NULL)
+ return;
+ switch (what) {
+ case RA_LOCKED:
+ case RA_LOCKED | RA_RECURSED:
+ case RA_LOCKED | RA_NOTRECURSED:
+ case RA_RLOCKED:
+ case RA_RLOCKED | RA_RECURSED:
+ case RA_RLOCKED | RA_NOTRECURSED:
+ /*
+ * Handle the write-locked case. Unlike other
+ * primitives, writers can never recurse.
+ */
+ if (rm_wowned(rm)) {
+ if (what & RA_RLOCKED)
+ panic("Lock %s exclusively locked @ %s:%d\n",
+ rm->lock_object.lo_name, file, line);
+ if (what & RA_RECURSED)
+ panic("Lock %s not recursed @ %s:%d\n",
+ rm->lock_object.lo_name, file, line);
+ break;
+ }
+
+ critical_enter();
+ count = rm_trackers_present(pcpu_find(curcpu), rm, curthread);
+ critical_exit();
+
+ if (count == 0)
+ panic("Lock %s not %slocked @ %s:%d\n",
+ rm->lock_object.lo_name, (what & RA_RLOCKED) ?
+ "read " : "", file, line);
+ if (count > 1) {
+ if (what & RA_NOTRECURSED)
+ panic("Lock %s recursed @ %s:%d\n",
+ rm->lock_object.lo_name, file, line);
+ } else if (what & RA_RECURSED)
+ panic("Lock %s not recursed @ %s:%d\n",
+ rm->lock_object.lo_name, file, line);
+ break;
+ case RA_WLOCKED:
+ if (!rm_wowned(rm))
+ panic("Lock %s not exclusively locked @ %s:%d\n",
+ rm->lock_object.lo_name, file, line);
+ break;
+ case RA_UNLOCKED:
+ if (rm_wowned(rm))
+ panic("Lock %s exclusively locked @ %s:%d\n",
+ rm->lock_object.lo_name, file, line);
+
+ critical_enter();
+ count = rm_trackers_present(pcpu_find(curcpu), rm, curthread);
+ critical_exit();
+
+ if (count != 0)
+ panic("Lock %s read locked @ %s:%d\n",
+ rm->lock_object.lo_name, file, line);
+ break;
+ default:
+ panic("Unknown rm lock assertion: %d @ %s:%d", what, file,
+ line);
+ }
+}
+#endif /* INVARIANT_SUPPORT */
+
+#ifdef DDB
+static void
+print_tracker(struct rm_priotracker *tr)
+{
+ struct thread *td;
+
+ td = tr->rmp_thread;
+ db_printf(" thread %p (tid %d, pid %d, \"%s\") {", td, td->td_tid,
+ td->td_proc->p_pid, td->td_name);
+ if (tr->rmp_flags & RMPF_ONQUEUE) {
+ db_printf("ONQUEUE");
+ if (tr->rmp_flags & RMPF_SIGNAL)
+ db_printf(",SIGNAL");
+ } else
+ db_printf("0");
+ db_printf("}\n");
+}
+
+static void
+db_show_rm(const struct lock_object *lock)
+{
+ struct rm_priotracker *tr;
+ struct rm_queue *queue;
+ const struct rmlock *rm;
+ struct lock_class *lc;
+ struct pcpu *pc;
+
+ rm = (const struct rmlock *)lock;
+ db_printf(" writecpus: ");
+ ddb_display_cpuset(__DEQUALIFY(const cpuset_t *, &rm->rm_writecpus));
+ db_printf("\n");
+ db_printf(" per-CPU readers:\n");
+ STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
+ for (queue = pc->pc_rm_queue.rmq_next;
+ queue != &pc->pc_rm_queue; queue = queue->rmq_next) {
+ tr = (struct rm_priotracker *)queue;
+ if (tr->rmp_rmlock == rm)
+ print_tracker(tr);
+ }
+ db_printf(" active readers:\n");
+ LIST_FOREACH(tr, &rm->rm_activeReaders, rmp_qentry)
+ print_tracker(tr);
+ lc = LOCK_CLASS(&rm->rm_wlock_object);
+ db_printf("Backing write-lock (%s):\n", lc->lc_name);
+ lc->lc_ddb_show(&rm->rm_wlock_object);
+}
+#endif
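The rmlock changes fill in the lock_class methods (assert, lock, unlock, owner) so rmlocks integrate with the generic lock framework, add rm_assert() and a DDB show routine, catch use-after-destroy through the RM_DESTROYED cookie, give sleepable rmlocks their own lock class with THREAD_NO_SLEEPING bracketing of read sections, and factor the per-CPU tracker walk into rm_trackers_present(). For context, here is a kernel-style sketch of basic rmlock usage, showing the on-stack rm_priotracker that those per-CPU queues track; names are invented and the fragment is not compilable standalone.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

static struct rmlock example_lock;
static int example_value;

static void
example_init(void)
{

	rm_init(&example_lock, "example rmlock");
}

static int
example_read(void)
{
	struct rm_priotracker tracker;
	int v;

	rm_rlock(&example_lock, &tracker);	/* cheap read path */
	v = example_value;
	rm_runlock(&example_lock, &tracker);
	return (v);
}

static void
example_write(int v)
{

	rm_wlock(&example_lock);		/* heavyweight write path */
	example_value = v;
	rm_wunlock(&example_lock);
}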
Modified: trunk/sys/kern/kern_rwlock.c
===================================================================
--- trunk/sys/kern/kern_rwlock.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_rwlock.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2006 John Baldwin <jhb at FreeBSD.org>
* All rights reserved.
@@ -10,9 +11,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of any co-contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -32,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rwlock.c 323870 2017-09-21 19:24:11Z marius $");
#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
@@ -40,6 +38,7 @@
#include "opt_no_adaptive_rwlocks.h"
#include <sys/param.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
@@ -46,6 +45,8 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>
@@ -61,22 +62,23 @@
PMC_SOFT_DECLARE( , , lock, failed);
#endif
-#ifdef ADAPTIVE_RWLOCKS
-#define ROWNER_RETRIES 10
-#define ROWNER_LOOPS 10000
-#endif
+/*
+ * Return the rwlock address when the lock cookie address is provided.
+ * This functionality assumes that struct rwlock* have a member named rw_lock.
+ */
+#define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock))
#ifdef DDB
#include <ddb/ddb.h>
-static void db_show_rwlock(struct lock_object *lock);
+static void db_show_rwlock(const struct lock_object *lock);
#endif
-static void assert_rw(struct lock_object *lock, int what);
-static void lock_rw(struct lock_object *lock, int how);
+static void assert_rw(const struct lock_object *lock, int what);
+static void lock_rw(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
-static int owner_rw(struct lock_object *lock, struct thread **owner);
+static int owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
-static int unlock_rw(struct lock_object *lock);
+static uintptr_t unlock_rw(struct lock_object *lock);
struct lock_class lock_class_rw = {
.lc_name = "rw",
@@ -92,6 +94,42 @@
#endif
};
+#ifdef ADAPTIVE_RWLOCKS
+static int rowner_retries = 10;
+static int rowner_loops = 10000;
+static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
+ "rwlock debugging");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
+
+static struct lock_delay_config rw_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
+ 0, "");
+
+static void
+rw_delay_sysinit(void *dummy)
+{
+
+ rw_delay.initial = mp_ncpus * 25;
+ rw_delay.step = (mp_ncpus * 25) / 2;
+ rw_delay.min = mp_ncpus * 5;
+ rw_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(rw_delay_sysinit);
+#endif
+
/*
* Return a pointer to the owning thread if the lock is write-locked or
* NULL if the lock is unlocked or read-locked.
@@ -119,29 +157,29 @@
#define rw_owner(rw) rw_wowner(rw)
#ifndef INVARIANTS
-#define _rw_assert(rw, what, file, line)
+#define __rw_assert(c, what, file, line)
#endif
void
-assert_rw(struct lock_object *lock, int what)
+assert_rw(const struct lock_object *lock, int what)
{
- rw_assert((struct rwlock *)lock, what);
+ rw_assert((const struct rwlock *)lock, what);
}
void
-lock_rw(struct lock_object *lock, int how)
+lock_rw(struct lock_object *lock, uintptr_t how)
{
struct rwlock *rw;
rw = (struct rwlock *)lock;
if (how)
+ rw_rlock(rw);
+ else
rw_wlock(rw);
- else
- rw_rlock(rw);
}
-int
+uintptr_t
unlock_rw(struct lock_object *lock)
{
struct rwlock *rw;
@@ -150,18 +188,18 @@
rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
if (rw->rw_lock & RW_LOCK_READ) {
rw_runlock(rw);
- return (0);
+ return (1);
} else {
rw_wunlock(rw);
- return (1);
+ return (0);
}
}
#ifdef KDTRACE_HOOKS
int
-owner_rw(struct lock_object *lock, struct thread **owner)
+owner_rw(const struct lock_object *lock, struct thread **owner)
{
- struct rwlock *rw = (struct rwlock *)lock;
+ const struct rwlock *rw = (const struct rwlock *)lock;
uintptr_t x = rw->rw_lock;
*owner = rw_wowner(rw);
@@ -171,12 +209,15 @@
#endif
void
-rw_init_flags(struct rwlock *rw, const char *name, int opts)
+_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
+ struct rwlock *rw;
int flags;
+ rw = rwlock2rw(c);
+
MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
- RW_RECURSE)) == 0);
+ RW_RECURSE | RW_NEW)) == 0);
ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
("%s: rw_lock not aligned for %s: %p", __func__, name,
&rw->rw_lock));
@@ -192,16 +233,21 @@
flags |= LO_RECURSABLE;
if (opts & RW_QUIET)
flags |= LO_QUIET;
+ if (opts & RW_NEW)
+ flags |= LO_NEW;
+ lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
rw->rw_lock = RW_UNLOCKED;
rw->rw_recurse = 0;
- lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
}
void
-rw_destroy(struct rwlock *rw)
+_rw_destroy(volatile uintptr_t *c)
{
+ struct rwlock *rw;
+ rw = rwlock2rw(c);
+
KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
rw->rw_lock = RW_DESTROYED;
@@ -213,7 +259,7 @@
{
struct rw_args *args = arg;
- rw_init(args->ra_rw, args->ra_desc);
+ rw_init((struct rwlock *)args->ra_rw, args->ra_desc);
}
void
@@ -221,22 +267,30 @@
{
struct rw_args_flags *args = arg;
- rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
+ rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
+ args->ra_flags);
}
int
-rw_wowned(struct rwlock *rw)
+_rw_wowned(const volatile uintptr_t *c)
{
- return (rw_wowner(rw) == curthread);
+ return (rw_wowner(rwlock2rw(c)) == curthread);
}
void
-_rw_wlock(struct rwlock *rw, const char *file, int line)
+_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
if (SCHEDULER_STOPPED())
return;
+
+ rw = rwlock2rw(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
+ curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
@@ -248,13 +302,19 @@
}
int
-_rw_try_wlock(struct rwlock *rw, const char *file, int line)
+__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
int rval;
if (SCHEDULER_STOPPED())
return (1);
+ rw = rwlock2rw(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
+ curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
@@ -270,6 +330,9 @@
if (rval) {
WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
file, line);
+ if (!rw_recursed(rw))
+ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE,
+ rw, 0, 0, file, line);
curthread->td_locks++;
}
return (rval);
@@ -276,15 +339,18 @@
}
void
-_rw_wunlock(struct rwlock *rw, const char *file, int line)
+_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
if (SCHEDULER_STOPPED())
return;
+
+ rw = rwlock2rw(c);
+
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
- _rw_assert(rw, RA_WLOCKED, file, line);
- curthread->td_locks--;
+ __rw_assert(c, RA_WLOCKED, file, line);
WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
line);
@@ -291,6 +357,7 @@
if (!rw_recursed(rw))
LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
__rw_wunlock(rw, curthread, file, line);
+ curthread->td_locks--;
}
/*
* Determines whether a new reader can acquire a lock. Succeeds if the
@@ -305,8 +372,9 @@
RW_LOCK_READ)
void
-_rw_rlock(struct rwlock *rw, const char *file, int line)
+__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
volatile struct thread *owner;
@@ -318,26 +386,41 @@
int contested = 0;
#endif
uintptr_t v;
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ uintptr_t state;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
+ int64_t all_time = 0;
#endif
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_RWLOCKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
+ rw = rwlock2rw(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
+ curthread, rw->lock_object.lo_name, file, line));
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
KASSERT(rw_wowner(rw) != curthread,
- ("%s (%s): wlock already held @ %s:%d", __func__,
+ ("rw_rlock: wlock already held for %s @ %s:%d",
rw->lock_object.lo_name, file, line));
WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
- for (;;) {
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ all_time -= lockstat_nsecs(&rw->lock_object);
+ state = rw->rw_lock;
#endif
+ for (;;) {
/*
* Handle the easy case. If no other thread has a write
* lock, then try to bump up the count of read locks. Note
@@ -366,6 +449,9 @@
}
continue;
}
+#ifdef KDTRACE_HOOKS
+ lda.spin_cnt++;
+#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -385,24 +471,33 @@
CTR3(KTR_LOCK,
"%s: spinning on %p held by %p",
__func__, rw, owner);
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(curthread), "spinning",
+ "lockname:\"%s\"", rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) ==
- owner && TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ owner && TD_IS_RUNNING(owner))
+ lock_delay(&lda);
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(curthread), "running");
continue;
}
- } else if (spintries < ROWNER_RETRIES) {
+ } else if (spintries < rowner_retries) {
spintries++;
- for (i = 0; i < ROWNER_LOOPS; i++) {
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+ "spinning", "lockname:\"%s\"",
+ rw->lock_object.lo_name);
+ for (i = 0; i < rowner_loops; i++) {
v = rw->rw_lock;
if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
break;
cpu_spinwait();
}
- if (i != ROWNER_LOOPS)
+#ifdef KDTRACE_HOOKS
+ lda.spin_cnt += rowner_loops - i;
+#endif
+ KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+ "running");
+ if (i != rowner_loops)
continue;
}
#endif
@@ -472,11 +567,11 @@
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
rw);
#ifdef KDTRACE_HOOKS
- sleep_time -= lockstat_nsecs();
+ sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
- sleep_time += lockstat_nsecs();
+ sleep_time += lockstat_nsecs(&rw->lock_object);
sleep_cnt++;
#endif
if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -483,7 +578,19 @@
CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
__func__, rw);
}
+#ifdef KDTRACE_HOOKS
+ all_time += lockstat_nsecs(&rw->lock_object);
+ if (sleep_time)
+ LOCKSTAT_RECORD4(LS_RW_RLOCK_BLOCK, rw, sleep_time,
+ LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
+ (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+ /* Record only the loops spinning and not sleeping. */
+ if (lda.spin_cnt > sleep_cnt)
+ LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time,
+ LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
+ (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+#endif
/*
* TODO: acquire "owner of record" here. Here be turnstile dragons
* however. turnstiles don't like owners changing between calls to
@@ -495,26 +602,23 @@
WITNESS_LOCK(&rw->lock_object, 0, file, line);
curthread->td_locks++;
curthread->td_rw_rlocks++;
-#ifdef KDTRACE_HOOKS
- if (sleep_time)
- LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);
-
- /*
- * Record only the loops spinning and not sleeping.
- */
- if (spin_cnt > sleep_cnt)
- LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
-#endif
}
int
-_rw_try_rlock(struct rwlock *rw, const char *file, int line)
+__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
uintptr_t x;
if (SCHEDULER_STOPPED())
return (1);
+ rw = rwlock2rw(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
+ curthread, rw->lock_object.lo_name, file, line));
+
for (;;) {
x = rw->rw_lock;
KASSERT(rw->rw_lock != RW_DESTROYED,
@@ -525,6 +629,8 @@
LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
line);
WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
+ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE,
+ rw, 0, 0, file, line);
curthread->td_locks++;
curthread->td_rw_rlocks++;
return (1);
@@ -536,8 +642,9 @@
}
void
-_rw_runlock(struct rwlock *rw, const char *file, int line)
+_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
struct turnstile *ts;
uintptr_t x, v, queue;
@@ -544,11 +651,11 @@
if (SCHEDULER_STOPPED())
return;
+ rw = rwlock2rw(c);
+
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
- _rw_assert(rw, RA_RLOCKED, file, line);
- curthread->td_locks--;
- curthread->td_rw_rlocks--;
+ __rw_assert(c, RA_RLOCKED, file, line);
WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
@@ -642,6 +749,8 @@
break;
}
LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
+ curthread->td_locks--;
+ curthread->td_rw_rlocks--;
}
/*
@@ -650,8 +759,10 @@
* read or write lock.
*/
void
-_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
+__rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
+ int line)
{
+ struct rwlock *rw;
struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
volatile struct thread *owner;
@@ -663,15 +774,26 @@
uint64_t waittime = 0;
int contested = 0;
#endif
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ uintptr_t state;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
+ int64_t all_time = 0;
#endif
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_RWLOCKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
+ rw = rwlock2rw(c);
+
if (rw_wlocked(rw)) {
KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
("%s: recursing but non-recursive rw %s @ %s:%d\n",
@@ -686,10 +808,16 @@
CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
- while (!_rw_write_lock(rw, tid)) {
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ all_time -= lockstat_nsecs(&rw->lock_object);
+ state = rw->rw_lock;
#endif
+ for (;;) {
+ if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid))
+ break;
+#ifdef KDTRACE_HOOKS
+ lda.spin_cnt++;
+#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -707,17 +835,18 @@
if (LOCK_LOG_TEST(&rw->lock_object, 0))
CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
__func__, rw, owner);
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+ "spinning", "lockname:\"%s\"",
+ rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
+ KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+ "running");
continue;
}
if ((v & RW_LOCK_READ) && RW_READERS(v) &&
- spintries < ROWNER_RETRIES) {
+ spintries < rowner_retries) {
if (!(v & RW_LOCK_WRITE_SPINNER)) {
if (!atomic_cmpset_ptr(&rw->rw_lock, v,
v | RW_LOCK_WRITE_SPINNER)) {
@@ -725,15 +854,20 @@
}
}
spintries++;
- for (i = 0; i < ROWNER_LOOPS; i++) {
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+ "spinning", "lockname:\"%s\"",
+ rw->lock_object.lo_name);
+ for (i = 0; i < rowner_loops; i++) {
if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
break;
cpu_spinwait();
}
+ KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+ "running");
#ifdef KDTRACE_HOOKS
- spin_cnt += ROWNER_LOOPS - i;
+ lda.spin_cnt += rowner_loops - i;
#endif
- if (i != ROWNER_LOOPS)
+ if (i != rowner_loops)
continue;
}
#endif
@@ -799,11 +933,11 @@
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
rw);
#ifdef KDTRACE_HOOKS
- sleep_time -= lockstat_nsecs();
+ sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
- sleep_time += lockstat_nsecs();
+ sleep_time += lockstat_nsecs(&rw->lock_object);
sleep_cnt++;
#endif
if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -813,18 +947,21 @@
spintries = 0;
#endif
}
- LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
- waittime, file, line);
#ifdef KDTRACE_HOOKS
+ all_time += lockstat_nsecs(&rw->lock_object);
if (sleep_time)
- LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);
+ LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time,
+ LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
+ (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
- /*
- * Record only the loops spinning and not sleeping.
- */
- if (spin_cnt > sleep_cnt)
- LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
+ /* Record only the loops spinning and not sleeping. */
+ if (lda.spin_cnt > sleep_cnt)
+ LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time,
+ LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
+ (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
#endif
+ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
+ waittime, file, line);
}
/*
@@ -833,8 +970,10 @@
* least one thread is waiting on this lock.
*/
void
-_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
+__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
+ int line)
{
+ struct rwlock *rw;
struct turnstile *ts;
uintptr_t v;
int queue;
@@ -842,6 +981,8 @@
if (SCHEDULER_STOPPED())
return;
+ rw = rwlock2rw(c);
+
if (rw_wlocked(rw) && rw_recursed(rw)) {
rw->rw_recurse--;
if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -898,8 +1039,9 @@
* lock. Returns true if the upgrade succeeded and false otherwise.
*/
int
-_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
+__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
uintptr_t v, x, tid;
struct turnstile *ts;
int success;
@@ -907,9 +1049,11 @@
if (SCHEDULER_STOPPED())
return (1);
+ rw = rwlock2rw(c);
+
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
- _rw_assert(rw, RA_RLOCKED, file, line);
+ __rw_assert(c, RA_RLOCKED, file, line);
/*
* Attempt to switch from one reader to a writer. If there
@@ -971,8 +1115,9 @@
* Downgrade a write lock into a single read lock.
*/
void
-_rw_downgrade(struct rwlock *rw, const char *file, int line)
+__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
+ struct rwlock *rw;
struct turnstile *ts;
uintptr_t tid, v;
int rwait, wwait;
@@ -980,9 +1125,11 @@
if (SCHEDULER_STOPPED())
return;
+ rw = rwlock2rw(c);
+
KASSERT(rw->rw_lock != RW_DESTROYED,
("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
- _rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
+ __rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
if (rw_recursed(rw))
panic("downgrade of a recursed lock");
@@ -1036,7 +1183,7 @@
#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
-#undef _rw_assert
+#undef __rw_assert
#endif
/*
@@ -1045,16 +1192,22 @@
* thread owns an rlock.
*/
void
-_rw_assert(struct rwlock *rw, int what, const char *file, int line)
+__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
+ const struct rwlock *rw;
if (panicstr != NULL)
return;
+
+ rw = rwlock2rw(c);
+
switch (what) {
case RA_LOCKED:
case RA_LOCKED | RA_RECURSED:
case RA_LOCKED | RA_NOTRECURSED:
case RA_RLOCKED:
+ case RA_RLOCKED | RA_RECURSED:
+ case RA_RLOCKED | RA_NOTRECURSED:
#ifdef WITNESS
witness_assert(&rw->lock_object, what, file, line);
#else
@@ -1064,13 +1217,13 @@
* has a lock at all, fail.
*/
if (rw->rw_lock == RW_UNLOCKED ||
- (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
+ (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
rw_wowner(rw) != curthread)))
panic("Lock %s not %slocked @ %s:%d\n",
- rw->lock_object.lo_name, (what == RA_RLOCKED) ?
+ rw->lock_object.lo_name, (what & RA_RLOCKED) ?
"read " : "", file, line);
- if (!(rw->rw_lock & RW_LOCK_READ)) {
+ if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
if (rw_recursed(rw)) {
if (what & RA_NOTRECURSED)
panic("Lock %s recursed @ %s:%d\n",
@@ -1118,12 +1271,12 @@
#ifdef DDB
void
-db_show_rwlock(struct lock_object *lock)
+db_show_rwlock(const struct lock_object *lock)
{
- struct rwlock *rw;
+ const struct rwlock *rw;
struct thread *td;
- rw = (struct rwlock *)lock;
+ rw = (const struct rwlock *)lock;
db_printf(" state: ");
if (rw->rw_lock == RW_UNLOCKED)
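[Note on the kern_rwlock.c changes above: the public entry points now take a volatile uintptr_t cookie and recover the lock with rwlock2rw(), lock_rw()/unlock_rw() move to the uintptr_t "how" convention (non-zero now means a read lock), and the hand-rolled cpu_spinwait() loops are replaced by the lock_delay() backoff machinery, with the lockstat probes extended to record total acquisition time. Below is a minimal sketch of the new spin pattern, using only names visible in the diff (lock_delay_arg, lock_delay_arg_init, lock_delay, rw_delay); it is an illustration, not the exact kernel source.

#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);	/* CPU-scaled backoff tuning */
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);	/* no backoff, only spin counting */
#endif
	...
	/*
	 * Instead of a bare cpu_spinwait() loop, back off while the current
	 * writer is still running; lock_delay() also advances lda.spin_cnt,
	 * which feeds the LS_RW_*_SPIN lockstat records.
	 */
	while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
	    TD_IS_RUNNING(owner))
		lock_delay(&lda);

For scale, the rw_delay defaults installed by the SYSINIT near the top of the file work out, on an 8-CPU machine (mp_ncpus = 8), to min = 40, step = 100, and max = 2000.]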
Modified: trunk/sys/kern/kern_sdt.c
===================================================================
--- trunk/sys/kern/kern_sdt.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_sdt.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 2006-2008 John Birrell <jb at FreeBSD.org>
*
@@ -22,318 +23,34 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
- *
- * Backend for the Statically Defined Tracing (SDT) kernel support. This is
- * required to allow a module to load even though DTrace kernel support may
- * not be present. A module may be built with SDT probes in it which are
- * registered and deregistered via SYSINIT/SYSUNINIT.
- *
+ * $FreeBSD: stable/10/sys/kern/kern_sdt.c 263283 2014-03-18 00:55:19Z markj $
*/
#include "opt_kdtrace.h"
-#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/linker.h>
-#include <sys/lock.h>
-#include <sys/proc.h>
-#include <sys/sx.h>
+#include <sys/kdb.h>
#include <sys/sdt.h>
-/*
- * This is the list of statically defined tracing providers.
- */
-static TAILQ_HEAD(sdt_provider_list_head, sdt_provider) sdt_provider_list;
+SDT_PROVIDER_DEFINE(sdt);
/*
- * Mutex to serialise access to the SDT provider list.
+ * Hook for the DTrace probe function. The SDT provider will set this to
+ * dtrace_probe() when it loads.
*/
-static struct sx sdt_sx;
-
-/*
- * Hook for the DTrace probe function. The 'sdt' provider will set this
- * to dtrace_probe when it loads.
- */
sdt_probe_func_t sdt_probe_func = sdt_probe_stub;
-static sdt_provider_listall_func_t sdt_provider_register_func = NULL;
-static sdt_provider_listall_func_t sdt_provider_deregister_func = NULL;
-static sdt_probe_listall_func_t sdt_probe_register_func = NULL;
-
-static void *sdt_provider_register_arg;
-static void *sdt_provider_deregister_arg;
-static void *sdt_probe_register_arg;
-
-static int sdt_provider_listall_locked(sdt_provider_listall_func_t, void *);
-
/*
* This is a stub for probe calls in case kernel DTrace support isn't
- * compiled in. It should never get called because there is no DTrace
- * support to enable it.
+ * enabled. It should never get called because there is no DTrace support
+ * to enable it.
*/
void
sdt_probe_stub(uint32_t id, uintptr_t arg0, uintptr_t arg1,
uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
{
- printf("sdt_probe_stub: Why did this get called?\n");
-}
-/*
- * Called from SYSINIT to register a provider.
- */
-void
-sdt_provider_register(void *arg)
-{
- struct sdt_provider *prov = arg;
-
- sx_xlock(&sdt_sx);
-
- TAILQ_INSERT_TAIL(&sdt_provider_list, prov, prov_entry);
-
- TAILQ_INIT(&prov->probe_list);
-
- if (sdt_provider_register_func != NULL)
- sdt_provider_register_func(prov, sdt_provider_register_arg);
-
- sx_xunlock(&sdt_sx);
+ printf("sdt_probe_stub: unexpectedly called\n");
+ kdb_backtrace();
}
-
-/*
- * Called from SYSUNINIT to de-register a provider.
- */
-void
-sdt_provider_deregister(void *arg)
-{
- struct sdt_provider *prov = arg;
-
- sx_xlock(&sdt_sx);
-
- TAILQ_REMOVE(&sdt_provider_list, prov, prov_entry);
-
- if (sdt_provider_deregister_func != NULL)
- sdt_provider_deregister_func(prov, sdt_provider_deregister_arg);
-
- sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSINIT to register a statically defined trace probe.
- */
-void
-sdt_probe_register(void *arg)
-{
- struct sdt_probe *probe = arg;
-
- /*
- * Check the reference structure version. Only version 1 is
- * supported at the moment.
- */
- if (probe->version != sizeof(struct sdt_probe)) {
- printf("%s:%s:%s has version %d when %d required\n", probe->mod, probe->func, probe->name, probe->version, (int) sizeof(struct sdt_probe));
- return;
- }
-
- sx_xlock(&sdt_sx);
-
- TAILQ_INSERT_TAIL(&probe->prov->probe_list, probe, probe_entry);
-
- TAILQ_INIT(&probe->argtype_list);
-
- probe->state = SDT_INIT;
-
- if (sdt_probe_register_func != NULL)
- sdt_probe_register_func(probe, sdt_provider_register_arg);
-
- sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSUNINIT to de-register a statically defined trace probe.
- */
-void
-sdt_probe_deregister(void *arg)
-{
- struct sdt_probe *probe = arg;
-
- sx_xlock(&sdt_sx);
-
- if (probe->state == SDT_INIT) {
- TAILQ_REMOVE(&probe->prov->probe_list, probe, probe_entry);
- probe->state = SDT_UNINIT;
- }
-
- sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSINIT to register a statically defined trace probe argument.
- */
-void
-sdt_argtype_register(void *arg)
-{
- struct sdt_argtype *argtype = arg;
-
- sx_xlock(&sdt_sx);
-
- TAILQ_INSERT_TAIL(&argtype->probe->argtype_list, argtype, argtype_entry);
-
- argtype->probe->n_args++;
-
- sx_xunlock(&sdt_sx);
-}
-
-/*
- * Called from SYSUNINIT to de-register a statically defined trace probe argument.
- */
-void
-sdt_argtype_deregister(void *arg)
-{
- struct sdt_argtype *argtype = arg;
-
- sx_xlock(&sdt_sx);
-
- TAILQ_REMOVE(&argtype->probe->argtype_list, argtype, argtype_entry);
-
- sx_xunlock(&sdt_sx);
-}
-
-static void
-sdt_init(void *arg)
-{
- sx_init_flags(&sdt_sx, "Statically Defined Tracing", SX_NOWITNESS);
-
- TAILQ_INIT(&sdt_provider_list);
-}
-
-SYSINIT(sdt, SI_SUB_KDTRACE, SI_ORDER_FIRST, sdt_init, NULL);
-
-static void
-sdt_uninit(void *arg)
-{
- sx_destroy(&sdt_sx);
-}
-
-SYSUNINIT(sdt, SI_SUB_KDTRACE, SI_ORDER_FIRST, sdt_uninit, NULL);
-
-/*
- * List statically defined tracing providers.
- */
-int
-sdt_provider_listall(sdt_provider_listall_func_t callback_func, void *arg)
-{
- int error;
-
- sx_xlock(&sdt_sx);
- error = sdt_provider_listall_locked(callback_func, arg);
- sx_xunlock(&sdt_sx);
-
- return (error);
-}
-
-static int
-sdt_provider_listall_locked(sdt_provider_listall_func_t callback_func,
- void *arg)
-{
- int error = 0;
- struct sdt_provider *prov;
-
- sx_assert(&sdt_sx, SX_XLOCKED);
-
- TAILQ_FOREACH(prov, &sdt_provider_list, prov_entry) {
- if ((error = callback_func(prov, arg)) != 0)
- break;
- }
-
- return (error);
-}
-
-/*
- * List statically defined tracing probes.
- */
-int
-sdt_probe_listall(struct sdt_provider *prov,
- sdt_probe_listall_func_t callback_func,void *arg)
-{
- int error = 0;
- int locked;
- struct sdt_probe *probe;
-
- locked = sx_xlocked(&sdt_sx);
- if (!locked)
- sx_xlock(&sdt_sx);
-
- TAILQ_FOREACH(probe, &prov->probe_list, probe_entry) {
- if ((error = callback_func(probe, arg)) != 0)
- break;
- }
-
- if (!locked)
- sx_xunlock(&sdt_sx);
-
- return (error);
-}
-
-/*
- * List statically defined tracing probe arguments.
- */
-int
-sdt_argtype_listall(struct sdt_probe *probe,
- sdt_argtype_listall_func_t callback_func,void *arg)
-{
- int error = 0;
- int locked;
- struct sdt_argtype *argtype;
-
- locked = sx_xlocked(&sdt_sx);
- if (!locked)
- sx_xlock(&sdt_sx);
-
- TAILQ_FOREACH(argtype, &probe->argtype_list, argtype_entry) {
- if ((error = callback_func(argtype, arg)) != 0)
- break;
- }
-
- if (!locked)
- sx_xunlock(&sdt_sx);
-
- return (error);
-}
-
-void sdt_register_callbacks(sdt_provider_listall_func_t register_prov,
- void *reg_prov_arg, sdt_provider_listall_func_t deregister_prov,
- void *dereg_prov_arg, sdt_probe_listall_func_t register_probe,
- void * reg_probe_arg)
-{
-
- sx_xlock(&sdt_sx);
- sdt_provider_register_func = register_prov;
- sdt_provider_deregister_func = deregister_prov;
- sdt_probe_register_func = register_probe;
-
- sdt_provider_register_arg = reg_prov_arg;
- sdt_provider_deregister_arg = dereg_prov_arg;
- sdt_probe_register_arg = reg_probe_arg;
-
- sdt_provider_listall_locked(register_prov, reg_prov_arg);
- sx_xunlock(&sdt_sx);
-}
-
-void sdt_deregister_callbacks(void)
-{
-
- sx_xlock(&sdt_sx);
- sdt_provider_listall_locked(sdt_provider_deregister_func,
- sdt_provider_deregister_arg);
-
- sdt_provider_register_func = NULL;
- sdt_provider_deregister_func = NULL;
- sdt_probe_register_func = NULL;
-
- sdt_provider_register_arg = NULL;
- sdt_provider_deregister_arg = NULL;
- sdt_probe_register_arg = NULL;
- sx_xunlock(&sdt_sx);
-}
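[Note on the kern_sdt.c rewrite above: it drops the in-kernel provider/probe registry (the TAILQ lists, the sdt_sx lock, and the SYSINIT/SYSUNINIT registration hooks), leaving only SDT_PROVIDER_DEFINE(sdt), the probe-function hook, and a stub that now logs and backtraces if it is ever reached. A minimal sketch of the remaining hook pattern follows; the final call site is a hypothetical example, not code from this commit.

	/* kern_sdt.c retains a single hook, initialized to the stub: */
	sdt_probe_func_t sdt_probe_func = sdt_probe_stub;

	/*
	 * The DTrace SDT provider repoints the hook at dtrace_probe() when
	 * it loads, so an enabled probe site (hypothetical) simply calls
	 * through the pointer:
	 */
	sdt_probe_func(probe_id, arg0, arg1, arg2, arg3, arg4);]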
Modified: trunk/sys/kern/kern_sema.c
===================================================================
--- trunk/sys/kern/kern_sema.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_sema.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (C) 2001 Jason Evans <jasone at freebsd.org>. All rights reserved.
*
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_sema.c 139804 2005-01-06 23:35:40Z imp $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/kern/kern_sharedpage.c
===================================================================
--- trunk/sys/kern/kern_sharedpage.c 2018-05-25 20:55:47 UTC (rev 9947)
+++ trunk/sys/kern/kern_sharedpage.c 2018-05-25 20:58:03 UTC (rev 9948)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2010, 2012 Konstantin Belousov <kib at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_sharedpage.c 254649 2013-08-22 07:39:53Z kib $");
#include "opt_compat.h"
#include "opt_vm.h"
@@ -34,7 +35,7 @@
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mutex.h>
+#include <sys/rwlock.h>
#include <sys/sysent.h>
#include <sys/sysctl.h>
#include <sys/vdso.h>
@@ -107,12 +108,11 @@
sx_init(&shared_page_alloc_sx, "shpsx");
shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
VM_PROT_DEFAULT, 0, NULL);
- VM_OBJECT_LOCK(shared_page_obj);
- m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY |
- VM_ALLOC_ZERO);
+ VM_OBJECT_WLOCK(shared_page_obj);
+ m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO);
m->valid = VM_PAGE_BITS_ALL;
- VM_OBJECT_UNLOCK(shared_page_obj);
- addr = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+ VM_OBJECT_WUNLOCK(shared_page_obj);
+ addr = kva_alloc(PAGE_SIZE);
pmap_qenter(addr, &m, 1);
shared_page_mapping = (char *)addr;
}
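[Note on the kern_sharedpage.c hunk above: it tracks FreeBSD 10 VM API changes. The shared-page object is now guarded by VM_OBJECT_WLOCK()/VM_OBJECT_WUNLOCK(), consistent with the switch from <sys/mutex.h> to <sys/rwlock.h>; vm_page_grab() no longer takes VM_ALLOC_RETRY; and the kernel virtual address comes from kva_alloc() rather than kmem_alloc_nofault(). Condensed from the diff itself, the updated mapping sequence is:

	VM_OBJECT_WLOCK(shared_page_obj);
	m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO);
	m->valid = VM_PAGE_BITS_ALL;
	VM_OBJECT_WUNLOCK(shared_page_obj);
	addr = kva_alloc(PAGE_SIZE);	/* replaces kmem_alloc_nofault() */
	pmap_qenter(addr, &m, 1);]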