[Midnightbsd-cvs] src [9945] trunk/sys/kern: sync with freebsd 10-stable
laffer1 at midnightbsd.org
Fri May 25 16:53:39 EDT 2018
Revision: 9945
http://svnweb.midnightbsd.org/src/?rev=9945
Author: laffer1
Date: 2018-05-25 16:53:39 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd 10-stable
Modified Paths:
--------------
trunk/sys/kern/kern_dtrace.c
trunk/sys/kern/kern_idle.c
trunk/sys/kern/kern_ktrace.c
trunk/sys/kern/kern_linker.c
trunk/sys/kern/kern_lock.c
trunk/sys/kern/kern_lockf.c
trunk/sys/kern/kern_lockstat.c
trunk/sys/kern/kern_loginclass.c
trunk/sys/kern/kern_malloc.c
trunk/sys/kern/kern_mbuf.c
trunk/sys/kern/kern_mib.c
trunk/sys/kern/kern_module.c
trunk/sys/kern/kern_mtxpool.c
trunk/sys/kern/kern_mutex.c
trunk/sys/kern/kern_ntptime.c
trunk/sys/kern/kern_osd.c
Added Paths:
-----------
trunk/sys/kern/imgact_binmisc.c
trunk/sys/kern/kern_ffclock.c
Added: trunk/sys/kern/imgact_binmisc.c
===================================================================
--- trunk/sys/kern/imgact_binmisc.c (rev 0)
+++ trunk/sys/kern/imgact_binmisc.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -0,0 +1,766 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2013-16, Stacey D. Son
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/imgact_binmisc.c 302234 2016-06-27 21:50:30Z bdrewery $");
+
+#include <sys/param.h>
+#include <sys/ctype.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/imgact_binmisc.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+
+/**
+ * Miscellaneous binary interpreter image activator.
+ *
+ * If the given target executable's header matches the 'xbe_magic' field of an
+ * 'interpreter_list' entry, the user-level interpreter named in that entry's
+ * 'xbe_interpreter' field is used to execute the binary. The magic comparison
+ * may be applied at a given offset into the header using the value in the
+ * 'xbe_moffset' field, and bits of the header may be masked using the
+ * 'xbe_mask' field. The 'interpreter_list' entries are managed using sysctl(3)
+ * as described in the <sys/imgact_binmisc.h> file.
+ */
+
+/*
+ * Node of the interpreter list.
+ */
+typedef struct imgact_binmisc_entry {
+ char *ibe_name;
+ uint8_t *ibe_magic;
+ uint32_t ibe_moffset;
+ uint32_t ibe_msize;
+ uint8_t *ibe_mask;
+ uint8_t *ibe_interpreter;
+ uint32_t ibe_interp_argcnt;
+ uint32_t ibe_interp_length;
+ uint32_t ibe_flags;
+ SLIST_ENTRY(imgact_binmisc_entry) link;
+} imgact_binmisc_entry_t;
+
+/*
+ * sysctl() commands.
+ */
+#define IBC_ADD 1 /* Add given entry. */
+#define IBC_REMOVE 2 /* Remove entry for a given name. */
+#define IBC_DISABLE 3 /* Disable entry for a given name. */
+#define IBC_ENABLE 4 /* Enable entry for a given name. */
+#define IBC_LOOKUP 5 /* Lookup and return entry for given name. */
+#define IBC_LIST	6	/* Get a snapshot of the interpreter list. */
+
+/*
+ * Interpreter string macros.
+ *
+ * They all start with '#' followed by a single letter:
+ */
+#define ISM_POUND '#' /* "##" is the escape sequence for single #. */
+#define ISM_OLD_ARGV0 'a' /* "#a" is replaced with the old argv0. */
+
+MALLOC_DEFINE(M_BINMISC, KMOD_NAME, "misc binary image activator");
+
+/* The interpreter list. */
+static SLIST_HEAD(, imgact_binmisc_entry) interpreter_list =
+ SLIST_HEAD_INITIALIZER(interpreter_list);
+
+static int interp_list_entry_count = 0;
+
+static struct mtx interp_list_mtx;
+
+int imgact_binmisc_exec(struct image_params *imgp);
+
+
+/*
+ * Populate the entry with the information about the interpreter.
+ */
+static void
+imgact_binmisc_populate_interp(char *str, imgact_binmisc_entry_t *ibe)
+{
+ uint32_t len = 0, argc = 1;
+ char t[IBE_INTERP_LEN_MAX];
+ char *sp, *tp;
+
+ bzero(t, sizeof(t));
+
+ /*
+ * Normalize interpreter string. Replace white space between args with
+ * single space.
+ */
+ sp = str; tp = t;
+ while (*sp != '\0') {
+ if (*sp == ' ' || *sp == '\t') {
+ if (++len > IBE_INTERP_LEN_MAX)
+ break;
+ *tp++ = ' ';
+ argc++;
+ while (*sp == ' ' || *sp == '\t')
+ sp++;
+ continue;
+ } else {
+ *tp++ = *sp++;
+ len++;
+ }
+ }
+ *tp = '\0';
+ len++;
+
+ ibe->ibe_interpreter = malloc(len, M_BINMISC, M_WAITOK|M_ZERO);
+
+ /* Populate all the ibe fields for the interpreter. */
+ memcpy(ibe->ibe_interpreter, t, len);
+ ibe->ibe_interp_argcnt = argc;
+ ibe->ibe_interp_length = len;
+}
+
+/*
+ * Allocate memory and populate a new entry for the interpreter table.
+ */
+static imgact_binmisc_entry_t *
+imgact_binmisc_new_entry(ximgact_binmisc_entry_t *xbe)
+{
+ imgact_binmisc_entry_t *ibe = NULL;
+ size_t namesz = min(strlen(xbe->xbe_name) + 1, IBE_NAME_MAX);
+
+ mtx_assert(&interp_list_mtx, MA_NOTOWNED);
+
+ ibe = malloc(sizeof(*ibe), M_BINMISC, M_WAITOK|M_ZERO);
+
+ ibe->ibe_name = malloc(namesz, M_BINMISC, M_WAITOK|M_ZERO);
+ strlcpy(ibe->ibe_name, xbe->xbe_name, namesz);
+
+ imgact_binmisc_populate_interp(xbe->xbe_interpreter, ibe);
+
+ ibe->ibe_magic = malloc(xbe->xbe_msize, M_BINMISC, M_WAITOK|M_ZERO);
+ memcpy(ibe->ibe_magic, xbe->xbe_magic, xbe->xbe_msize);
+
+ ibe->ibe_mask = malloc(xbe->xbe_msize, M_BINMISC, M_WAITOK|M_ZERO);
+ memcpy(ibe->ibe_mask, xbe->xbe_mask, xbe->xbe_msize);
+
+ ibe->ibe_moffset = xbe->xbe_moffset;
+ ibe->ibe_msize = xbe->xbe_msize;
+ ibe->ibe_flags = xbe->xbe_flags;
+
+ return (ibe);
+}
+
+/*
+ * Free the allocated memory for a given list item.
+ */
+static void
+imgact_binmisc_destroy_entry(imgact_binmisc_entry_t *ibe)
+{
+ if (!ibe)
+ return;
+ if (ibe->ibe_magic)
+ free(ibe->ibe_magic, M_BINMISC);
+ if (ibe->ibe_mask)
+ free(ibe->ibe_mask, M_BINMISC);
+ if (ibe->ibe_interpreter)
+ free(ibe->ibe_interpreter, M_BINMISC);
+ if (ibe->ibe_name)
+ free(ibe->ibe_name, M_BINMISC);
+ if (ibe)
+ free(ibe, M_BINMISC);
+}
+
+/*
+ * Find the interpreter in the list by the given name. Return NULL if not
+ * found.
+ */
+static imgact_binmisc_entry_t *
+imgact_binmisc_find_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_assert(&interp_list_mtx, MA_OWNED);
+
+ SLIST_FOREACH(ibe, &interpreter_list, link) {
+ if (strncmp(name, ibe->ibe_name, IBE_NAME_MAX) == 0)
+ return (ibe);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Add the given interpreter if it doesn't already exist. Return EEXIST
+ * if the name already exists in the interpreter list.
+ */
+static int
+imgact_binmisc_add_entry(ximgact_binmisc_entry_t *xbe)
+{
+ imgact_binmisc_entry_t *ibe;
+ char *p;
+ int cnt;
+
+ if (xbe->xbe_msize > IBE_MAGIC_MAX)
+ return (EINVAL);
+
+ for(cnt = 0, p = xbe->xbe_name; *p != 0; cnt++, p++)
+ if (cnt >= IBE_NAME_MAX || !isascii((int)*p))
+ return (EINVAL);
+
+ for(cnt = 0, p = xbe->xbe_interpreter; *p != 0; cnt++, p++)
+ if (cnt >= IBE_INTERP_LEN_MAX || !isascii((int)*p))
+ return (EINVAL);
+
+ /* Make sure we don't have any invalid #'s. */
+ p = xbe->xbe_interpreter;
+ while (1) {
+ p = strchr(p, '#');
+ if (!p)
+ break;
+
+ p++;
+ switch(*p) {
+ case ISM_POUND:
+ /* "##" */
+ p++;
+ break;
+
+ case ISM_OLD_ARGV0:
+ /* "#a" */
+ p++;
+ break;
+
+ case 0:
+ default:
+ /* Anything besides the above is invalid. */
+ return (EINVAL);
+ }
+ }
+
+ mtx_lock(&interp_list_mtx);
+ if (imgact_binmisc_find_entry(xbe->xbe_name) != NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (EEXIST);
+ }
+ mtx_unlock(&interp_list_mtx);
+
+ ibe = imgact_binmisc_new_entry(xbe);
+
+ mtx_lock(&interp_list_mtx);
+ SLIST_INSERT_HEAD(&interpreter_list, ibe, link);
+ interp_list_entry_count++;
+ mtx_unlock(&interp_list_mtx);
+
+ return (0);
+}
+
+/*
+ * Remove the interpreter in the list with the given name. Return ENOENT
+ * if not found.
+ */
+static int
+imgact_binmisc_remove_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+ SLIST_REMOVE(&interpreter_list, ibe, imgact_binmisc_entry, link);
+ interp_list_entry_count--;
+ mtx_unlock(&interp_list_mtx);
+
+ imgact_binmisc_destroy_entry(ibe);
+
+ return (0);
+}
+
+/*
+ * Disable the interpreter in the list with the given name. Return ENOENT
+ * if not found.
+ */
+static int
+imgact_binmisc_disable_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+
+ ibe->ibe_flags &= ~IBF_ENABLED;
+ mtx_unlock(&interp_list_mtx);
+
+ return (0);
+}
+
+/*
+ * Enable the interpreter in the list with the given name. Return ENOENT
+ * if not found.
+ */
+static int
+imgact_binmisc_enable_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+
+ ibe->ibe_flags |= IBF_ENABLED;
+ mtx_unlock(&interp_list_mtx);
+
+ return (0);
+}
+
+static int
+imgact_binmisc_populate_xbe(ximgact_binmisc_entry_t *xbe,
+ imgact_binmisc_entry_t *ibe)
+{
+ uint32_t i;
+
+ mtx_assert(&interp_list_mtx, MA_OWNED);
+
+ bzero(xbe, sizeof(*xbe));
+ strlcpy(xbe->xbe_name, ibe->ibe_name, IBE_NAME_MAX);
+
+ /* Copy interpreter string. Replace NULL breaks with space. */
+ memcpy(xbe->xbe_interpreter, ibe->ibe_interpreter,
+ ibe->ibe_interp_length);
+ for(i = 0; i < (ibe->ibe_interp_length - 1); i++)
+ if (xbe->xbe_interpreter[i] == '\0')
+ xbe->xbe_interpreter[i] = ' ';
+
+ memcpy(xbe->xbe_magic, ibe->ibe_magic, ibe->ibe_msize);
+ memcpy(xbe->xbe_mask, ibe->ibe_mask, ibe->ibe_msize);
+ xbe->xbe_version = IBE_VERSION;
+ xbe->xbe_flags = ibe->ibe_flags;
+ xbe->xbe_moffset = ibe->ibe_moffset;
+ xbe->xbe_msize = ibe->ibe_msize;
+
+ return (0);
+}
+
+/*
+ * Retrieve the interpreter with the given name and populate the
+ * ximgact_binmisc_entry structure. Return ENOENT if not found.
+ */
+static int
+imgact_binmisc_lookup_entry(char *name, ximgact_binmisc_entry_t *xbe)
+{
+ imgact_binmisc_entry_t *ibe;
+ int error = 0;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+
+ error = imgact_binmisc_populate_xbe(xbe, ibe);
+ mtx_unlock(&interp_list_mtx);
+
+ return (error);
+}
+
+/*
+ * Get a snapshot of all the interpreter entries in the list.
+ */
+static int
+imgact_binmisc_get_all_entries(struct sysctl_req *req)
+{
+ ximgact_binmisc_entry_t *xbe, *xbep;
+ imgact_binmisc_entry_t *ibe;
+ int error = 0, count;
+
+ mtx_lock(&interp_list_mtx);
+ count = interp_list_entry_count;
+ /* Don't block in malloc() while holding lock. */
+ xbe = malloc(sizeof(*xbe) * count, M_BINMISC, M_NOWAIT|M_ZERO);
+ if (!xbe) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOMEM);
+ }
+
+ xbep = xbe;
+ SLIST_FOREACH(ibe, &interpreter_list, link) {
+ error = imgact_binmisc_populate_xbe(xbep++, ibe);
+ if (error)
+ break;
+ }
+ mtx_unlock(&interp_list_mtx);
+
+ if (!error)
+ error = SYSCTL_OUT(req, xbe, sizeof(*xbe) * count);
+
+ free(xbe, M_BINMISC);
+ return (error);
+}
+
+/*
+ * sysctl() handler for manipulating the interpreter table.
+ * Not MP safe (locked by sysctl).
+ */
+static int
+sysctl_kern_binmisc(SYSCTL_HANDLER_ARGS)
+{
+ ximgact_binmisc_entry_t xbe;
+ int error = 0;
+
+ switch(arg2) {
+ case IBC_ADD:
+ /* Add an entry. Limited to IBE_MAX_ENTRIES. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ if (interp_list_entry_count == IBE_MAX_ENTRIES)
+ return (ENOSPC);
+ error = imgact_binmisc_add_entry(&xbe);
+ break;
+
+ case IBC_REMOVE:
+ /* Remove an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_remove_entry(xbe.xbe_name);
+ break;
+
+ case IBC_DISABLE:
+ /* Disable an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_disable_entry(xbe.xbe_name);
+ break;
+
+ case IBC_ENABLE:
+ /* Enable an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_enable_entry(xbe.xbe_name);
+ break;
+
+ case IBC_LOOKUP:
+ /* Lookup an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_lookup_entry(xbe.xbe_name, &xbe);
+ if (!error)
+ error = SYSCTL_OUT(req, &xbe, sizeof(xbe));
+ break;
+
+ case IBC_LIST:
+		/* Return a snapshot of the interpreter list. */
+
+ if (!req->oldptr) {
+ /* No pointer then just return the list size. */
+ error = SYSCTL_OUT(req, 0, interp_list_entry_count *
+ sizeof(ximgact_binmisc_entry_t));
+ return (error);
+ } else
+ if (!req->oldlen)
+ return (EINVAL);
+
+ error = imgact_binmisc_get_all_entries(req);
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ return (error);
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, binmisc, CTLFLAG_RW, 0,
+ "Image activator for miscellaneous binaries");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, add,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_ADD,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Add an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, remove,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_REMOVE,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Remove an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, disable,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_DISABLE,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Disable an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, enable,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_ENABLE,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Enable an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, lookup,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_RW|CTLFLAG_ANYBODY, NULL, IBC_LOOKUP,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Lookup an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, list,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_ANYBODY, NULL, IBC_LIST,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Get snapshot of all the activator entries");
+
+static imgact_binmisc_entry_t *
+imgact_binmisc_find_interpreter(const char *image_header)
+{
+ imgact_binmisc_entry_t *ibe;
+ const char *p;
+ int i;
+ size_t sz;
+
+ mtx_assert(&interp_list_mtx, MA_OWNED);
+
+ SLIST_FOREACH(ibe, &interpreter_list, link) {
+ if (!(IBF_ENABLED & ibe->ibe_flags))
+ continue;
+
+ p = image_header + ibe->ibe_moffset;
+ sz = ibe->ibe_msize;
+ if (IBF_USE_MASK & ibe->ibe_flags) {
+ /* Compare using mask. */
+ for (i = 0; i < sz; i++)
+ if ((*p++ ^ ibe->ibe_magic[i]) &
+ ibe->ibe_mask[i])
+ break;
+ } else {
+ for (i = 0; i < sz; i++)
+ if (*p++ ^ ibe->ibe_magic[i])
+ break;
+ }
+ if (i == ibe->ibe_msize)
+ return (ibe);
+ }
+ return (NULL);
+}
+
+int
+imgact_binmisc_exec(struct image_params *imgp)
+{
+ const char *image_header = imgp->image_header;
+ const char *fname = NULL;
+ int error = 0;
+ size_t offset, l;
+ imgact_binmisc_entry_t *ibe;
+ struct sbuf *sname;
+ char *s, *d;
+
+ /* Do we have an interpreter for the given image header? */
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_interpreter(image_header)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (-1);
+ }
+
+ /* No interpreter nesting allowed. */
+ if (imgp->interpreted & IMGACT_BINMISC) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOEXEC);
+ }
+
+ imgp->interpreted |= IMGACT_BINMISC;
+
+ if (imgp->args->fname != NULL) {
+ fname = imgp->args->fname;
+ sname = NULL;
+ } else {
+ /* Use the fdescfs(5) path for fexecve(2). */
+ sname = sbuf_new_auto();
+ sbuf_printf(sname, "/dev/fd/%d", imgp->args->fd);
+ sbuf_finish(sname);
+ fname = sbuf_data(sname);
+ }
+
+
+ /*
+ * We need to "push" the interpreter in the arg[] list. To do this,
+ * we first shift all the other values in the `begin_argv' area to
+ * provide the exact amount of room for the values added. Set up
+ * `offset' as the number of bytes to be added to the `begin_argv'
+ * area.
+ */
+ offset = ibe->ibe_interp_length;
+
+ /* Adjust the offset for #'s. */
+ s = ibe->ibe_interpreter;
+ while (1) {
+ s = strchr(s, '#');
+ if (!s)
+ break;
+
+ s++;
+ switch(*s) {
+ case ISM_POUND:
+ /* "##" -> "#": reduce offset by one. */
+ offset--;
+ break;
+
+ case ISM_OLD_ARGV0:
+ /* "#a" -> (old argv0): increase offset to fit fname */
+ offset += strlen(fname) - 2;
+ break;
+
+ default:
+ /* Hmm... This shouldn't happen. */
+ mtx_unlock(&interp_list_mtx);
+ printf("%s: Unknown macro #%c sequence in "
+ "interpreter string\n", KMOD_NAME, *(s + 1));
+ error = EINVAL;
+ goto done;
+ }
+ s++;
+ }
+
+ /* Check to make sure we won't overrun the stringspace. */
+ if (offset > imgp->args->stringspace) {
+ mtx_unlock(&interp_list_mtx);
+ error = E2BIG;
+ goto done;
+ }
+
+ /* Make room for the interpreter */
+ bcopy(imgp->args->begin_argv, imgp->args->begin_argv + offset,
+ imgp->args->endp - imgp->args->begin_argv);
+
+ /* Adjust everything by the offset. */
+ imgp->args->begin_envv += offset;
+ imgp->args->endp += offset;
+ imgp->args->stringspace -= offset;
+
+ /* Add the new argument(s) in the count. */
+ imgp->args->argc += ibe->ibe_interp_argcnt;
+
+ /*
+ * The original arg[] list has been shifted appropriately. Copy in
+ * the interpreter path.
+ */
+ s = ibe->ibe_interpreter;
+ d = imgp->args->begin_argv;
+ while(*s != '\0') {
+ switch (*s) {
+ case '#':
+ /* Handle "#" in interpreter string. */
+ s++;
+ switch(*s) {
+ case ISM_POUND:
+ /* "##": Replace with a single '#' */
+ *d++ = '#';
+ break;
+
+ case ISM_OLD_ARGV0:
+ /* "#a": Replace with old arg0 (fname). */
+ if ((l = strlen(fname)) != 0) {
+ memcpy(d, fname, l);
+ d += l;
+ }
+ break;
+
+ default:
+ /* Shouldn't happen but skip it if it does. */
+ break;
+ }
+ break;
+
+ case ' ':
+ /* Replace space with NUL to separate arguments. */
+ *d++ = '\0';
+ break;
+
+ default:
+ *d++ = *s;
+ break;
+ }
+ s++;
+ }
+ *d = '\0';
+ mtx_unlock(&interp_list_mtx);
+
+ if (!error)
+ imgp->interpreter_name = imgp->args->begin_argv;
+
+
+done:
+ if (sname)
+ sbuf_delete(sname);
+ return (error);
+}
+
+static void
+imgact_binmisc_init(void *arg)
+{
+
+ mtx_init(&interp_list_mtx, KMOD_NAME, NULL, MTX_DEF);
+}
+
+static void
+imgact_binmisc_fini(void *arg)
+{
+ imgact_binmisc_entry_t *ibe, *ibe_tmp;
+
+ /* Free all the interpreters. */
+ mtx_lock(&interp_list_mtx);
+ SLIST_FOREACH_SAFE(ibe, &interpreter_list, link, ibe_tmp) {
+ SLIST_REMOVE(&interpreter_list, ibe, imgact_binmisc_entry,
+ link);
+ imgact_binmisc_destroy_entry(ibe);
+ }
+ mtx_unlock(&interp_list_mtx);
+
+ mtx_destroy(&interp_list_mtx);
+}
+
+SYSINIT(imgact_binmisc, SI_SUB_EXEC, SI_ORDER_MIDDLE, imgact_binmisc_init, 0);
+SYSUNINIT(imgact_binmisc, SI_SUB_EXEC, SI_ORDER_MIDDLE, imgact_binmisc_fini, 0);
+
+/*
+ * Tell kern_execve.c about it, with a little help from the linker.
+ */
+static struct execsw imgact_binmisc_execsw = { imgact_binmisc_exec, KMOD_NAME };
+EXEC_SET(imgact_binmisc, imgact_binmisc_execsw);
Property changes on: trunk/sys/kern/imgact_binmisc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
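
For reference, the interpreter table maintained by imgact_binmisc.c is driven entirely through the new kern.binmisc.* sysctl nodes defined above (FreeBSD wraps them in the binmiscctl(8) utility). Below is a minimal userland sketch of an IBC_ADD request; it assumes the fixed-size array layout of ximgact_binmisc_entry_t from <sys/imgact_binmisc.h>, and the entry name, interpreter path, and magic bytes are purely illustrative. It must run as root, since the add node is not flagged CTLFLAG_ANYBODY.

/*
 * Illustrative only: register a hypothetical "llvmbc" entry that hands
 * binaries starting with the 4-byte magic "BC\xc0\xde" to an interpreter,
 * passing the original argv[0] via the "#a" macro handled above.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/imgact_binmisc.h>

#include <err.h>
#include <string.h>

int
main(void)
{
	ximgact_binmisc_entry_t xbe;

	memset(&xbe, 0, sizeof(xbe));
	xbe.xbe_version = IBE_VERSION;
	xbe.xbe_flags = IBF_ENABLED | IBF_USE_MASK;
	strlcpy(xbe.xbe_name, "llvmbc", sizeof(xbe.xbe_name));
	strlcpy(xbe.xbe_interpreter, "/usr/local/bin/lli #a",
	    sizeof(xbe.xbe_interpreter));
	xbe.xbe_moffset = 0;			/* match at start of the header */
	xbe.xbe_msize = 4;			/* compare 4 magic bytes */
	memcpy(xbe.xbe_magic, "BC\xc0\xde", 4);
	memset(xbe.xbe_mask, 0xff, 4);		/* all magic bits significant */

	/* Write-only request: the kernel copies the struct in via SYSCTL_IN(). */
	if (sysctlbyname("kern.binmisc.add", NULL, NULL, &xbe, sizeof(xbe)) == -1)
		err(1, "kern.binmisc.add");
	return (0);
}

Entries added this way are matched by imgact_binmisc_find_interpreter() on each exec attempt, and can later be disabled, re-enabled, or removed through the sibling kern.binmisc.{disable,enable,remove} nodes.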
Modified: trunk/sys/kern/kern_dtrace.c
===================================================================
--- trunk/sys/kern/kern_dtrace.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_dtrace.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007-2008 John Birrell <jb at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_dtrace.c 269752 2014-08-09 14:05:01Z markj $");
#include "opt_kdb.h"
@@ -38,6 +39,7 @@
#include <sys/proc.h>
#include <sys/dtrace_bsd.h>
#include <sys/sysctl.h>
+#include <sys/sysent.h>
#define KDTRACE_PROC_SIZE 64
#define KDTRACE_THREAD_SIZE 256
@@ -47,6 +49,14 @@
static MALLOC_DEFINE(M_KDTRACE, "kdtrace", "DTrace hooks");
+/* Hooks used in the machine-dependent trap handlers. */
+dtrace_trap_func_t dtrace_trap_func;
+dtrace_doubletrap_func_t dtrace_doubletrap_func;
+dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr;
+dtrace_return_probe_ptr_t dtrace_return_probe_ptr;
+
+systrace_probe_func_t systrace_probe_func;
+
/* Return the DTrace process data size compiled in the kernel hooks. */
size_t
kdtrace_proc_size()
Added: trunk/sys/kern/kern_ffclock.c
===================================================================
--- trunk/sys/kern/kern_ffclock.c (rev 0)
+++ trunk/sys/kern/kern_ffclock.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -0,0 +1,483 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 The University of Melbourne
+ * All rights reserved.
+ *
+ * This software was developed by Julien Ridoux at the University of Melbourne
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_ffclock.c 273847 2014-10-30 08:04:48Z hselasky $");
+
+#include "opt_ffclock.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/sbuf.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/timeffc.h>
+
+#ifdef FFCLOCK
+
+FEATURE(ffclock, "Feed-forward clock support");
+
+extern struct ffclock_estimate ffclock_estimate;
+extern struct bintime ffclock_boottime;
+extern int8_t ffclock_updated;
+extern struct mtx ffclock_mtx;
+
+/*
+ * Feed-forward clock absolute time. This should be the preferred way to read
+ * the feed-forward clock for "wall-clock" type time. The flags allow composing
+ * various flavours of absolute time (e.g. with or without leap seconds taken
+ * into account). If valid pointers are passed, the ffcounter value and an
+ * upper bound on the clock error associated with the bintime are also returned.
+ * NOTE: use ffclock_convert_abs() to defer the conversion of an ffcounter value
+ * read earlier.
+ */
+void
+ffclock_abstime(ffcounter *ffcount, struct bintime *bt,
+ struct bintime *error_bound, uint32_t flags)
+{
+ struct ffclock_estimate cest;
+ ffcounter ffc;
+ ffcounter update_ffcount;
+ ffcounter ffdelta_error;
+
+ /* Get counter and corresponding time. */
+ if ((flags & FFCLOCK_FAST) == FFCLOCK_FAST)
+ ffclock_last_tick(&ffc, bt, flags);
+ else {
+ ffclock_read_counter(&ffc);
+ ffclock_convert_abs(ffc, bt, flags);
+ }
+
+ /* Current ffclock estimate, use update_ffcount as generation number. */
+ do {
+ update_ffcount = ffclock_estimate.update_ffcount;
+ bcopy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate));
+ } while (update_ffcount != ffclock_estimate.update_ffcount);
+
+ /*
+ * Leap second adjustment. Total as seen by synchronisation algorithm
+ * since it started. cest.leapsec_next is the ffcounter prediction of
+ * when the next leapsecond occurs.
+ */
+ if ((flags & FFCLOCK_LEAPSEC) == FFCLOCK_LEAPSEC) {
+ bt->sec -= cest.leapsec_total;
+ if (ffc > cest.leapsec_next)
+ bt->sec -= cest.leapsec;
+ }
+
+ /* Boot time adjustment, for uptime/monotonic clocks. */
+ if ((flags & FFCLOCK_UPTIME) == FFCLOCK_UPTIME) {
+ bintime_sub(bt, &ffclock_boottime);
+ }
+
+ /* Compute error bound if a valid pointer has been passed. */
+ if (error_bound) {
+ ffdelta_error = ffc - cest.update_ffcount;
+ ffclock_convert_diff(ffdelta_error, error_bound);
+ /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s] */
+ bintime_mul(error_bound, cest.errb_rate *
+ (uint64_t)18446744073709LL);
+ /* 18446744073 = int(2^64 / 1e9), since err_abs in [ns] */
+ bintime_addx(error_bound, cest.errb_abs *
+ (uint64_t)18446744073LL);
+ }
+
+ if (ffcount)
+ *ffcount = ffc;
+}
+
+/*
+ * Feed-forward difference clock. This should be the preferred way to convert a
+ * time interval in ffcounter values into a time interval in seconds. If a valid
+ * pointer is passed, an upper bound on the error in computing the time interval
+ * in seconds is provided.
+ */
+void
+ffclock_difftime(ffcounter ffdelta, struct bintime *bt,
+ struct bintime *error_bound)
+{
+ ffcounter update_ffcount;
+ uint32_t err_rate;
+
+ ffclock_convert_diff(ffdelta, bt);
+
+ if (error_bound) {
+ do {
+ update_ffcount = ffclock_estimate.update_ffcount;
+ err_rate = ffclock_estimate.errb_rate;
+ } while (update_ffcount != ffclock_estimate.update_ffcount);
+
+ ffclock_convert_diff(ffdelta, error_bound);
+ /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s] */
+ bintime_mul(error_bound, err_rate * (uint64_t)18446744073709LL);
+ }
+}
+
+/*
+ * Create a new kern.sysclock sysctl node, which will be home to some generic
+ * sysclock configuration variables. Feed-forward clock specific variables will
+ * live under the ffclock subnode.
+ */
+
+SYSCTL_NODE(_kern, OID_AUTO, sysclock, CTLFLAG_RW, 0,
+ "System clock related configuration");
+SYSCTL_NODE(_kern_sysclock, OID_AUTO, ffclock, CTLFLAG_RW, 0,
+ "Feed-forward clock configuration");
+
+static char *sysclocks[] = {"feedback", "feed-forward"};
+#define MAX_SYSCLOCK_NAME_LEN 16
+#define NUM_SYSCLOCKS (sizeof(sysclocks) / sizeof(*sysclocks))
+
+static int ffclock_version = 2;
+SYSCTL_INT(_kern_sysclock_ffclock, OID_AUTO, version, CTLFLAG_RD,
+ &ffclock_version, 0, "Feed-forward clock kernel version");
+
+/* List available sysclocks. */
+static int
+sysctl_kern_sysclock_available(SYSCTL_HANDLER_ARGS)
+{
+ struct sbuf *s;
+ int clk, error;
+
+ s = sbuf_new_for_sysctl(NULL, NULL,
+ MAX_SYSCLOCK_NAME_LEN * NUM_SYSCLOCKS, req);
+ if (s == NULL)
+ return (ENOMEM);
+
+ for (clk = 0; clk < NUM_SYSCLOCKS; clk++) {
+ sbuf_cat(s, sysclocks[clk]);
+ if (clk + 1 < NUM_SYSCLOCKS)
+ sbuf_cat(s, " ");
+ }
+ error = sbuf_finish(s);
+ sbuf_delete(s);
+
+ return (error);
+}
+
+SYSCTL_PROC(_kern_sysclock, OID_AUTO, available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, sysctl_kern_sysclock_available, "A",
+ "List of available system clocks");
+
+/*
+ * Return the name of the active system clock if read, or attempt to change
+ * the active system clock to the user specified one if written to. The active
+ * system clock is read when calling any of the [get]{bin,nano,micro}[up]time()
+ * functions.
+ */
+static int
+sysctl_kern_sysclock_active(SYSCTL_HANDLER_ARGS)
+{
+ char newclock[MAX_SYSCLOCK_NAME_LEN];
+ int error;
+ int clk;
+
+ /* Return the name of the current active sysclock. */
+ strlcpy(newclock, sysclocks[sysclock_active], sizeof(newclock));
+ error = sysctl_handle_string(oidp, newclock, sizeof(newclock), req);
+
+ /* Check for error or no change */
+ if (error != 0 || req->newptr == NULL)
+ goto done;
+
+ /* Change the active sysclock to the user specified one: */
+ error = EINVAL;
+ for (clk = 0; clk < NUM_SYSCLOCKS; clk++) {
+ if (strncmp(newclock, sysclocks[clk],
+ MAX_SYSCLOCK_NAME_LEN - 1)) {
+ continue;
+ }
+ sysclock_active = clk;
+ error = 0;
+ break;
+ }
+done:
+ return (error);
+}
+
+SYSCTL_PROC(_kern_sysclock, OID_AUTO, active, CTLTYPE_STRING | CTLFLAG_RW,
+ 0, 0, sysctl_kern_sysclock_active, "A",
+ "Name of the active system clock which is currently serving time");
+
+static int sysctl_kern_ffclock_ffcounter_bypass = 0;
+SYSCTL_INT(_kern_sysclock_ffclock, OID_AUTO, ffcounter_bypass, CTLFLAG_RW,
+ &sysctl_kern_ffclock_ffcounter_bypass, 0,
+ "Use reliable hardware timecounter as the feed-forward counter");
+
+/*
+ * High level functions to access the Feed-Forward Clock.
+ */
+void
+ffclock_bintime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
+}
+
+void
+ffclock_nanotime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_microtime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_getbintime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
+}
+
+void
+ffclock_getnanotime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_getmicrotime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_binuptime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
+}
+
+void
+ffclock_nanouptime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_microuptime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_getbinuptime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
+}
+
+void
+ffclock_getnanouptime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_getmicrouptime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_bindifftime(ffcounter ffdelta, struct bintime *bt)
+{
+
+ ffclock_difftime(ffdelta, bt, NULL);
+}
+
+void
+ffclock_nanodifftime(ffcounter ffdelta, struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_difftime(ffdelta, &bt, NULL);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_microdifftime(ffcounter ffdelta, struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_difftime(ffdelta, &bt, NULL);
+ bintime2timeval(&bt, tvp);
+}
+
+/*
+ * System call allowing userland applications to retrieve the current value of
+ * the Feed-Forward Clock counter.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct ffclock_getcounter_args {
+ ffcounter *ffcount;
+};
+#endif
+/* ARGSUSED */
+int
+sys_ffclock_getcounter(struct thread *td, struct ffclock_getcounter_args *uap)
+{
+ ffcounter ffcount;
+ int error;
+
+ ffcount = 0;
+ ffclock_read_counter(&ffcount);
+ if (ffcount == 0)
+ return (EAGAIN);
+ error = copyout(&ffcount, uap->ffcount, sizeof(ffcounter));
+
+ return (error);
+}
+
+/*
+ * System call allowing the synchronisation daemon to push new feed-forward clock
+ * estimates to the kernel. Acquire ffclock_mtx to prevent concurrent updates
+ * and ensure data consistency.
+ * NOTE: ffclock_updated signals the fftimehands that new estimates are
+ * available. The updated estimates are picked up by the fftimehands on next
+ * tick, which could take as long as 1/hz seconds (if ticks are not missed).
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct ffclock_setestimate_args {
+ struct ffclock_estimate *cest;
+};
+#endif
+/* ARGSUSED */
+int
+sys_ffclock_setestimate(struct thread *td, struct ffclock_setestimate_args *uap)
+{
+ struct ffclock_estimate cest;
+ int error;
+
+ /* Reuse of PRIV_CLOCK_SETTIME. */
+ if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
+ return (error);
+
+ if ((error = copyin(uap->cest, &cest, sizeof(struct ffclock_estimate)))
+ != 0)
+ return (error);
+
+ mtx_lock(&ffclock_mtx);
+ memcpy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate));
+ ffclock_updated++;
+ mtx_unlock(&ffclock_mtx);
+ return (error);
+}
+
+/*
+ * System call allowing userland applications to retrieve the clock estimates
+ * stored within the kernel. It is useful to kickstart the synchronisation
+ * daemon with the kernel's knowledge of the hardware timecounter.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct ffclock_getestimate_args {
+ struct ffclock_estimate *cest;
+};
+#endif
+/* ARGSUSED */
+int
+sys_ffclock_getestimate(struct thread *td, struct ffclock_getestimate_args *uap)
+{
+ struct ffclock_estimate cest;
+ int error;
+
+ mtx_lock(&ffclock_mtx);
+ memcpy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
+ mtx_unlock(&ffclock_mtx);
+ error = copyout(&cest, uap->cest, sizeof(struct ffclock_estimate));
+ return (error);
+}
+
+#else /* !FFCLOCK */
+
+int
+sys_ffclock_getcounter(struct thread *td, struct ffclock_getcounter_args *uap)
+{
+
+ return (ENOSYS);
+}
+
+int
+sys_ffclock_setestimate(struct thread *td, struct ffclock_setestimate_args *uap)
+{
+
+ return (ENOSYS);
+}
+
+int
+sys_ffclock_getestimate(struct thread *td, struct ffclock_getestimate_args *uap)
+{
+
+ return (ENOSYS);
+}
+
+#endif /* FFCLOCK */
Property changes on: trunk/sys/kern/kern_ffclock.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
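
The new kern_ffclock.c only compiles its body under "options FFCLOCK"; otherwise the three system calls return ENOSYS, as the #else branch above shows. With the option enabled, a userland process can sample the raw feed-forward counter and fetch the daemon-maintained estimate roughly as follows. This is a sketch that assumes the userland prototypes ffclock_getcounter() and ffclock_getestimate() exported through <sys/timeffc.h>, which are not part of this diff.

/*
 * Sample the feed-forward counter around a workload and report the raw
 * tick delta plus the generation counter of the current kernel estimate.
 */
#include <sys/types.h>
#include <sys/time.h>
#include <sys/timeffc.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct ffclock_estimate cest;
	ffcounter before, after;

	if (ffclock_getcounter(&before) == -1)
		err(1, "ffclock_getcounter");	/* ENOSYS without FFCLOCK */
	/* ... workload being timed ... */
	if (ffclock_getcounter(&after) == -1)
		err(1, "ffclock_getcounter");

	if (ffclock_getestimate(&cest) == -1)
		err(1, "ffclock_getestimate");

	printf("elapsed %ju ffcounter ticks, estimate generation %ju\n",
	    (uintmax_t)(after - before), (uintmax_t)cest.update_ffcount);
	return (0);
}

The system-wide clock flavour used by the [get]{bin,nano,micro}[up]time() calls can also be switched at run time with sysctl kern.sysclock.active=feed-forward (or back to feedback), per the sysctl_kern_sysclock_active handler above.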
Modified: trunk/sys/kern/kern_idle.c
===================================================================
--- trunk/sys/kern/kern_idle.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_idle.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (C) 2000-2004 The FreeBSD Project. All rights reserved.
*
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_idle.c 222531 2011-05-31 15:11:43Z nwhitehorn $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/kern/kern_ktrace.c
===================================================================
--- trunk/sys/kern/kern_ktrace.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_ktrace.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1993
* The Regents of the University of California.
@@ -32,11 +33,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_ktrace.c 315562 2017-03-19 15:56:06Z kib $");
#include "opt_ktrace.h"
#include <sys/param.h>
+#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
@@ -95,6 +97,7 @@
void *ktr_buffer;
union {
struct ktr_proc_ctor ktr_proc_ctor;
+ struct ktr_cap_fail ktr_cap_fail;
struct ktr_syscall ktr_syscall;
struct ktr_sysret ktr_sysret;
struct ktr_genio ktr_genio;
@@ -107,21 +110,20 @@
};
static int data_lengths[] = {
- 0, /* none */
- offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */
- sizeof(struct ktr_sysret), /* KTR_SYSRET */
- 0, /* KTR_NAMEI */
- sizeof(struct ktr_genio), /* KTR_GENIO */
- sizeof(struct ktr_psig), /* KTR_PSIG */
- sizeof(struct ktr_csw), /* KTR_CSW */
- 0, /* KTR_USER */
- 0, /* KTR_STRUCT */
- 0, /* KTR_SYSCTL */
- sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */
- 0, /* KTR_PROCDTOR */
- 0, /* unused */
- sizeof(struct ktr_fault), /* KTR_FAULT */
- sizeof(struct ktr_faultend), /* KTR_FAULTEND */
+ [KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
+ [KTR_SYSRET] = sizeof(struct ktr_sysret),
+ [KTR_NAMEI] = 0,
+ [KTR_GENIO] = sizeof(struct ktr_genio),
+ [KTR_PSIG] = sizeof(struct ktr_psig),
+ [KTR_CSW] = sizeof(struct ktr_csw),
+ [KTR_USER] = 0,
+ [KTR_STRUCT] = 0,
+ [KTR_SYSCTL] = 0,
+ [KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
+ [KTR_PROCDTOR] = 0,
+ [KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
+ [KTR_FAULT] = sizeof(struct ktr_fault),
+ [KTR_FAULTEND] = sizeof(struct ktr_faultend),
};
static STAILQ_HEAD(, ktr_request) ktr_free;
@@ -131,7 +133,7 @@
static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);
-static u_int ktr_geniosize = PAGE_SIZE;
+u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
0, "Maximum size of genio event payload");
@@ -511,7 +513,6 @@
struct proc *p;
struct ucred *cred;
struct vnode *vp;
- int vfslocked;
p = td->td_proc;
if (p->p_traceflag == 0)
@@ -529,11 +530,8 @@
ktr_freeproc(p, &cred, &vp);
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
- if (vp != NULL) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if (vp != NULL)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (cred != NULL)
crfree(cred);
ktrace_exit(td);
@@ -780,6 +778,33 @@
}
void
+ktrcapfail(type, needed, held)
+ enum ktr_cap_fail_type type;
+ const cap_rights_t *needed;
+ const cap_rights_t *held;
+{
+ struct thread *td = curthread;
+ struct ktr_request *req;
+ struct ktr_cap_fail *kcf;
+
+ req = ktr_getrequest(KTR_CAPFAIL);
+ if (req == NULL)
+ return;
+ kcf = &req->ktr_data.ktr_cap_fail;
+ kcf->cap_type = type;
+ if (needed != NULL)
+ kcf->cap_needed = *needed;
+ else
+ cap_rights_init(&kcf->cap_needed);
+ if (held != NULL)
+ kcf->cap_held = *held;
+ else
+ cap_rights_init(&kcf->cap_held);
+ ktr_enqueuerequest(td, req);
+ ktrace_exit(td);
+}
+
+void
ktrfault(vaddr, type)
vm_offset_t vaddr;
int type;
@@ -840,7 +865,7 @@
int ops = KTROP(uap->ops);
int descend = uap->ops & KTRFLAG_DESCEND;
int nfound, ret = 0;
- int flags, error = 0, vfslocked;
+ int flags, error = 0;
struct nameidata nd;
struct ucred *cred;
@@ -855,8 +880,7 @@
/*
* an operation which requires a file argument.
*/
- NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
- uap->fname, td);
+ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
flags = FREAD | FWRITE | O_NOFOLLOW;
error = vn_open(&nd, &flags, 0, NULL);
if (error) {
@@ -863,17 +887,14 @@
ktrace_exit(td);
return (error);
}
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
VOP_UNLOCK(vp, 0);
if (vp->v_type != VREG) {
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
ktrace_exit(td);
return (EACCES);
}
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
* Clear all uses of the tracefile.
@@ -899,10 +920,8 @@
}
sx_sunlock(&allproc_lock);
if (vrele_count > 0) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
while (vrele_count-- > 0)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
}
goto done;
}
@@ -968,11 +987,8 @@
if (!ret)
error = EPERM;
done:
- if (vp != NULL) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if (vp != NULL)
(void) vn_close(vp, FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
- }
ktrace_exit(td);
return (error);
#else /* !KTRACE */
@@ -1064,13 +1080,8 @@
if ((p->p_traceflag & KTRFAC_MASK) != 0)
ktrprocctor_entered(td, p);
PROC_UNLOCK(p);
- if (tracevp != NULL) {
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
+ if (tracevp != NULL)
vrele(tracevp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (tracecred != NULL)
crfree(tracecred);
@@ -1124,7 +1135,7 @@
struct iovec aiov[3];
struct mount *mp;
int datalen, buflen, vrele_count;
- int error, vfslocked;
+ int error;
/*
* We hold the vnode and credential for use in I/O in case ktrace is
@@ -1182,7 +1193,6 @@
auio.uio_iovcnt++;
}
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_start_write(vp, &mp, V_WAIT);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
@@ -1195,10 +1205,8 @@
crfree(cred);
if (!error) {
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
return;
}
- VFS_UNLOCK_GIANT(vfslocked);
/*
* If error encountered, give up tracing on this vnode. We defer
@@ -1237,10 +1245,8 @@
}
sx_sunlock(&allproc_lock);
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
while (vrele_count-- > 0)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
Modified: trunk/sys/kern/kern_linker.c
===================================================================
--- trunk/sys/kern/kern_linker.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_linker.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997-2000 Doug Rabson
* All rights reserved.
@@ -25,9 +26,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_linker.c 325867 2017-11-15 22:35:16Z gordon $");
#include "opt_ddb.h"
+#include "opt_kld.h"
#include "opt_hwpmc_hooks.h"
#include <sys/param.h>
@@ -44,6 +46,7 @@
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/linker.h>
+#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/libkern.h>
@@ -64,20 +67,16 @@
#ifdef KLD_DEBUG
int kld_debug = 0;
-SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW,
- &kld_debug, 0, "Set various levels of KLD debug");
+SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW | CTLFLAG_TUN,
+ &kld_debug, 0, "Set various levels of KLD debug");
+TUNABLE_INT("debug.kld_debug", &kld_debug);
#endif
-#define KLD_LOCK() sx_xlock(&kld_sx)
-#define KLD_UNLOCK() sx_xunlock(&kld_sx)
-#define KLD_DOWNGRADE() sx_downgrade(&kld_sx)
-#define KLD_LOCK_READ() sx_slock(&kld_sx)
-#define KLD_UNLOCK_READ() sx_sunlock(&kld_sx)
-#define KLD_LOCKED() sx_xlocked(&kld_sx)
-#define KLD_LOCK_ASSERT() do { \
- if (!cold) \
- sx_assert(&kld_sx, SX_XLOCKED); \
-} while (0)
+/* These variables are used by kernel debuggers to enumerate loaded files. */
+const int kld_off_address = offsetof(struct linker_file, address);
+const int kld_off_filename = offsetof(struct linker_file, filename);
+const int kld_off_pathname = offsetof(struct linker_file, pathname);
+const int kld_off_next = offsetof(struct linker_file, link.tqe_next);
/*
* static char *linker_search_path(const char *name, struct mod_depend
@@ -118,7 +117,8 @@
#define LINKER_GET_NEXT_FILE_ID(a) do { \
linker_file_t lftmp; \
\
- KLD_LOCK_ASSERT(); \
+ if (!cold) \
+ sx_assert(&kld_sx, SA_XLOCKED); \
retry: \
TAILQ_FOREACH(lftmp, &linker_files, link) { \
if (next_file_id == lftmp->id) { \
@@ -150,16 +150,6 @@
struct mod_depend *verinfo, struct linker_file **lfpp);
static modlist_t modlist_lookup2(const char *name, struct mod_depend *verinfo);
-static char *
-linker_strdup(const char *str)
-{
- char *result;
-
- if ((result = malloc((strlen(str) + 1), M_LINKER, M_WAITOK)) != NULL)
- strcpy(result, str);
- return (result);
-}
-
static void
linker_init(void *arg)
{
@@ -205,6 +195,8 @@
KLD_DPF(FILE, ("linker_file_sysinit: calling SYSINITs for %s\n",
lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysinit_set", &start, &stop, NULL) != 0)
return;
/*
@@ -230,6 +222,7 @@
* Traverse the (now) ordered list of system initialization tasks.
* Perform each task, and continue on to the next task.
*/
+ sx_xunlock(&kld_sx);
mtx_lock(&Giant);
for (sipp = start; sipp < stop; sipp++) {
if ((*sipp)->subsystem == SI_SUB_DUMMY)
@@ -239,6 +232,7 @@
(*((*sipp)->func)) ((*sipp)->udata);
}
mtx_unlock(&Giant);
+ sx_xlock(&kld_sx);
}
static void
@@ -249,6 +243,8 @@
KLD_DPF(FILE, ("linker_file_sysuninit: calling SYSUNINITs for %s\n",
lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysuninit_set", &start, &stop,
NULL) != 0)
return;
@@ -276,6 +272,7 @@
* Traverse the (now) ordered list of system initialization tasks.
* Perform each task, and continue on to the next task.
*/
+ sx_xunlock(&kld_sx);
mtx_lock(&Giant);
for (sipp = start; sipp < stop; sipp++) {
if ((*sipp)->subsystem == SI_SUB_DUMMY)
@@ -285,10 +282,11 @@
(*((*sipp)->func)) ((*sipp)->udata);
}
mtx_unlock(&Giant);
+ sx_xlock(&kld_sx);
}
static void
-linker_file_register_sysctls(linker_file_t lf)
+linker_file_register_sysctls(linker_file_t lf, bool enable)
{
struct sysctl_oid **start, **stop, **oidp;
@@ -296,13 +294,43 @@
("linker_file_register_sysctls: registering SYSCTLs for %s\n",
lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0)
return;
+ sx_xunlock(&kld_sx);
sysctl_lock();
+ for (oidp = start; oidp < stop; oidp++) {
+ if (enable)
+ sysctl_register_oid(*oidp);
+ else
+ sysctl_register_disabled_oid(*oidp);
+ }
+ sysctl_unlock();
+ sx_xlock(&kld_sx);
+}
+
+static void
+linker_file_enable_sysctls(linker_file_t lf)
+{
+ struct sysctl_oid **start, **stop, **oidp;
+
+ KLD_DPF(FILE,
+ ("linker_file_enable_sysctls: enable SYSCTLs for %s\n",
+ lf->filename));
+
+ sx_assert(&kld_sx, SA_XLOCKED);
+
+ if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0)
+ return;
+
+ sx_xunlock(&kld_sx);
+ sysctl_lock();
for (oidp = start; oidp < stop; oidp++)
- sysctl_register_oid(*oidp);
+ sysctl_enable_oid(*oidp);
sysctl_unlock();
+ sx_xlock(&kld_sx);
}
static void
@@ -310,16 +338,20 @@
{
struct sysctl_oid **start, **stop, **oidp;
- KLD_DPF(FILE, ("linker_file_unregister_sysctls: registering SYSCTLs"
+ KLD_DPF(FILE, ("linker_file_unregister_sysctls: unregistering SYSCTLs"
" for %s\n", lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0)
return;
+ sx_xunlock(&kld_sx);
sysctl_lock();
for (oidp = start; oidp < stop; oidp++)
sysctl_unregister_oid(*oidp);
sysctl_unlock();
+ sx_xlock(&kld_sx);
}
static int
@@ -332,6 +364,8 @@
KLD_DPF(FILE, ("linker_file_register_modules: registering modules"
" in %s\n", lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "modmetadata_set", &start,
&stop, NULL) != 0) {
/*
@@ -367,7 +401,9 @@
linker_init_kernel_modules(void)
{
+ sx_xlock(&kld_sx);
linker_file_register_modules(linker_kernel_file);
+ sx_xunlock(&kld_sx);
}
SYSINIT(linker_kernel, SI_SUB_KLD, SI_ORDER_ANY, linker_init_kernel_modules,
@@ -384,7 +420,7 @@
if (prison0.pr_securelevel > 0)
return (EPERM);
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
lf = linker_find_file_by_name(filename);
if (lf) {
KLD_DPF(FILE, ("linker_load_file: file %s is already loaded,"
@@ -418,10 +454,8 @@
return (error);
}
modules = !TAILQ_EMPTY(&lf->modules);
- KLD_UNLOCK();
- linker_file_register_sysctls(lf);
+ linker_file_register_sysctls(lf, false);
linker_file_sysinit(lf);
- KLD_LOCK();
lf->flags |= LINKER_FILE_LINKED;
/*
@@ -433,6 +467,8 @@
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
return (ENOEXEC);
}
+ linker_file_enable_sysctls(lf);
+ EVENTHANDLER_INVOKE(kld_load, lf);
*result = lf;
return (0);
}
@@ -472,16 +508,16 @@
modlist_t mod;
int error;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if ((mod = modlist_lookup2(modname, verinfo)) != NULL) {
*result = mod->container;
(*result)->refs++;
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (0);
}
error = linker_load_module(NULL, modname, NULL, verinfo, result);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -492,13 +528,13 @@
modlist_t mod;
int error;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if (lf == NULL) {
KASSERT(modname != NULL,
("linker_release_module: no file or name"));
mod = modlist_lookup2(modname, verinfo);
if (mod == NULL) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (ESRCH);
}
lf = mod->container;
@@ -506,7 +542,7 @@
KASSERT(modname == NULL && verinfo == NULL,
("linker_release_module: both file and name"));
error = linker_file_unload(lf, LINKER_UNLOAD_NORMAL);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -519,7 +555,7 @@
koname = malloc(strlen(filename) + 4, M_LINKER, M_WAITOK);
sprintf(koname, "%s.ko", filename);
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
TAILQ_FOREACH(lf, &linker_files, link) {
if (strcmp(lf->filename, koname) == 0)
break;
@@ -535,7 +571,7 @@
{
linker_file_t lf;
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
TAILQ_FOREACH(lf, &linker_files, link)
if (lf->id == fileid && lf->flags & LINKER_FILE_LINKED)
break;
@@ -548,13 +584,13 @@
linker_file_t lf;
int retval = 0;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
TAILQ_FOREACH(lf, &linker_files, link) {
retval = predicate(lf, context);
if (retval != 0)
break;
}
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (retval);
}
@@ -564,7 +600,8 @@
linker_file_t lf;
const char *filename;
- KLD_LOCK_ASSERT();
+ if (!cold)
+ sx_assert(&kld_sx, SA_XLOCKED);
filename = linker_basename(pathname);
KLD_DPF(FILE, ("linker_make_file: new file, filename='%s' for pathname='%s'\n", filename, pathname));
@@ -574,14 +611,12 @@
lf->refs = 1;
lf->userrefs = 0;
lf->flags = 0;
- lf->filename = linker_strdup(filename);
- lf->pathname = linker_strdup(pathname);
+ lf->filename = strdup(filename, M_LINKER);
+ lf->pathname = strdup(pathname, M_LINKER);
LINKER_GET_NEXT_FILE_ID(lf->id);
lf->ndeps = 0;
lf->deps = NULL;
lf->loadcnt = ++loadcnt;
- lf->sdt_probes = NULL;
- lf->sdt_nprobes = 0;
STAILQ_INIT(&lf->common);
TAILQ_INIT(&lf->modules);
TAILQ_INSERT_TAIL(&linker_files, lf, link);
@@ -600,7 +635,7 @@
if (prison0.pr_securelevel > 0)
return (EPERM);
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
KLD_DPF(FILE, ("linker_file_unload: lf->refs=%d\n", file->refs));
/* Easy case of just dropping a reference. */
@@ -609,6 +644,12 @@
return (0);
}
+ /* Give eventhandlers a chance to prevent the unload. */
+ error = 0;
+ EVENTHANDLER_INVOKE(kld_unload_try, file, &error);
+ if (error != 0)
+ return (EBUSY);
+
KLD_DPF(FILE, ("linker_file_unload: file is unloading,"
" informing modules\n"));
@@ -673,10 +714,8 @@
*/
if (file->flags & LINKER_FILE_LINKED) {
file->flags &= ~LINKER_FILE_LINKED;
- KLD_UNLOCK();
+ linker_file_unregister_sysctls(file);
linker_file_sysuninit(file);
- linker_file_unregister_sysctls(file);
- KLD_LOCK();
}
TAILQ_REMOVE(&linker_files, file, link);
@@ -692,6 +731,10 @@
}
LINKER_UNLOAD(file);
+
+ EVENTHANDLER_INVOKE(kld_unload, file->filename, file->address,
+ file->size);
+
if (file->filename) {
free(file->filename, M_LINKER);
file->filename = NULL;
@@ -715,18 +758,9 @@
{
linker_file_t *newdeps;
- KLD_LOCK_ASSERT();
- newdeps = malloc((file->ndeps + 1) * sizeof(linker_file_t *),
+ sx_assert(&kld_sx, SA_XLOCKED);
+ file->deps = realloc(file->deps, (file->ndeps + 1) * sizeof(*newdeps),
M_LINKER, M_WAITOK | M_ZERO);
- if (newdeps == NULL)
- return (ENOMEM);
-
- if (file->deps) {
- bcopy(file->deps, newdeps,
- file->ndeps * sizeof(linker_file_t *));
- free(file->deps, M_LINKER);
- }
- file->deps = newdeps;
file->deps[file->ndeps] = dep;
file->ndeps++;
KLD_DPF(FILE, ("linker_file_add_dependency:"
@@ -745,15 +779,9 @@
linker_file_lookup_set(linker_file_t file, const char *name,
void *firstp, void *lastp, int *countp)
{
- int error, locked;
- locked = KLD_LOCKED();
- if (!locked)
- KLD_LOCK();
- error = LINKER_LOOKUP_SET(file, name, firstp, lastp, countp);
- if (!locked)
- KLD_UNLOCK();
- return (error);
+ sx_assert(&kld_sx, SA_LOCKED);
+ return (LINKER_LOOKUP_SET(file, name, firstp, lastp, countp));
}
/*
@@ -772,12 +800,12 @@
caddr_t sym;
int locked;
- locked = KLD_LOCKED();
+ locked = sx_xlocked(&kld_sx);
if (!locked)
- KLD_LOCK();
+ sx_xlock(&kld_sx);
sym = linker_file_lookup_symbol_internal(file, name, deps);
if (!locked)
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (sym);
}
@@ -791,7 +819,7 @@
size_t common_size = 0;
int i;
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
KLD_DPF(SYM, ("linker_file_lookup_symbol: file=%p, name=%s, deps=%d\n",
file, name, deps));
@@ -948,7 +976,7 @@
*
* Note that we do not obey list locking protocols here. We really don't need
* DDB to hang because somebody's got the lock held. We'll take the chance
- * that the files list is inconsistant instead.
+ * that the files list is inconsistent instead.
*/
#ifdef DDB
int
@@ -991,9 +1019,9 @@
{
int error;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
error = linker_debug_search_symbol_name(value, buf, buflen, offset);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -1003,9 +1031,6 @@
int
kern_kldload(struct thread *td, const char *file, int *fileid)
{
-#ifdef HWPMC_HOOKS
- struct pmckern_map_in pkm;
-#endif
const char *kldname, *modname;
linker_file_t lf;
int error;
@@ -1027,7 +1052,7 @@
* (kldname.ko, or kldname.ver.ko) treat it as an interface
* name.
*/
- if (index(file, '/') || index(file, '.')) {
+ if (strchr(file, '/') || strchr(file, '.')) {
kldname = file;
modname = NULL;
} else {
@@ -1035,24 +1060,16 @@
modname = file;
}
- KLD_LOCK();
+ sx_xlock(&kld_sx);
error = linker_load_module(kldname, modname, NULL, NULL, &lf);
if (error) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
goto done;
}
lf->userrefs++;
if (fileid != NULL)
*fileid = lf->id;
-#ifdef HWPMC_HOOKS
- KLD_DOWNGRADE();
- pkm.pm_file = lf->filename;
- pkm.pm_address = (uintptr_t) lf->address;
- PMC_CALL_HOOK(td, PMC_FN_KLD_LOAD, (void *) &pkm);
- KLD_UNLOCK_READ();
-#else
- KLD_UNLOCK();
-#endif
+ sx_xunlock(&kld_sx);
done:
CURVNET_RESTORE();
@@ -1081,9 +1098,6 @@
int
kern_kldunload(struct thread *td, int fileid, int flags)
{
-#ifdef HWPMC_HOOKS
- struct pmckern_map_out pkm;
-#endif
linker_file_t lf;
int error = 0;
@@ -1094,17 +1108,12 @@
return (error);
CURVNET_SET(TD_TO_VNET(td));
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_id(fileid);
if (lf) {
KLD_DPF(FILE, ("kldunload: lf->userrefs=%d\n", lf->userrefs));
- /* Check if there are DTrace probes enabled on this file. */
- if (lf->nenabled > 0) {
- printf("kldunload: attempt to unload file that has"
- " DTrace probes enabled\n");
- error = EBUSY;
- } else if (lf->userrefs == 0) {
+ if (lf->userrefs == 0) {
/*
* XXX: maybe LINKER_UNLOAD_FORCE should override ?
*/
@@ -1112,11 +1121,6 @@
" loaded by the kernel\n");
error = EBUSY;
} else {
-#ifdef HWPMC_HOOKS
- /* Save data needed by hwpmc(4) before unloading. */
- pkm.pm_address = (uintptr_t) lf->address;
- pkm.pm_size = lf->size;
-#endif
lf->userrefs--;
error = linker_file_unload(lf, flags);
if (error)
@@ -1124,17 +1128,8 @@
}
} else
error = ENOENT;
+ sx_xunlock(&kld_sx);
-#ifdef HWPMC_HOOKS
- if (error == 0) {
- KLD_DOWNGRADE();
- PMC_CALL_HOOK(td, PMC_FN_KLD_UNLOAD, (void *) &pkm);
- KLD_UNLOCK_READ();
- } else
- KLD_UNLOCK();
-#else
- KLD_UNLOCK();
-#endif
CURVNET_RESTORE();
return (error);
}
@@ -1177,13 +1172,13 @@
goto out;
filename = linker_basename(pathname);
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_name(filename);
if (lf)
td->td_retval[0] = lf->id;
else
error = ENOENT;
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
out:
free(pathname, M_TEMP);
return (error);
@@ -1201,7 +1196,7 @@
return (error);
#endif
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if (uap->fileid == 0)
lf = TAILQ_FIRST(&linker_files);
else {
@@ -1222,7 +1217,7 @@
else
td->td_retval[0] = 0;
out:
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -1229,7 +1224,7 @@
int
sys_kldstat(struct thread *td, struct kldstat_args *uap)
{
- struct kld_file_stat stat;
+ struct kld_file_stat *stat;
int error, version;
/*
@@ -1242,10 +1237,12 @@
version != sizeof(struct kld_file_stat))
return (EINVAL);
- error = kern_kldstat(td, uap->fileid, &stat);
- if (error != 0)
- return (error);
- return (copyout(&stat, uap->stat, version));
+ stat = malloc(sizeof(*stat), M_TEMP, M_WAITOK | M_ZERO);
+ error = kern_kldstat(td, uap->fileid, stat);
+ if (error == 0)
+ error = copyout(stat, uap->stat, version);
+ free(stat, M_TEMP);
+ return (error);
}
int
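
The sys_kldstat() change above moves the kernel-side struct kld_file_stat off the stack and onto malloc(9) memory, but the userland contract is unchanged: callers still preset the version field to sizeof(struct kld_file_stat) or the syscall returns EINVAL. A minimal userland sketch of that contract, using the standard kldnext(2)/kldstat(2) interfaces (illustrative code, not part of this commit):

#include <sys/param.h>
#include <sys/linker.h>
#include <stdio.h>

int
main(void)
{
	struct kld_file_stat st;
	int fileid;

	for (fileid = kldnext(0); fileid > 0; fileid = kldnext(fileid)) {
		/* sys_kldstat() rejects any other version value. */
		st.version = sizeof(st);
		if (kldstat(fileid, &st) == -1) {
			perror("kldstat");
			continue;
		}
		printf("%2d %s\n", st.id, st.name);
	}
	return (0);
}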
@@ -1261,10 +1258,10 @@
return (error);
#endif
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_id(fileid);
if (lf == NULL) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (ENOENT);
}
@@ -1282,7 +1279,7 @@
if (namelen > MAXPATHLEN)
namelen = MAXPATHLEN;
bcopy(lf->pathname, &stat->pathname[0], namelen);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
td->td_retval[0] = 0;
return (0);
@@ -1301,7 +1298,7 @@
return (error);
#endif
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_id(uap->fileid);
if (lf) {
MOD_SLOCK;
@@ -1313,7 +1310,7 @@
MOD_SUNLOCK;
} else
error = ENOENT;
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -1341,7 +1338,7 @@
symstr = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
if ((error = copyinstr(lookup.symname, symstr, MAXPATHLEN, NULL)) != 0)
goto out;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if (uap->fileid != 0) {
lf = linker_find_file_by_id(uap->fileid);
if (lf == NULL)
@@ -1367,7 +1364,7 @@
if (lf == NULL)
error = ENOENT;
}
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
out:
free(symstr, M_TEMP);
return (error);
@@ -1476,6 +1473,7 @@
error = 0;
modptr = NULL;
+ sx_xlock(&kld_sx);
while ((modptr = preload_search_next_name(modptr)) != NULL) {
modname = (char *)preload_search_info(modptr, MODINFO_NAME);
modtype = (char *)preload_search_info(modptr, MODINFO_TYPE);
@@ -1650,7 +1648,7 @@
if (linker_file_lookup_set(lf, "sysinit_set", &si_start,
&si_stop, NULL) == 0)
sysinit_add(si_start, si_stop);
- linker_file_register_sysctls(lf);
+ linker_file_register_sysctls(lf, true);
lf->flags |= LINKER_FILE_LINKED;
continue;
fail:
@@ -1657,6 +1655,7 @@
TAILQ_REMOVE(&depended_files, lf, loaded);
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
}
+ sx_xunlock(&kld_sx);
/* woohoo! we made it! */
}
@@ -1703,7 +1702,7 @@
struct nameidata nd;
struct thread *td = curthread; /* XXX */
char *result, **cpp, *sep;
- int error, len, extlen, reclen, flags, vfslocked;
+ int error, len, extlen, reclen, flags;
enum vtype type;
extlen = 0;
@@ -1724,11 +1723,10 @@
* Attempt to open the file, and return the path if
* we succeed and it's a regular file.
*/
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, result, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, result, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error == 0) {
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
type = nd.ni_vp->v_type;
if (vap)
@@ -1735,7 +1733,6 @@
VOP_GETATTR(nd.ni_vp, vap, td->td_ucred);
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
if (type == VREG)
return (result);
}
@@ -1764,7 +1761,6 @@
u_char *cp, *recptr, *bufend, *result, *best, *pathbuf, *sep;
int error, ival, bestver, *intp, found, flags, clen, blen;
ssize_t reclen;
- int vfslocked = 0;
result = NULL;
bestver = found = 0;
@@ -1776,12 +1772,11 @@
snprintf(pathbuf, reclen, "%.*s%s%s", pathlen, path, sep,
linker_hintfile);
- NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, pathbuf, td);
+ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error)
goto bad;
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_vp->v_type != VREG)
goto bad;
@@ -1797,8 +1792,6 @@
goto bad;
}
hints = malloc(vattr.va_size, M_TEMP, M_WAITOK);
- if (hints == NULL)
- goto bad;
error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)hints, vattr.va_size, 0,
UIO_SYSSPACE, IO_NODELOCKED, cred, NOCRED, &reclen, td);
if (error)
@@ -1805,7 +1798,6 @@
goto bad;
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, cred, td);
- VFS_UNLOCK_GIANT(vfslocked);
nd.ni_vp = NULL;
if (reclen != 0) {
printf("can't read %zd\n", reclen);
@@ -1874,7 +1866,6 @@
if (nd.ni_vp != NULL) {
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, cred, td);
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
* If nothing found or hints is absent - fallback to the old
@@ -1921,8 +1912,8 @@
int len;
/* qualified at all? */
- if (index(name, '/'))
- return (linker_strdup(name));
+ if (strchr(name, '/'))
+ return (strdup(name, M_LINKER));
/* traverse the linker path */
len = strlen(name);
@@ -1942,7 +1933,7 @@
{
const char *filename;
- filename = rindex(path, '/');
+ filename = strrchr(path, '/');
if (filename == NULL)
return path;
if (filename[1])
@@ -1962,7 +1953,7 @@
int i, nmappings;
nmappings = 0;
- KLD_LOCK_READ();
+ sx_slock(&kld_sx);
TAILQ_FOREACH(lf, &linker_files, link)
nmappings++;
@@ -1977,7 +1968,7 @@
kobase[i].pm_address = (uintptr_t)lf->address;
i++;
}
- KLD_UNLOCK_READ();
+ sx_sunlock(&kld_sx);
KASSERT(i > 0, ("linker_hpwmc_list_objects: no kernel objects?"));
@@ -2003,7 +1994,7 @@
char *pathname;
int error;
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
if (modname == NULL) {
/*
* We have to load KLD
@@ -2015,7 +2006,7 @@
if (modlist_lookup2(modname, verinfo) != NULL)
return (EEXIST);
if (kldname != NULL)
- pathname = linker_strdup(kldname);
+ pathname = strdup(kldname, M_LINKER);
else if (rootvnode == NULL)
pathname = NULL;
else
@@ -2075,9 +2066,9 @@
int ver, error = 0, count;
/*
- * All files are dependant on /kernel.
+ * All files are dependent on /kernel.
*/
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
if (linker_kernel_file) {
linker_kernel_file->refs++;
error = linker_file_add_dependency(lf, linker_kernel_file);
@@ -2169,16 +2160,16 @@
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
- KLD_LOCK();
+ sx_xlock(&kld_sx);
TAILQ_FOREACH(lf, &linker_files, link) {
error = LINKER_EACH_FUNCTION_NAME(lf,
sysctl_kern_function_list_iterate, req);
if (error) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
}
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (SYSCTL_OUT(req, "", 1));
}
Modified: trunk/sys/kern/kern_lock.c
===================================================================
--- trunk/sys/kern/kern_lock.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_lock.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008 Attilio Rao <attilio at FreeBSD.org>
* All rights reserved.
@@ -32,9 +33,10 @@
#include "opt_kdtrace.h"
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_lock.c 310979 2016-12-31 16:37:47Z mjg $");
#include <sys/param.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/lock_profile.h>
@@ -67,11 +69,6 @@
#define SQ_EXCLUSIVE_QUEUE 0
#define SQ_SHARED_QUEUE 1
-#ifdef ADAPTIVE_LOCKMGRS
-#define ALK_RETRIES 10
-#define ALK_LOOPS 10000
-#endif
-
#ifndef INVARIANTS
#define _lockmgr_assert(lk, what, file, line)
#define TD_LOCKS_INC(td)
@@ -120,10 +117,11 @@
} \
} while (0)
-#define LK_CAN_SHARE(x) \
- (((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 || \
- ((x) & LK_EXCLUSIVE_SPINNERS) == 0 || \
- curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
+#define LK_CAN_SHARE(x, flags) \
+ (((x) & LK_SHARE) && \
+ (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \
+ (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) || \
+ (curthread->td_pflags & TDP_DEADLKTREAT)))
#define LK_TRYOP(x) \
((x) & LK_NOWAIT)
@@ -142,15 +140,16 @@
#define lockmgr_xlocked(lk) \
(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
-static void assert_lockmgr(struct lock_object *lock, int how);
+static void assert_lockmgr(const struct lock_object *lock, int how);
#ifdef DDB
-static void db_show_lockmgr(struct lock_object *lock);
+static void db_show_lockmgr(const struct lock_object *lock);
#endif
-static void lock_lockmgr(struct lock_object *lock, int how);
+static void lock_lockmgr(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
-static int owner_lockmgr(struct lock_object *lock, struct thread **owner);
+static int owner_lockmgr(const struct lock_object *lock,
+ struct thread **owner);
#endif
-static int unlock_lockmgr(struct lock_object *lock);
+static uintptr_t unlock_lockmgr(struct lock_object *lock);
struct lock_class lock_class_lockmgr = {
.lc_name = "lockmgr",
@@ -166,8 +165,17 @@
#endif
};
+#ifdef ADAPTIVE_LOCKMGRS
+static u_int alk_retries = 10;
+static u_int alk_loops = 10000;
+static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
+ "lockmgr debugging");
+SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
+SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
+#endif
+
static __inline struct thread *
-lockmgr_xholder(struct lock *lk)
+lockmgr_xholder(const struct lock *lk)
{
uintptr_t x;
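
The compile-time ALK_RETRIES/ALK_LOOPS constants become run-time tunables under a new debug.lockmgr sysctl node, so adaptive spinning can be adjusted without rebuilding the kernel. A small userland sketch of reading one of them with sysctlbyname(3); the OIDs only exist on kernels built with options ADAPTIVE_LOCKMGRS (illustrative code, not part of this commit):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int retries;
	size_t len = sizeof(retries);

	if (sysctlbyname("debug.lockmgr.retries", &retries, &len,
	    NULL, 0) == -1) {
		/* Kernel built without ADAPTIVE_LOCKMGRS. */
		perror("debug.lockmgr.retries");
		return (1);
	}
	printf("debug.lockmgr.retries = %u\n", retries);
	return (0);
}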
@@ -232,8 +240,6 @@
u_int realexslp;
int queue, wakeup_swapper;
- TD_LOCKS_DEC(curthread);
- TD_SLOCKS_DEC(curthread);
WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
@@ -281,7 +287,7 @@
* exclusive waiters bit anyway.
* Please note that lk_exslpfail count may be lying about
* the real number of waiters with the LK_SLEEPFAIL flag on
- * because they may be used in conjuction with interruptible
+ * because they may be used in conjunction with interruptible
* sleeps so lk_exslpfail might be considered an 'upper limit'
* bound, including the edge cases.
*/
@@ -333,11 +339,13 @@
}
lock_profile_release_lock(&lk->lock_object);
+ TD_LOCKS_DEC(curthread);
+ TD_SLOCKS_DEC(curthread);
return (wakeup_swapper);
}
static void
-assert_lockmgr(struct lock_object *lock, int what)
+assert_lockmgr(const struct lock_object *lock, int what)
{
panic("lockmgr locks do not support assertions");
@@ -344,13 +352,13 @@
}
static void
-lock_lockmgr(struct lock_object *lock, int how)
+lock_lockmgr(struct lock_object *lock, uintptr_t how)
{
panic("lockmgr locks do not support sleep interlocking");
}
-static int
+static uintptr_t
unlock_lockmgr(struct lock_object *lock)
{
@@ -359,7 +367,7 @@
#ifdef KDTRACE_HOOKS
static int
-owner_lockmgr(struct lock_object *lock, struct thread **owner)
+owner_lockmgr(const struct lock_object *lock, struct thread **owner)
{
panic("lockmgr locks do not support owner inquiring");
@@ -387,14 +395,16 @@
iflags |= LO_WITNESS;
if (flags & LK_QUIET)
iflags |= LO_QUIET;
+ if (flags & LK_IS_VNODE)
+ iflags |= LO_IS_VNODE;
iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
+ lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
lk->lk_lock = LK_UNLOCKED;
lk->lk_recurse = 0;
lk->lk_exslpfail = 0;
lk->lk_timo = timo;
lk->lk_pri = pri;
- lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
STACK_ZERO(lk);
}
@@ -411,6 +421,14 @@
}
void
+lockdisableshare(struct lock *lk)
+{
+
+ lockmgr_assert(lk, KA_XLOCKED);
+ lk->lock_object.lo_flags |= LK_NOSHARE;
+}
+
+void
lockallowrecurse(struct lock *lk)
{
@@ -472,6 +490,9 @@
KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
__func__, file, line));
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
+ lk->lock_object.lo_name, file, line));
class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
if (panicstr != NULL) {
@@ -486,6 +507,7 @@
op = LK_EXCLUSIVE;
break;
case LK_UPGRADE:
+ case LK_TRYUPGRADE:
case LK_DOWNGRADE:
_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
file, line);
@@ -500,7 +522,7 @@
case LK_SHARED:
if (LK_CAN_WITNESS(flags))
WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
- file, line, ilk);
+ file, line, flags & LK_INTERLOCK ? ilk : NULL);
for (;;) {
x = lk->lk_lock;
@@ -511,7 +533,7 @@
* waiters, if we fail to acquire the shared lock
* loop back and retry.
*/
- if (LK_CAN_SHARE(x)) {
+ if (LK_CAN_SHARE(x, flags)) {
if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
x + LK_ONE_SHARER))
break;
@@ -562,6 +584,9 @@
CTR3(KTR_LOCK,
"%s: spinning on %p held by %p",
__func__, lk, owner);
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
/*
* If we are holding also an interlock drop it
@@ -577,11 +602,16 @@
while (LK_HOLDER(lk->lk_lock) ==
(uintptr_t)owner && TD_IS_RUNNING(owner))
cpu_spinwait();
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
continue;
} else if (LK_CAN_ADAPT(lk, flags) &&
(x & LK_SHARE) != 0 && LK_SHARERS(x) &&
- spintries < ALK_RETRIES) {
+ spintries < alk_retries) {
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
if (flags & LK_INTERLOCK) {
class->lc_unlock(ilk);
flags &= ~LK_INTERLOCK;
@@ -588,7 +618,7 @@
}
GIANT_SAVE();
spintries++;
- for (i = 0; i < ALK_LOOPS; i++) {
+ for (i = 0; i < alk_loops; i++) {
if (LOCK_LOG_TEST(&lk->lock_object, 0))
CTR4(KTR_LOCK,
"%s: shared spinning on %p with %u and %u",
@@ -595,12 +625,14 @@
__func__, lk, spintries, i);
x = lk->lk_lock;
if ((x & LK_SHARE) == 0 ||
- LK_CAN_SHARE(x) != 0)
+ LK_CAN_SHARE(x, flags) != 0)
break;
cpu_spinwait();
}
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
- if (i != ALK_LOOPS)
+ if (i != alk_loops)
continue;
}
#endif
@@ -616,7 +648,7 @@
* if the lock can be acquired in shared mode, try
* again.
*/
- if (LK_CAN_SHARE(x)) {
+ if (LK_CAN_SHARE(x, flags)) {
sleepq_release(&lk->lock_object);
continue;
}
@@ -683,6 +715,7 @@
}
break;
case LK_UPGRADE:
+ case LK_TRYUPGRADE:
_lockmgr_assert(lk, KA_SLOCKED, file, line);
v = lk->lk_lock;
x = v & LK_ALL_WAITERS;
@@ -703,6 +736,17 @@
}
/*
+ * In LK_TRYUPGRADE mode, do not drop the lock,
+ * returning EBUSY instead.
+ */
+ if (op == LK_TRYUPGRADE) {
+ LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
+ __func__, lk);
+ error = EBUSY;
+ break;
+ }
+
+ /*
* We have been unable to succeed in upgrading, so just
* give up the shared lock.
*/
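
The new LK_TRYUPGRADE operation lets a caller attempt an upgrade without LK_UPGRADE's fallback of silently dropping the shared hold: on contention it returns EBUSY and the caller still owns the lock shared. A hedged sketch of how a consumer might use it through the lockmgr(9) interface; sc->sc_lock stands for any struct lock and is not from this commit:

	error = lockmgr(&sc->sc_lock, LK_TRYUPGRADE, NULL);
	if (error == EBUSY) {
		/*
		 * Still holding the lock shared.  Take the slow path:
		 * drop it, reacquire exclusively and revalidate anything
		 * that was observed under the shared hold.
		 */
		lockmgr(&sc->sc_lock, LK_RELEASE, NULL);
		lockmgr(&sc->sc_lock, LK_EXCLUSIVE, NULL);
	}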
@@ -712,7 +756,8 @@
case LK_EXCLUSIVE:
if (LK_CAN_WITNESS(flags))
WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
- LOP_EXCLUSIVE, file, line, ilk);
+ LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
+ ilk : NULL);
/*
* If curthread already holds the lock and this one is
@@ -748,8 +793,10 @@
break;
}
- while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
- tid)) {
+ for (;;) {
+ if (lk->lk_lock == LK_UNLOCKED &&
+ atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+ break;
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -781,6 +828,9 @@
CTR3(KTR_LOCK,
"%s: spinning on %p held by %p",
__func__, lk, owner);
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
/*
* If we are holding also an interlock drop it
@@ -796,15 +846,20 @@
while (LK_HOLDER(lk->lk_lock) ==
(uintptr_t)owner && TD_IS_RUNNING(owner))
cpu_spinwait();
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
continue;
} else if (LK_CAN_ADAPT(lk, flags) &&
(x & LK_SHARE) != 0 && LK_SHARERS(x) &&
- spintries < ALK_RETRIES) {
+ spintries < alk_retries) {
if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
!atomic_cmpset_ptr(&lk->lk_lock, x,
x | LK_EXCLUSIVE_SPINNERS))
continue;
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
if (flags & LK_INTERLOCK) {
class->lc_unlock(ilk);
flags &= ~LK_INTERLOCK;
@@ -811,7 +866,7 @@
}
GIANT_SAVE();
spintries++;
- for (i = 0; i < ALK_LOOPS; i++) {
+ for (i = 0; i < alk_loops; i++) {
if (LOCK_LOG_TEST(&lk->lock_object, 0))
CTR4(KTR_LOCK,
"%s: shared spinning on %p with %u and %u",
@@ -821,8 +876,10 @@
break;
cpu_spinwait();
}
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
- if (i != ALK_LOOPS)
+ if (i != alk_loops)
continue;
}
#endif
@@ -928,9 +985,19 @@
}
break;
case LK_DOWNGRADE:
- _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
+ _lockmgr_assert(lk, KA_XLOCKED, file, line);
LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
+
+ /*
+ * Panic if the lock is recursed.
+ */
+ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
+ if (flags & LK_INTERLOCK)
+ class->lc_unlock(ilk);
+ panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
+ __func__, iwmesg, file, line);
+ }
TD_SLOCKS_INC(curthread);
/*
@@ -999,7 +1066,7 @@
* Please note that lk_exslpfail count may be lying
* about the real number of waiters with the
* LK_SLEEPFAIL flag on because they may be used in
- * conjuction with interruptible sleeps so
+ * conjunction with interruptible sleeps so
* lk_exslpfail might be considered an 'upper limit'
* bound, including the edge cases.
*/
@@ -1051,7 +1118,8 @@
case LK_DRAIN:
if (LK_CAN_WITNESS(flags))
WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
- LOP_EXCLUSIVE, file, line, ilk);
+ LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
+ ilk : NULL);
/*
* Trying to drain a lock we already own will result in a
@@ -1064,7 +1132,11 @@
__func__, iwmesg, file, line);
}
- while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
+ for (;;) {
+ if (lk->lk_lock == LK_UNLOCKED &&
+ atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+ break;
+
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -1111,7 +1183,7 @@
* Please note that lk_exslpfail count may be
* lying about the real number of waiters with
* the LK_SLEEPFAIL flag on because they may
- * be used in conjuction with interruptible
+ * be used in conjunction with interruptible
* sleeps so lk_exslpfail might be considered
* an 'upper limit' bound, including the edge
* cases.
@@ -1248,9 +1320,16 @@
return;
tid = (uintptr_t)curthread;
- _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
+ _lockmgr_assert(lk, KA_XLOCKED, file, line);
/*
+ * Panic if the lock is recursed.
+ */
+ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
+ panic("%s: disown a recursed lockmgr @ %s:%d\n",
+ __func__, file, line);
+
+ /*
* If the owner is already LK_KERNPROC just skip the whole operation.
*/
if (LK_HOLDER(lk->lk_lock) != tid)
@@ -1276,7 +1355,7 @@
}
void
-lockmgr_printinfo(struct lock *lk)
+lockmgr_printinfo(const struct lock *lk)
{
struct thread *td;
uintptr_t x;
@@ -1289,8 +1368,14 @@
(uintmax_t)LK_SHARERS(lk->lk_lock));
else {
td = lockmgr_xholder(lk);
- printf("lock type %s: EXCL by thread %p (pid %d)\n",
- lk->lock_object.lo_name, td, td->td_proc->p_pid);
+ if (td == (struct thread *)LK_KERNPROC)
+ printf("lock type %s: EXCL by KERNPROC\n",
+ lk->lock_object.lo_name);
+ else
+ printf("lock type %s: EXCL by thread %p "
+ "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
+ td, td->td_proc->p_pid, td->td_proc->p_comm,
+ td->td_tid);
}
x = lk->lk_lock;
@@ -1305,7 +1390,7 @@
}
int
-lockstatus(struct lock *lk)
+lockstatus(const struct lock *lk)
{
uintptr_t v, x;
int ret;
@@ -1335,7 +1420,7 @@
#endif
void
-_lockmgr_assert(struct lock *lk, int what, const char *file, int line)
+_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
{
int slocked = 0;
@@ -1428,12 +1513,12 @@
}
static void
-db_show_lockmgr(struct lock_object *lock)
+db_show_lockmgr(const struct lock_object *lock)
{
struct thread *td;
- struct lock *lk;
+ const struct lock *lk;
- lk = (struct lock *)lock;
+ lk = (const struct lock *)lock;
db_printf(" state: ");
if (lk->lk_lock == LK_UNLOCKED)
Modified: trunk/sys/kern/kern_lockf.c
===================================================================
--- trunk/sys/kern/kern_lockf.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_lockf.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008 Isilon Inc http://www.isilon.com/
* Authors: Doug Rabson <dfr at rabson.org>
@@ -59,7 +60,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_lockf.c 313729 2017-02-14 13:45:38Z avg $");
#include "opt_debug_lockf.h"
@@ -362,7 +363,7 @@
struct lock_owner *lo = lock->lf_owner;
if (lo) {
KASSERT(LIST_EMPTY(&lock->lf_outedges),
- ("freeing lock with dependancies"));
+ ("freeing lock with dependencies"));
KASSERT(LIST_EMPTY(&lock->lf_inedges),
("freeing lock with dependants"));
sx_xlock(&lf_lock_owners_lock);
@@ -469,6 +470,9 @@
return (EOVERFLOW);
end = start + oadd;
}
+
+retry_setlock:
+
/*
* Avoid the common case of unlocking when inode has no locks.
*/
@@ -684,7 +688,7 @@
break;
}
-#ifdef INVARIANTS
+#ifdef DIAGNOSTIC
/*
* Check for some can't happen stuff. In this case, the active
* lock list becoming disordered or containing mutually
@@ -745,6 +749,11 @@
free(freestate, M_LOCKF);
freestate = NULL;
}
+
+ if (error == EDOOFUS) {
+ KASSERT(ap->a_op == F_SETLK, ("EDOOFUS"));
+ goto retry_setlock;
+ }
return (error);
}
@@ -819,7 +828,7 @@
/*
* We can just free all the active locks since they
- * will have no dependancies (we removed them all
+ * will have no dependencies (we removed them all
* above). We don't need to bother locking since we
* are the last thread using this state structure.
*/
@@ -907,7 +916,7 @@
struct lockf_edge *e;
int error;
-#ifdef INVARIANTS
+#ifdef DIAGNOSTIC
LIST_FOREACH(e, &x->lf_outedges, le_outlink)
KASSERT(e->le_to != y, ("adding lock edge twice"));
#endif
@@ -1104,7 +1113,7 @@
/*
* Wake up a sleeping lock and remove it from the pending list now
- * that all its dependancies have been resolved. The caller should
+ * that all its dependencies have been resolved. The caller should
* arrange for the lock to be added to the active list, adjusting any
* existing locks for the same owner as needed.
*/
@@ -1129,9 +1138,9 @@
}
/*
- * Re-check all dependant locks and remove edges to locks that we no
+ * Re-check all dependent locks and remove edges to locks that we no
* longer block. If 'all' is non-zero, the lock has been removed and
- * we must remove all the dependancies, otherwise it has simply been
+ * we must remove all the dependencies, otherwise it has simply been
* reduced but remains active. Any pending locks which have been been
* unblocked are added to 'granted'
*/
@@ -1157,7 +1166,7 @@
}
/*
- * Set the start of an existing active lock, updating dependancies and
+ * Set the start of an existing active lock, updating dependencies and
* adding any newly woken locks to 'granted'.
*/
static void
@@ -1173,7 +1182,7 @@
}
/*
- * Set the end of an existing active lock, updating dependancies and
+ * Set the end of an existing active lock, updating dependencies and
* adding any newly woken locks to 'granted'.
*/
static void
@@ -1196,7 +1205,7 @@
* pending locks as a result of downgrading/unlocking. We simply
* activate the newly granted locks by looping.
*
- * Since the new lock already has its dependancies set up, we always
+ * Since the new lock already has its dependencies set up, we always
* add it to the list (unless its an unlock request). This may
* fragment the lock list in some pathological cases but its probably
* not a real problem.
@@ -1324,7 +1333,7 @@
* may allow some other pending lock to become
* active. Consider this case:
*
- * Owner Action Result Dependancies
+ * Owner Action Result Dependencies
*
* A: lock [0..0] succeeds
* B: lock [2..2] succeeds
@@ -1460,7 +1469,7 @@
lock->lf_refs++;
error = sx_sleep(lock, &state->ls_lock, priority, lockstr, 0);
if (lf_free_lock(lock)) {
- error = EINTR;
+ error = EDOOFUS;
goto out;
}
@@ -1832,7 +1841,7 @@
/*
* This cannot cause a deadlock since any edges we would add
* to splitlock already exist in lock1. We must be sure to add
- * necessary dependancies to splitlock before we reduce lock1
+ * necessary dependencies to splitlock before we reduce lock1
* otherwise we may accidentally grant a pending lock that
* was blocked by the tail end of lock1.
*/
Modified: trunk/sys/kern/kern_lockstat.c
===================================================================
--- trunk/sys/kern/kern_lockstat.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_lockstat.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 2008-2009 Stacey Son <sson at FreeBSD.org>
*
@@ -22,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_lockstat.c 285759 2015-07-21 17:16:37Z markj $
*/
/*
@@ -36,9 +37,10 @@
#ifdef KDTRACE_HOOKS
-#include <sys/time.h>
#include <sys/types.h>
+#include <sys/lock.h>
#include <sys/lockstat.h>
+#include <sys/time.h>
/*
* The following must match the type definition of dtrace_probe. It is
@@ -47,14 +49,19 @@
uint32_t lockstat_probemap[LS_NPROBES];
void (*lockstat_probe_func)(uint32_t, uintptr_t, uintptr_t,
uintptr_t, uintptr_t, uintptr_t);
+int lockstat_enabled = 0;
-
uint64_t
-lockstat_nsecs(void)
+lockstat_nsecs(struct lock_object *lo)
{
struct bintime bt;
uint64_t ns;
+ if (!lockstat_enabled)
+ return (0);
+ if ((lo->lo_flags & LO_NOPROFILE) != 0)
+ return (0);
+
binuptime(&bt);
ns = bt.sec * (uint64_t)1000000000;
ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32;
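
With the new early returns, lockstat_nsecs() costs only a flag test when lockstat is disabled or the lock is marked LO_NOPROFILE; the bintime-to-nanoseconds conversion itself is unchanged. A standalone userland illustration of that conversion, with 1.5 seconds as an example input (not code from this commit):

#include <stdint.h>
#include <stdio.h>

/* Scale a bintime (seconds + 64-bit binary fraction) to nanoseconds. */
static uint64_t
bintime_to_ns(uint64_t sec, uint64_t frac)
{
	uint64_t ns;

	ns = sec * (uint64_t)1000000000;
	ns += ((uint64_t)1000000000 * (uint32_t)(frac >> 32)) >> 32;
	return (ns);
}

int
main(void)
{
	/* 1.5 s: a fraction of one half is 1 << 63. */
	printf("%ju ns\n", (uintmax_t)bintime_to_ns(1, (uint64_t)1 << 63));
	return (0);
}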
Modified: trunk/sys/kern/kern_loginclass.c
===================================================================
--- trunk/sys/kern/kern_loginclass.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_loginclass.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2011 The FreeBSD Foundation
* All rights reserved.
@@ -26,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_loginclass.c 302229 2016-06-27 21:25:01Z bdrewery $
*/
/*
@@ -43,7 +44,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_loginclass.c 302229 2016-06-27 21:25:01Z bdrewery $");
#include <sys/param.h>
#include <sys/eventhandler.h>
@@ -69,10 +70,8 @@
* Lock protecting loginclasses list.
*/
static struct mtx loginclasses_lock;
+MTX_SYSINIT(loginclasses_init, &loginclasses_lock, "loginclasses lock", MTX_DEF);
-static void lc_init(void);
-SYSINIT(loginclass, SI_SUB_CPU, SI_ORDER_FIRST, lc_init, NULL);
-
void
loginclass_hold(struct loginclass *lc)
{
@@ -207,7 +206,7 @@
PROC_LOCK(p);
oldcred = crcopysafe(p, newcred);
newcred->cr_loginclass = newlc;
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
@@ -229,10 +228,3 @@
(callback)(lc->lc_racct, arg2, arg3);
mtx_unlock(&loginclasses_lock);
}
-
-static void
-lc_init(void)
-{
-
- mtx_init(&loginclasses_lock, "loginclasses lock", NULL, MTX_DEF);
-}
Modified: trunk/sys/kern/kern_malloc.c
===================================================================
--- trunk/sys/kern/kern_malloc.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_malloc.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,7 +1,9 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1987, 1991, 1993
* The Regents of the University of California.
* Copyright (c) 2005-2009 Robert N. M. Watson
+ * Copyright (c) 2008 Otto Moerbeek <otto at drijf.net> (mallocarray)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -43,7 +45,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_malloc.c 328276 2018-01-23 04:37:31Z kp $");
#include "opt_ddb.h"
#include "opt_kdtrace.h"
@@ -55,7 +57,6 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
@@ -62,9 +63,11 @@
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/time.h>
+#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -113,14 +116,7 @@
MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
-static void kmeminit(void *);
-SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL);
-
-static MALLOC_DEFINE(M_FREE, "free", "should be on free list");
-
static struct malloc_type *kmemstatistics;
-static vm_offset_t kmembase;
-static vm_offset_t kmemlimit;
static int kmemcount;
#define KMEM_ZSHIFT 4
@@ -127,7 +123,7 @@
#define KMEM_ZBASE 16
#define KMEM_ZMASK (KMEM_ZBASE - 1)
-#define KMEM_ZMAX PAGE_SIZE
+#define KMEM_ZMAX 65536
#define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT)
static uint8_t kmemsize[KMEM_ZSIZE + 1];
@@ -158,21 +154,10 @@
{1024, "1024", },
{2048, "2048", },
{4096, "4096", },
-#if PAGE_SIZE > 4096
{8192, "8192", },
-#if PAGE_SIZE > 8192
{16384, "16384", },
-#if PAGE_SIZE > 16384
{32768, "32768", },
-#if PAGE_SIZE > 32768
{65536, "65536", },
-#if PAGE_SIZE > 65536
-#error "Unsupported PAGE_SIZE"
-#endif /* 65536 */
-#endif /* 32768 */
-#endif /* 16384 */
-#endif /* 8192 */
-#endif /* 4096 */
{0, NULL},
};
@@ -190,6 +175,10 @@
SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0,
"Size of kernel memory");
+static u_long kmem_zmax = KMEM_ZMAX;
+SYSCTL_ULONG(_vm, OID_AUTO, kmem_zmax, CTLFLAG_RDTUN, &kmem_zmax, 0,
+ "Maximum allocation size that malloc(9) would use UMA as backend");
+
static u_long vm_kmem_size_min;
SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, &vm_kmem_size_min, 0,
"Minimum size of kernel memory");
@@ -205,12 +194,12 @@
static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size,
CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kmem_map_size, "LU", "Current kmem_map allocation size");
+ sysctl_kmem_map_size, "LU", "Current kmem allocation size");
static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free,
CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kmem_map_free, "LU", "Largest contiguous free range in kmem_map");
+ sysctl_kmem_map_free, "LU", "Free space in kmem");
/*
* The malloc_mtx protects the kmemstatistics linked list.
@@ -255,7 +244,7 @@
{
u_long size;
- size = kmem_map->size;
+ size = vmem_size(kmem_arena, VMEM_ALLOC);
return (sysctl_handle_long(oidp, &size, 0, req));
}
@@ -264,10 +253,7 @@
{
u_long size;
- vm_map_lock_read(kmem_map);
- size = kmem_map->root != NULL ? kmem_map->root->max_free :
- kmem_map->max_offset - kmem_map->min_offset;
- vm_map_unlock_read(kmem_map);
+ size = vmem_size(kmem_arena, VMEM_FREE);
return (sysctl_handle_long(oidp, &size, 0, req));
}
@@ -408,6 +394,43 @@
}
/*
+ * contigmalloc:
+ *
+ * Allocate a block of physically contiguous memory.
+ *
+ * If M_NOWAIT is set, this routine will not block and return NULL if
+ * the allocation fails.
+ */
+void *
+contigmalloc(unsigned long size, struct malloc_type *type, int flags,
+ vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
+ vm_paddr_t boundary)
+{
+ void *ret;
+
+ ret = (void *)kmem_alloc_contig(kernel_arena, size, flags, low, high,
+ alignment, boundary, VM_MEMATTR_DEFAULT);
+ if (ret != NULL)
+ malloc_type_allocated(type, round_page(size));
+ return (ret);
+}
+
+/*
+ * contigfree:
+ *
+ * Free a block of memory allocated by contigmalloc.
+ *
+ * This routine may not block.
+ */
+void
+contigfree(void *addr, unsigned long size, struct malloc_type *type)
+{
+
+ kmem_free(kernel_arena, (vm_offset_t)addr, size);
+ malloc_type_freed(type, round_page(size));
+}
+
+/*
* malloc:
*
* Allocate a block of memory.
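
contigmalloc()/contigfree() move into kern_malloc.c as thin wrappers around kmem_alloc_contig()/kmem_free() on kernel_arena, with the allocation charged to the caller's malloc type. A hedged sketch of typical driver-style usage; M_DEVBUF, the 64 KB size and the physical-address constraints are example values, not from this commit:

	void *buf;

	/* 64KB, physically contiguous, below 4GB, page aligned. */
	buf = contigmalloc(65536, M_DEVBUF, M_NOWAIT | M_ZERO,
	    0, 0xffffffffUL, PAGE_SIZE, 0);
	if (buf == NULL)
		return (ENOMEM);
	/* ... hand vtophys(buf) to the hardware ... */
	contigfree(buf, 65536, M_DEVBUF);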
@@ -458,7 +481,7 @@
("malloc(M_WAITOK) in interrupt context"));
#ifdef DEBUG_MEMGUARD
- if (memguard_cmp(mtp, size)) {
+ if (memguard_cmp_mtp(mtp, size)) {
va = memguard_alloc(size, flags);
if (va != NULL)
return (va);
@@ -470,7 +493,7 @@
size = redzone_size_ntor(size);
#endif
- if (size <= KMEM_ZMAX) {
+ if (size <= kmem_zmax) {
mtip = mtp->ks_handle;
if (size & KMEM_ZMASK)
size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
@@ -508,6 +531,16 @@
return ((void *) va);
}
+void *
+mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags)
+{
+
+ if (WOULD_OVERFLOW(nmemb, size))
+ panic("mallocarray: %zu * %zu overflowed", nmemb, size);
+
+ return (malloc(size * nmemb, type, flags));
+}
+
/*
* free:
*
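
mallocarray() guards multiplicative allocations: if nmemb * size would overflow it panics instead of quietly allocating a short buffer. The kernel's WOULD_OVERFLOW() test lives in sys/malloc.h; the userland sketch below uses an equivalent OpenBSD-style check purely to illustrate the overflow condition, not the kernel code itself:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Overflow is only possible when both operands reach sqrt(SIZE_MAX). */
#define MUL_NO_OVERFLOW	((size_t)1 << (sizeof(size_t) * 8 / 2))

static void *
xmallocarray(size_t nmemb, size_t size)
{
	if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
	    nmemb > 0 && SIZE_MAX / nmemb < size) {
		fprintf(stderr, "xmallocarray: %zu * %zu overflows\n",
		    nmemb, size);
		abort();	/* the kernel panics here instead */
	}
	return (malloc(nmemb * size));
}

int
main(void)
{
	free(xmallocarray(16, 1024));	/* fine */
	xmallocarray(SIZE_MAX / 2, 4);	/* aborts */
	return (0);
}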
@@ -545,7 +578,6 @@
panic("free: address %p(%p) has not been allocated.\n",
addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
-
if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
#ifdef INVARIANTS
struct malloc_type **mtpp = addr;
@@ -647,45 +679,59 @@
}
/*
- * Initialize the kernel memory allocator
+ * Wake the uma reclamation pagedaemon thread when we exhaust KVA. It
+ * will call the lowmem handler and uma_reclaim() callbacks in a
+ * context that is safe.
*/
-/* ARGSUSED*/
static void
-kmeminit(void *dummy)
+kmem_reclaim(vmem_t *vm, int flags)
{
- uint8_t indx;
+
+ uma_reclaim_wakeup();
+ pagedaemon_wakeup();
+}
+
+CTASSERT(VM_KMEM_SIZE_SCALE >= 1);
+
+/*
+ * Initialize the kernel memory (kmem) arena.
+ */
+void
+kmeminit(void)
+{
u_long mem_size, tmp;
- int i;
-
- mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
/*
- * Try to auto-tune the kernel memory size, so that it is
- * more applicable for a wider range of machine sizes. The
- * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
- * available.
+ * Calculate the amount of kernel virtual address (KVA) space that is
+ * preallocated to the kmem arena. In order to support a wide range
+ * of machines, it is a function of the physical memory size,
+ * specifically,
*
- * Note that the kmem_map is also used by the zone allocator,
- * so make sure that there is enough space.
+ * min(max(physical memory size / VM_KMEM_SIZE_SCALE,
+ * VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX)
+ *
+ * Every architecture must define an integral value for
+ * VM_KMEM_SIZE_SCALE. However, the definitions of VM_KMEM_SIZE_MIN
+ * and VM_KMEM_SIZE_MAX, which represent respectively the floor and
+ * ceiling on this preallocation, are optional. Typically,
+ * VM_KMEM_SIZE_MAX is itself a function of the available KVA space on
+ * a given architecture.
*/
- vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE;
mem_size = cnt.v_page_count;
-#if defined(VM_KMEM_SIZE_SCALE)
vm_kmem_size_scale = VM_KMEM_SIZE_SCALE;
-#endif
TUNABLE_INT_FETCH("vm.kmem_size_scale", &vm_kmem_size_scale);
- if (vm_kmem_size_scale > 0 &&
- (mem_size / vm_kmem_size_scale) > (vm_kmem_size / PAGE_SIZE))
- vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE;
+ if (vm_kmem_size_scale < 1)
+ vm_kmem_size_scale = VM_KMEM_SIZE_SCALE;
+ vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE;
+
#if defined(VM_KMEM_SIZE_MIN)
vm_kmem_size_min = VM_KMEM_SIZE_MIN;
#endif
TUNABLE_ULONG_FETCH("vm.kmem_size_min", &vm_kmem_size_min);
- if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min) {
+ if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min)
vm_kmem_size = vm_kmem_size_min;
- }
#if defined(VM_KMEM_SIZE_MAX)
vm_kmem_size_max = VM_KMEM_SIZE_MAX;
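
The rewritten kmeminit() comment spells the sizing rule out: the kmem arena gets min(max(physical memory / VM_KMEM_SIZE_SCALE, VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX) of KVA, later limited to twice physical memory. A standalone userland illustration of that clamp; the scale, floor, ceiling and 8 GB figures are example values, not the real per-architecture VM_KMEM_SIZE_* definitions:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t page_size = 4096;
	uint64_t scale = 3;				/* VM_KMEM_SIZE_SCALE */
	uint64_t kmem_min = UINT64_C(12) << 20;		/* VM_KMEM_SIZE_MIN */
	uint64_t kmem_max = UINT64_C(1) << 32;		/* VM_KMEM_SIZE_MAX */
	uint64_t pages = (UINT64_C(8) << 30) / page_size;	/* 8GB of RAM */
	uint64_t kmem;

	kmem = (pages / scale) * page_size;
	if (kmem < kmem_min)
		kmem = kmem_min;
	if (kmem > kmem_max)
		kmem = kmem_max;
	printf("kmem arena: %ju MB\n", (uintmax_t)(kmem >> 20));
	return (0);
}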
@@ -694,26 +740,29 @@
if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max)
vm_kmem_size = vm_kmem_size_max;
- /* Allow final override from the kernel environment */
- TUNABLE_ULONG_FETCH("vm.kmem_size", &vm_kmem_size);
-
/*
- * Limit kmem virtual size to twice the physical memory.
- * This allows for kmem map sparseness, but limits the size
- * to something sane. Be careful to not overflow the 32bit
- * ints while doing the check or the adjustment.
+ * Alternatively, the amount of KVA space that is preallocated to the
+ * kmem arena can be set statically at compile-time or manually
+ * through the kernel environment. However, it is still limited to
+ * twice the physical memory size, which has been sufficient to handle
+ * the most severe cases of external fragmentation in the kmem arena.
*/
+#if defined(VM_KMEM_SIZE)
+ vm_kmem_size = VM_KMEM_SIZE;
+#endif
+ TUNABLE_ULONG_FETCH("vm.kmem_size", &vm_kmem_size);
if (vm_kmem_size / 2 / PAGE_SIZE > mem_size)
vm_kmem_size = 2 * mem_size * PAGE_SIZE;
+ vm_kmem_size = round_page(vm_kmem_size);
#ifdef DEBUG_MEMGUARD
tmp = memguard_fudge(vm_kmem_size, kernel_map);
#else
tmp = vm_kmem_size;
#endif
- kmem_map = kmem_suballoc(kernel_map, &kmembase, &kmemlimit,
- tmp, TRUE);
- kmem_map->system_map = 1;
+ vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE,
+ 0, 0);
+ vmem_set_reclaim(kmem_arena, kmem_reclaim);
#ifdef DEBUG_MEMGUARD
/*
@@ -721,11 +770,29 @@
* replacement allocator used for detecting tamper-after-free
* scenarios as they occur. It is only used for debugging.
*/
- memguard_init(kmem_map);
+ memguard_init(kmem_arena);
#endif
+}
+/*
+ * Initialize the kernel memory allocator
+ */
+/* ARGSUSED*/
+static void
+mallocinit(void *dummy)
+{
+ int i;
+ uint8_t indx;
+
+ mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
+
+ kmeminit();
+
uma_startup2();
+ if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX)
+ kmem_zmax = KMEM_ZMAX;
+
mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
#ifdef INVARIANTS
mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
@@ -750,9 +817,10 @@
}
for (;i <= size; i+= KMEM_ZBASE)
kmemsize[i >> KMEM_ZSHIFT] = indx;
-
+
}
}
+SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, mallocinit, NULL);
void
malloc_init(void *data)
Modified: trunk/sys/kern/kern_mbuf.c
===================================================================
--- trunk/sys/kern/kern_mbuf.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mbuf.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,6 +1,7 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004, 2005,
- * Bosko Milekic <bmilekic at FreeBSD.org>. All rights reserved.
+ * Bosko Milekic <bmilekic at FreeBSD.org>. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mbuf.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include "opt_param.h"
@@ -47,6 +48,7 @@
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
+#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>
@@ -76,7 +78,7 @@
* [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ]
* | \________ |
* [ Cluster Keg ] \ /
- * | [ Mbuf Keg ]
+ * | [ Mbuf Keg ]
* [ Cluster Slabs ] |
* | [ Mbuf Slabs ]
* \____________(VM)_________________/
@@ -91,44 +93,67 @@
*
* Whenever an object is allocated from the underlying global
* memory pool it gets pre-initialized with the _zinit_ functions.
- * When the Keg's are overfull objects get decomissioned with
+ * When the Keg's are overfull objects get decommissioned with
* _zfini_ functions and free'd back to the global memory pool.
*
*/
+int nmbufs; /* limits number of mbufs */
int nmbclusters; /* limits number of mbuf clusters */
int nmbjumbop; /* limits number of page size jumbo clusters */
int nmbjumbo9; /* limits number of 9k jumbo clusters */
int nmbjumbo16; /* limits number of 16k jumbo clusters */
-struct mbstat mbstat;
+static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
+
+SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0,
+ "Maximum real memory allocatable to various mbuf types");
+
/*
- * tunable_mbinit() has to be run before init_maxsockets() thus
- * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
- * runs at SI_ORDER_ANY.
+ * tunable_mbinit() has to be run before any mbuf allocations are done.
*/
static void
tunable_mbinit(void *dummy)
{
+ quad_t realmem;
- /* This has to be done before VM init. */
+ /*
+ * The default limit for all mbuf related memory is 1/2 of all
+ * available kernel memory (physical or kmem).
+ * At most it can be 3/4 of available kernel memory.
+ */
+ realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
+ maxmbufmem = realmem / 2;
+ TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
+ if (maxmbufmem > realmem / 4 * 3)
+ maxmbufmem = realmem / 4 * 3;
+
TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
if (nmbclusters == 0)
- nmbclusters = 1024 + maxusers * 64;
+ nmbclusters = maxmbufmem / MCLBYTES / 4;
TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
if (nmbjumbop == 0)
- nmbjumbop = nmbclusters / 2;
+ nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;
TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
if (nmbjumbo9 == 0)
- nmbjumbo9 = nmbclusters / 4;
+ nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;
TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
if (nmbjumbo16 == 0)
- nmbjumbo16 = nmbclusters / 8;
+ nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;
+
+ /*
+ * We need at least as many mbufs as we have clusters of
+ * the various types added together.
+ */
+ TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
+ if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
+ nmbufs = lmax(maxmbufmem / MSIZE / 5,
+ nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
-SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
+SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
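
tunable_mbinit() now derives every mbuf limit from a single memory budget: maxmbufmem defaults to half of min(physical memory, vm_kmem_size), can be raised to at most three quarters of it, and the per-zone cluster counts are carved out of that budget. A standalone userland sketch of the arithmetic; the 8 GB figure and the 4 KB page-size cluster constants are example values, not from this commit:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int64_t realmem = INT64_C(8) << 30;	/* min(physmem, vm_kmem_size) */
	int64_t maxmbufmem = realmem / 2;	/* default budget */

	printf("maxmbufmem  %jd MB\n", (intmax_t)(maxmbufmem >> 20));
	printf("nmbclusters %jd\n", (intmax_t)(maxmbufmem / 2048 / 4));
	printf("nmbjumbop   %jd\n", (intmax_t)(maxmbufmem / 4096 / 4));
	printf("nmbjumbo9   %jd\n", (intmax_t)(maxmbufmem / (9 * 1024) / 6));
	printf("nmbjumbo16  %jd\n", (intmax_t)(maxmbufmem / (16 * 1024) / 6));
	return (0);
}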
@@ -136,11 +161,12 @@
int error, newnmbclusters;
newnmbclusters = nmbclusters;
- error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbclusters > nmbclusters) {
+ error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
+ if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
+ if (newnmbclusters > nmbclusters &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbclusters = newnmbclusters;
- uma_zone_set_max(zone_clust, nmbclusters);
+ nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
EVENTHANDLER_INVOKE(nmbclusters_change);
} else
error = EINVAL;
@@ -157,11 +183,12 @@
int error, newnmbjumbop;
newnmbjumbop = nmbjumbop;
- error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbjumbop> nmbjumbop) {
+ error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
+ if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
+ if (newnmbjumbop > nmbjumbop &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbop = newnmbjumbop;
- uma_zone_set_max(zone_jumbop, nmbjumbop);
+ nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
} else
error = EINVAL;
}
@@ -169,9 +196,8 @@
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbop, 0, sysctl_nmbjumbop, "IU",
- "Maximum number of mbuf page size jumbo clusters allowed");
+ "Maximum number of mbuf page size jumbo clusters allowed");
-
static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
@@ -178,11 +204,12 @@
int error, newnmbjumbo9;
newnmbjumbo9 = nmbjumbo9;
- error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbjumbo9> nmbjumbo9) {
+ error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
+ if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
+ if (newnmbjumbo9 > nmbjumbo9 &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbo9 = newnmbjumbo9;
- uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+ nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
} else
error = EINVAL;
}
@@ -190,7 +217,7 @@
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
- "Maximum number of mbuf 9k jumbo clusters allowed");
+ "Maximum number of mbuf 9k jumbo clusters allowed");
static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
@@ -198,11 +225,12 @@
int error, newnmbjumbo16;
newnmbjumbo16 = nmbjumbo16;
- error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbjumbo16> nmbjumbo16) {
+ error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
+ if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
+ if (newnmbjumbo16 > nmbjumbo16 &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbo16 = newnmbjumbo16;
- uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+ nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
} else
error = EINVAL;
}
@@ -212,11 +240,27 @@
&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
"Maximum number of mbuf 16k jumbo clusters allowed");
+static int
+sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
+{
+ int error, newnmbufs;
+ newnmbufs = nmbufs;
+ error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
+ if (error == 0 && req->newptr && newnmbufs != nmbufs) {
+ if (newnmbufs > nmbufs) {
+ nmbufs = newnmbufs;
+ nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
+ EVENTHANDLER_INVOKE(nmbufs_change);
+ } else
+ error = EINVAL;
+ }
+ return (error);
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
+&nmbufs, 0, sysctl_nmbufs, "IU",
+ "Maximum number of mbufs allowed");
-SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
- "Mbuf general information and statistics");
-
/*
* Zones from which we allocate.
*/
@@ -241,16 +285,14 @@
static void mb_zfini_pack(void *, int);
static void mb_reclaim(void *);
-static void mbuf_init(void *);
-static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
+static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
-/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
+/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
/*
* Initialize FreeBSD Network buffer allocation.
*/
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
static void
mbuf_init(void *dummy)
{
@@ -266,6 +308,9 @@
NULL, NULL,
#endif
MSIZE - 1, UMA_ZONE_MAXBUCKET);
+ if (nmbufs > 0)
+ nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
+ uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -276,7 +321,8 @@
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
if (nmbclusters > 0)
- uma_zone_set_max(zone_clust, nmbclusters);
+ nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
+ uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
@@ -291,7 +337,8 @@
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
if (nmbjumbop > 0)
- uma_zone_set_max(zone_jumbop, nmbjumbop);
+ nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
+ uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -301,9 +348,10 @@
NULL, NULL,
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
if (nmbjumbo9 > 0)
- uma_zone_set_max(zone_jumbo9, nmbjumbo9);
- uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
+ nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+ uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -313,9 +361,10 @@
NULL, NULL,
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
if (nmbjumbo16 > 0)
- uma_zone_set_max(zone_jumbo16, nmbjumbo16);
- uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
+ nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+ uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
NULL, NULL,
@@ -331,26 +380,8 @@
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
-
- /*
- * [Re]set counters and local statistics knobs.
- * XXX Some of these should go and be replaced, but UMA stat
- * gathering needs to be revised.
- */
- mbstat.m_mbufs = 0;
- mbstat.m_mclusts = 0;
- mbstat.m_drain = 0;
- mbstat.m_msize = MSIZE;
- mbstat.m_mclbytes = MCLBYTES;
- mbstat.m_minclsize = MINCLSIZE;
- mbstat.m_mlen = MLEN;
- mbstat.m_mhlen = MHLEN;
- mbstat.m_numtypes = MT_NTYPES;
-
- mbstat.m_mcfail = mbstat.m_mpfail = 0;
- mbstat.sf_iocnt = 0;
- mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
/*
* UMA backend page allocator for the jumbo frame zones.
@@ -359,12 +390,12 @@
* pages.
*/
static void *
-mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
+mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
/* Inform UMA that this allocator uses kernel_map/object. */
*flags = UMA_SLAB_KERNEL;
- return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
+ return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
(vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}
@@ -380,9 +411,7 @@
{
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
int error;
-#endif
int flags;
short type;
@@ -389,9 +418,7 @@
#ifdef INVARIANTS
trash_ctor(mem, size, arg, how);
#endif
- m = (struct mbuf *)mem;
args = (struct mb_args *)arg;
- flags = args->flags;
type = args->type;
/*
@@ -401,31 +428,13 @@
if (type == MT_NOINIT)
return (0);
- m->m_next = NULL;
- m->m_nextpkt = NULL;
- m->m_len = 0;
- m->m_flags = flags;
- m->m_type = type;
- if (flags & M_PKTHDR) {
- m->m_data = m->m_pktdat;
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.header = NULL;
- m->m_pkthdr.len = 0;
- m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
- m->m_pkthdr.flowid = 0;
- SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
- /* If the label init fails, fail the alloc */
- error = mac_mbuf_init(m, how);
- if (error)
- return (error);
-#endif
- } else
- m->m_data = m->m_dat;
- return (0);
+ m = (struct mbuf *)mem;
+ flags = args->flags;
+ MPASS((flags & M_NOFREE) == 0);
+
+ error = m_init(m, NULL, size, how, type, flags);
+
+ return (error);
}
/*
@@ -435,12 +444,12 @@
mb_dtor_mbuf(void *mem, int size, void *arg)
{
struct mbuf *m;
- unsigned long flags;
+ unsigned long flags;
m = (struct mbuf *)mem;
flags = (unsigned long)arg;
- if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
+ if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
m_tag_delete_chain(m, NULL);
KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
@@ -540,6 +549,7 @@
m->m_ext.ext_arg2 = NULL;
m->m_ext.ext_size = size;
m->m_ext.ext_type = type;
+ m->m_ext.ext_flags = 0;
m->m_ext.ref_cnt = refcnt;
}
@@ -611,10 +621,7 @@
{
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
- int error;
-#endif
- int flags;
+ int error, flags;
short type;
m = (struct mbuf *)mem;
@@ -621,37 +628,19 @@
args = (struct mb_args *)arg;
flags = args->flags;
type = args->type;
+ MPASS((flags & M_NOFREE) == 0);
#ifdef INVARIANTS
trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
- m->m_next = NULL;
- m->m_nextpkt = NULL;
- m->m_data = m->m_ext.ext_buf;
- m->m_len = 0;
- m->m_flags = (flags | M_EXT);
- m->m_type = type;
- if (flags & M_PKTHDR) {
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.len = 0;
- m->m_pkthdr.header = NULL;
- m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
- m->m_pkthdr.flowid = 0;
- SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
- /* If the label init fails, fail the alloc */
- error = mac_mbuf_init(m, how);
- if (error)
- return (error);
-#endif
- }
+ error = m_init(m, NULL, size, how, type, flags);
+
/* m_ext is already initialized. */
+ m->m_data = m->m_ext.ext_buf;
+ m->m_flags = (flags | M_EXT);
- return (0);
+ return (error);
}
int
@@ -661,15 +650,20 @@
int error;
#endif
m->m_data = m->m_pktdat;
+ m->m_pkthdr.rcvif = NULL;
SLIST_INIT(&m->m_pkthdr.tags);
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.header = NULL;
m->m_pkthdr.len = 0;
m->m_pkthdr.flowid = 0;
m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
+ m->m_pkthdr.fibnum = 0;
+ m->m_pkthdr.cosqos = 0;
+ m->m_pkthdr.rsstype = 0;
+ m->m_pkthdr.l2hlen = 0;
+ m->m_pkthdr.l3hlen = 0;
+ m->m_pkthdr.l4hlen = 0;
+ m->m_pkthdr.l5hlen = 0;
+ m->m_pkthdr.PH_per.sixtyfour[0] = 0;
+ m->m_pkthdr.PH_loc.sixtyfour[0] = 0;
#ifdef MAC
/* If the label init fails, fail the alloc */
error = mac_mbuf_init(m, how);
Modified: trunk/sys/kern/kern_mib.c
===================================================================
--- trunk/sys/kern/kern_mib.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mib.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -36,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mib.c 294283 2016-01-18 18:27:21Z jhb $");
#include "opt_compat.h"
#include "opt_posix.h"
@@ -55,35 +56,35 @@
#include <sys/sx.h>
#include <sys/unistd.h>
-SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW, 0,
"Sysctl internal magic");
-SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0,
+SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0,
"High kernel, proc, limits &c");
-SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_VM, vm, CTLFLAG_RW, 0,
"Virtual memory");
-SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_VFS, vfs, CTLFLAG_RW, 0,
"File system");
-SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_NET, net, CTLFLAG_RW, 0,
"Network, (see socket.h)");
-SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_DEBUG, debug, CTLFLAG_RW, 0,
"Debugging");
SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0,
"Sizeof various things");
-SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_HW, hw, CTLFLAG_RW, 0,
"hardware");
-SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_MACHDEP, machdep, CTLFLAG_RW, 0,
"machine dependent");
-SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_USER, user, CTLFLAG_RW, 0,
"user-level");
-SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0,
"p1003_1b, (see p1003_1b.h)");
-SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, compat, CTLFLAG_RW, 0,
"Compatibility code");
-SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, security, CTLFLAG_RW, 0,
"Security");
#ifdef REGRESSION
-SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, regression, CTLFLAG_RW, 0,
"Regression test MIB");
#endif
@@ -90,11 +91,8 @@
SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE,
kern_ident, 0, "Kernel identifier");
-SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE|
- CTLFLAG_CAPRD, osrelease, 0, "Operating system release");
-
SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, BSD, "Operating system revision");
+ SYSCTL_NULL_INT_PTR, BSD, "Operating system revision");
SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE,
version, 0, "Kernel version");
@@ -105,13 +103,6 @@
SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE|
CTLFLAG_CAPRD, ostype, 0, "Operating system type");
-/*
- * NOTICE: The *userland* release date is available in
- * /usr/include/osreldate.h
- */
-SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD|CTLFLAG_CAPRD,
- &osreldate, 0, "Kernel release date");
-
SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN,
&maxproc, 0, "Maximum number of processes");
@@ -122,10 +113,10 @@
&maxusers, 0, "Hint for kernel tuning");
SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, ARG_MAX, "Maximum bytes of argument to execve(2)");
+ SYSCTL_NULL_INT_PTR, ARG_MAX, "Maximum bytes of argument to execve(2)");
SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, _POSIX_VERSION, "Version of POSIX attempting to comply to");
+ SYSCTL_NULL_INT_PTR, _POSIX_VERSION, "Version of POSIX attempting to comply to");
SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
&ngroups_max, 0,
@@ -132,14 +123,14 @@
"Maximum number of supplemental groups a user can belong to");
SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, 1, "Whether job control is available");
+ SYSCTL_NULL_INT_PTR, 1, "Whether job control is available");
#ifdef _POSIX_SAVED_IDS
SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, 1, "Whether saved set-group/user ID is available");
+ SYSCTL_NULL_INT_PTR, 1, "Whether saved set-group/user ID is available");
#else
SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, 0, "Whether saved set-group/user ID is available");
+ SYSCTL_NULL_INT_PTR, 0, "Whether saved set-group/user ID is available");
#endif
char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */
@@ -151,10 +142,10 @@
&mp_ncpus, 0, "Number of active CPUs");
SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, BYTE_ORDER, "System byte order");
+ SYSCTL_NULL_INT_PTR, BYTE_ORDER, "System byte order");
SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, PAGE_SIZE, "System memory page size");
+ SYSCTL_NULL_INT_PTR, PAGE_SIZE, "System memory page size");
static int
sysctl_kern_arnd(SYSCTL_HANDLER_ARGS)
@@ -261,6 +252,13 @@
SYSCTL_PROC(_hw, HW_MACHINE_ARCH, machine_arch, CTLTYPE_STRING | CTLFLAG_RD,
NULL, 0, sysctl_hw_machine_arch, "A", "System architecture");
+SYSCTL_STRING(_kern, OID_AUTO, supported_archs, CTLFLAG_RD | CTLFLAG_MPSAFE,
+#ifdef COMPAT_FREEBSD32
+ MACHINE_ARCH " " MACHINE_ARCH32, 0, "Supported architectures for binaries");
+#else
+ MACHINE_ARCH, 0, "Supported architectures for binaries");
+#endif
+
static int
sysctl_hostname(SYSCTL_HANDLER_ARGS)
{
@@ -380,15 +378,8 @@
/* Actual kernel configuration options. */
extern char kernconfstring[];
-static int
-sysctl_kern_config(SYSCTL_HANDLER_ARGS)
-{
- return (sysctl_handle_string(oidp, kernconfstring,
- strlen(kernconfstring), req));
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW,
- 0, 0, sysctl_kern_config, "", "Kernel configuration file");
+SYSCTL_STRING(_kern, OID_AUTO, conftxt, CTLFLAG_RD, kernconfstring, 0,
+ "Kernel configuration file");
#endif
static int
@@ -429,6 +420,48 @@
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_hostid, "LU", "Host ID");
+/*
+ * The osrelease string is copied from the global (osrelease in vers.c) into
+ * prison0 by a sysinit and is inherited by child jails if not changed at jail
+ * creation, so we always return the copy from the current prison data.
+ */
+static int
+sysctl_osrelease(SYSCTL_HANDLER_ARGS)
+{
+ struct prison *pr;
+
+ pr = req->td->td_ucred->cr_prison;
+ return (SYSCTL_OUT(req, pr->pr_osrelease, strlen(pr->pr_osrelease) + 1));
+
+}
+
+SYSCTL_PROC(_kern, KERN_OSRELEASE, osrelease,
+ CTLTYPE_STRING | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_osrelease, "A", "Operating system release");
+
+/*
+ * The osreldate number is copied from the global (osreldate in vers.c) into
+ * prison0 by a sysinit and is inherited by child jails if not changed at jail
+ * creation, so we always return the value from the current prison data.
+ */
+static int
+sysctl_osreldate(SYSCTL_HANDLER_ARGS)
+{
+ struct prison *pr;
+
+ pr = req->td->td_ucred->cr_prison;
+ return (SYSCTL_OUT(req, &pr->pr_osreldate, sizeof(pr->pr_osreldate)));
+
+}
+
+/*
+ * NOTICE: The *userland* release date is available in
+ * /usr/include/osreldate.h
+ */
+SYSCTL_PROC(_kern, KERN_OSRELDATE, osreldate,
+ CTLTYPE_INT | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_osreldate, "I", "Kernel release date");
+
SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features");
#ifdef COMPAT_FREEBSD4
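
These two handlers move kern.osrelease and kern.osreldate off the static SYSCTL_STRING/SYSCTL_INT entries backed by the globals in vers.c and onto per-prison copies, so a jail can advertise its own release values. A minimal userland sketch (illustrative only, not part of this commit) showing how either value is read through sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	char osrelease[32];
	int osreldate;
	size_t len;

	/* Returns the copy from the calling process's prison. */
	len = sizeof(osrelease);
	if (sysctlbyname("kern.osrelease", osrelease, &len, NULL, 0) == 0)
		printf("kern.osrelease: %s\n", osrelease);

	len = sizeof(osreldate);
	if (sysctlbyname("kern.osreldate", &osreldate, &len, NULL, 0) == 0)
		printf("kern.osreldate: %d\n", osreldate);
	return (0);
}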
@@ -457,50 +490,51 @@
SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD,
"", 0, "PATH that finds all the standard utilities");
SYSCTL_INT(_user, USER_BC_BASE_MAX, bc_base_max, CTLFLAG_RD,
- 0, 0, "Max ibase/obase values in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max ibase/obase values in bc(1)");
SYSCTL_INT(_user, USER_BC_DIM_MAX, bc_dim_max, CTLFLAG_RD,
- 0, 0, "Max array size in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max array size in bc(1)");
SYSCTL_INT(_user, USER_BC_SCALE_MAX, bc_scale_max, CTLFLAG_RD,
- 0, 0, "Max scale value in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max scale value in bc(1)");
SYSCTL_INT(_user, USER_BC_STRING_MAX, bc_string_max, CTLFLAG_RD,
- 0, 0, "Max string length in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max string length in bc(1)");
SYSCTL_INT(_user, USER_COLL_WEIGHTS_MAX, coll_weights_max, CTLFLAG_RD,
- 0, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry");
-SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD, 0, 0, "");
+ SYSCTL_NULL_INT_PTR, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry");
+SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD,
+ SYSCTL_NULL_INT_PTR, 0, "");
SYSCTL_INT(_user, USER_LINE_MAX, line_max, CTLFLAG_RD,
- 0, 0, "Max length (bytes) of a text-processing utility's input line");
+ SYSCTL_NULL_INT_PTR, 0, "Max length (bytes) of a text-processing utility's input line");
SYSCTL_INT(_user, USER_RE_DUP_MAX, re_dup_max, CTLFLAG_RD,
- 0, 0, "Maximum number of repeats of a regexp permitted");
+ SYSCTL_NULL_INT_PTR, 0, "Maximum number of repeats of a regexp permitted");
SYSCTL_INT(_user, USER_POSIX2_VERSION, posix2_version, CTLFLAG_RD,
- 0, 0,
+ SYSCTL_NULL_INT_PTR, 0,
"The version of POSIX 1003.2 with which the system attempts to comply");
SYSCTL_INT(_user, USER_POSIX2_C_BIND, posix2_c_bind, CTLFLAG_RD,
- 0, 0, "Whether C development supports the C bindings option");
+ SYSCTL_NULL_INT_PTR, 0, "Whether C development supports the C bindings option");
SYSCTL_INT(_user, USER_POSIX2_C_DEV, posix2_c_dev, CTLFLAG_RD,
- 0, 0, "Whether system supports the C development utilities option");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports the C development utilities option");
SYSCTL_INT(_user, USER_POSIX2_CHAR_TERM, posix2_char_term, CTLFLAG_RD,
- 0, 0, "");
+ SYSCTL_NULL_INT_PTR, 0, "");
SYSCTL_INT(_user, USER_POSIX2_FORT_DEV, posix2_fort_dev, CTLFLAG_RD,
- 0, 0, "Whether system supports FORTRAN development utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports FORTRAN development utilities");
SYSCTL_INT(_user, USER_POSIX2_FORT_RUN, posix2_fort_run, CTLFLAG_RD,
- 0, 0, "Whether system supports FORTRAN runtime utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports FORTRAN runtime utilities");
SYSCTL_INT(_user, USER_POSIX2_LOCALEDEF, posix2_localedef, CTLFLAG_RD,
- 0, 0, "Whether system supports creation of locales");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports creation of locales");
SYSCTL_INT(_user, USER_POSIX2_SW_DEV, posix2_sw_dev, CTLFLAG_RD,
- 0, 0, "Whether system supports software development utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports software development utilities");
SYSCTL_INT(_user, USER_POSIX2_UPE, posix2_upe, CTLFLAG_RD,
- 0, 0, "Whether system supports the user portability utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports the user portability utilities");
SYSCTL_INT(_user, USER_STREAM_MAX, stream_max, CTLFLAG_RD,
- 0, 0, "Min Maximum number of streams a process may have open at one time");
+ SYSCTL_NULL_INT_PTR, 0, "Min Maximum number of streams a process may have open at one time");
SYSCTL_INT(_user, USER_TZNAME_MAX, tzname_max, CTLFLAG_RD,
- 0, 0, "Min Maximum number of types supported for timezone names");
+ SYSCTL_NULL_INT_PTR, 0, "Min Maximum number of types supported for timezone names");
#include <sys/vnode.h>
SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD,
- 0, sizeof(struct vnode), "sizeof(struct vnode)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct vnode), "sizeof(struct vnode)");
SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD,
- 0, sizeof(struct proc), "sizeof(struct proc)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct proc), "sizeof(struct proc)");
static int
sysctl_kern_pid_max(SYSCTL_HANDLER_ARGS)
@@ -533,14 +567,19 @@
#include <sys/bio.h>
#include <sys/buf.h>
SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD,
- 0, sizeof(struct bio), "sizeof(struct bio)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct bio), "sizeof(struct bio)");
SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD,
- 0, sizeof(struct buf), "sizeof(struct buf)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct buf), "sizeof(struct buf)");
#include <sys/user.h>
SYSCTL_INT(_debug_sizeof, OID_AUTO, kinfo_proc, CTLFLAG_RD,
- 0, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)");
+/* Used by kernel debuggers. */
+const int pcb_size = sizeof(struct pcb);
+SYSCTL_INT(_debug_sizeof, OID_AUTO, pcb, CTLFLAG_RD,
+ SYSCTL_NULL_INT_PTR, sizeof(struct pcb), "sizeof(struct pcb)");
+
/* XXX compatibility, remove for 6.0 */
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
Modified: trunk/sys/kern/kern_module.c
===================================================================
--- trunk/sys/kern/kern_module.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_module.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997 Doug Rabson
* All rights reserved.
@@ -27,7 +28,7 @@
#include "opt_compat.h"
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_module.c 293688 2016-01-11 19:59:56Z trasz $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -133,7 +134,7 @@
MOD_XLOCK;
if (mod->file) {
/*
- * Once a module is succesfully loaded, move
+ * Once a module is successfully loaded, move
* it to the head of the module list for this
* linker file. This resorts the list so that
* when the kernel linker iterates over the
@@ -158,16 +159,12 @@
newmod = module_lookupbyname(data->name);
if (newmod != NULL) {
MOD_XUNLOCK;
- printf("module_register: module %s already exists!\n",
- data->name);
+ printf("%s: cannot register %s from %s; already loaded from %s\n",
+ __func__, data->name, container->filename, newmod->file->filename);
return (EEXIST);
}
namelen = strlen(data->name) + 1;
newmod = malloc(sizeof(struct module) + namelen, M_MODULE, M_WAITOK);
- if (newmod == NULL) {
- MOD_XUNLOCK;
- return (ENOMEM);
- }
newmod->refs = 1;
newmod->id = nextid++;
newmod->name = (char *)(newmod + 1);
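
The dropped NULL check in module_register() was dead code: malloc(9) called with M_WAITOK sleeps until the allocation succeeds and never returns NULL, so only M_NOWAIT callers need a failure path. A short sketch of the two idioms (illustrative only; M_TEMP and the size argument are placeholders):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>

static int
alloc_example(void **out, size_t size, int cansleep)
{
	void *p;

	if (cansleep) {
		/* M_WAITOK: may sleep, never returns NULL, no check needed. */
		p = malloc(size, M_TEMP, M_WAITOK);
	} else {
		/* M_NOWAIT: may fail, so the result must be checked. */
		p = malloc(size, M_TEMP, M_NOWAIT);
		if (p == NULL)
			return (ENOMEM);
	}
	*out = p;
	return (0);
}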
Modified: trunk/sys/kern/kern_mtxpool.c
===================================================================
--- trunk/sys/kern/kern_mtxpool.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mtxpool.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Matthew Dillon. All Rights Reserved.
*
@@ -39,12 +40,12 @@
*
* Disadvantages:
* - should generally only be used as leaf mutexes.
- * - pool/pool dependancy ordering cannot be depended on.
+ * - pool/pool dependency ordering cannot be depended on.
 * - possible L1 cache mastership contention between cpus.
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mtxpool.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include <sys/param.h>
#include <sys/proc.h>
Modified: trunk/sys/kern/kern_mutex.c
===================================================================
--- trunk/sys/kern/kern_mutex.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mutex.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
*
@@ -25,8 +26,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * from BSDI $Id: kern_mutex.c,v 1.4 2012-10-09 04:08:16 laffer1 Exp $
- * and BSDI $Id: kern_mutex.c,v 1.4 2012-10-09 04:08:16 laffer1 Exp $
+ * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
+ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
*/
/*
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mutex.c 323870 2017-09-21 19:24:11Z marius $");
#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"
@@ -57,6 +58,7 @@
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
@@ -83,6 +85,12 @@
#endif
/*
+ * Return the mutex address when the lock cookie address is provided.
+ * This functionality assumes that struct mtx* have a member named mtx_lock.
+ */
+#define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock))
+
+/*
* Internal utility macros.
*/
#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED)
@@ -91,17 +99,18 @@
#define mtx_owner(m) ((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK))
-static void assert_mtx(struct lock_object *lock, int what);
+static void assert_mtx(const struct lock_object *lock, int what);
#ifdef DDB
-static void db_show_mtx(struct lock_object *lock);
+static void db_show_mtx(const struct lock_object *lock);
#endif
-static void lock_mtx(struct lock_object *lock, int how);
-static void lock_spin(struct lock_object *lock, int how);
+static void lock_mtx(struct lock_object *lock, uintptr_t how);
+static void lock_spin(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
-static int owner_mtx(struct lock_object *lock, struct thread **owner);
+static int owner_mtx(const struct lock_object *lock,
+ struct thread **owner);
#endif
-static int unlock_mtx(struct lock_object *lock);
-static int unlock_spin(struct lock_object *lock);
+static uintptr_t unlock_mtx(struct lock_object *lock);
+static uintptr_t unlock_spin(struct lock_object *lock);
/*
* Lock classes for sleep and spin mutexes.
@@ -133,6 +142,37 @@
#endif
};
+#ifdef ADAPTIVE_MUTEXES
+static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");
+
+static struct lock_delay_config mtx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
+ 0, "");
+
+static void
+mtx_delay_sysinit(void *dummy)
+{
+
+ mtx_delay.initial = mp_ncpus * 25;
+ mtx_delay.step = (mp_ncpus * 25) / 2;
+ mtx_delay.min = mp_ncpus * 5;
+ mtx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(mtx_delay_sysinit);
+#endif
+
/*
* System-wide mutexes
*/
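
The ADAPTIVE_MUTEXES block above replaces the bare cpu_spinwait() loop used while the owner is running with lock_delay(), tuned through the new debug.mtx.delay_* sysctls and rescaled from mp_ncpus at boot. The general shape is a bounded back-off: spin for some number of iterations, grow that number by 'step' on every unsuccessful pass, and clamp it between 'min' and 'max'. A userland-style sketch of that shape only (not the kernel's lock_delay() implementation):

#include <stdint.h>

struct delay_config {
	unsigned int initial;	/* first spin count */
	unsigned int step;	/* growth per unsuccessful pass */
	unsigned int min;	/* lower clamp */
	unsigned int max;	/* upper clamp */
};

static inline void
cpu_relax(void)
{
	/* Stand-in for cpu_spinwait(); a compiler barrier is enough here. */
	__asm __volatile("" ::: "memory");
}

/* Spin until *lock reads as 'unowned', backing off between polls. */
static void
backoff_spin(volatile uintptr_t *lock, uintptr_t unowned,
    const struct delay_config *dc)
{
	unsigned int delay, i;

	delay = dc->initial < dc->min ? dc->min : dc->initial;
	while (*lock != unowned) {
		for (i = 0; i < delay; i++)
			cpu_relax();
		if (delay + dc->step <= dc->max)
			delay += dc->step;
	}
}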
@@ -140,14 +180,14 @@
struct mtx Giant;
void
-assert_mtx(struct lock_object *lock, int what)
+assert_mtx(const struct lock_object *lock, int what)
{
- mtx_assert((struct mtx *)lock, what);
+ mtx_assert((const struct mtx *)lock, what);
}
void
-lock_mtx(struct lock_object *lock, int how)
+lock_mtx(struct lock_object *lock, uintptr_t how)
{
mtx_lock((struct mtx *)lock);
@@ -154,13 +194,13 @@
}
void
-lock_spin(struct lock_object *lock, int how)
+lock_spin(struct lock_object *lock, uintptr_t how)
{
panic("spin locks can only use msleep_spin");
}
-int
+uintptr_t
unlock_mtx(struct lock_object *lock)
{
struct mtx *m;
@@ -171,7 +211,7 @@
return (0);
}
-int
+uintptr_t
unlock_spin(struct lock_object *lock)
{
@@ -180,9 +220,9 @@
#ifdef KDTRACE_HOOKS
int
-owner_mtx(struct lock_object *lock, struct thread **owner)
+owner_mtx(const struct lock_object *lock, struct thread **owner)
{
- struct mtx *m = (struct mtx *)lock;
+ const struct mtx *m = (const struct mtx *)lock;
*owner = mtx_owner(m);
return (mtx_unowned(m) == 0);
@@ -194,38 +234,49 @@
* modules and can also be called from assembly language if needed.
*/
void
-_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
+ curthread, m->lock_object.lo_name, file, line));
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_lock() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
file, line));
- WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
- file, line, NULL);
+ WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
+ LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
__mtx_lock(m, curthread, opts, file, line);
LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
line);
- WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
+ WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
+ file, line);
curthread->td_locks++;
}
void
-_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
file, line));
- curthread->td_locks--;
WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
line);
@@ -234,14 +285,20 @@
if (m->mtx_recurse == 0)
LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_UNLOCK_RELEASE, m);
__mtx_unlock(m, curthread, opts, file, line);
+ curthread->td_locks--;
}
void
-_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
@@ -248,9 +305,11 @@
("mtx_lock_spin() of sleep mutex %s @ %s:%d",
m->lock_object.lo_name, file, line));
if (mtx_owned(m))
- KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
+ KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+ (opts & MTX_RECURSE) != 0,
("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
m->lock_object.lo_name, file, line));
+ opts &= ~MTX_RECURSE;
WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
file, line, NULL);
__mtx_lock_spin(m, curthread, opts, file, line);
@@ -259,12 +318,45 @@
WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
}
+int
+__mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line)
+{
+ struct mtx *m;
+
+ if (SCHEDULER_STOPPED())
+ return (1);
+
+ m = mtxlock2mtx(c);
+
+ KASSERT(m->mtx_lock != MTX_DESTROYED,
+ ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line));
+ KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
+ ("mtx_trylock_spin() of sleep mutex %s @ %s:%d",
+ m->lock_object.lo_name, file, line));
+ KASSERT((opts & MTX_RECURSE) == 0,
+ ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n",
+ m->lock_object.lo_name, file, line));
+ if (__mtx_trylock_spin(m, curthread, opts, file, line)) {
+ LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line);
+ WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
+ return (1);
+ }
+ LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line);
+ return (0);
+}
+
void
-_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
@@ -284,8 +376,9 @@
* is already owned, it will recursively acquire the lock.
*/
int
-_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
+_mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
#ifdef LOCK_PROFILING
uint64_t waittime = 0;
int contested = 0;
@@ -295,6 +388,11 @@
if (SCHEDULER_STOPPED())
return (1);
+ m = mtxlock2mtx(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
+ curthread, m->lock_object.lo_name, file, line));
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
@@ -301,12 +399,14 @@
("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
file, line));
- if (mtx_owned(m) && (m->lock_object.lo_flags & LO_RECURSABLE) != 0) {
+ if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+ (opts & MTX_RECURSE) != 0)) {
m->mtx_recurse++;
atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
rval = 1;
} else
rval = _mtx_obtain_lock(m, (uintptr_t)curthread);
+ opts &= ~MTX_RECURSE;
LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
if (rval) {
@@ -323,15 +423,16 @@
}
/*
- * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
+ * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
*
* We call this if the lock is either contested (i.e. we need to go to
* sleep waiting for it), or if we need to recurse on it.
*/
void
-_mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
- int line)
+__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
+ const char *file, int line)
{
+ struct mtx *m;
struct turnstile *ts;
uintptr_t v;
#ifdef ADAPTIVE_MUTEXES
@@ -344,19 +445,31 @@
int contested = 0;
uint64_t waittime = 0;
#endif
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
+ int64_t all_time = 0;
#endif
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_MUTEXES)
+ lock_delay_arg_init(&lda, &mtx_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
+ m = mtxlock2mtx(c);
+
if (mtx_owned(m)) {
- KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
+ KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+ (opts & MTX_RECURSE) != 0,
("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
m->lock_object.lo_name, file, line));
+ opts &= ~MTX_RECURSE;
m->mtx_recurse++;
atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
if (LOCK_LOG_TEST(&m->lock_object, opts))
@@ -363,6 +476,7 @@
CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
return;
}
+ opts &= ~MTX_RECURSE;
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -373,10 +487,15 @@
CTR4(KTR_LOCK,
"_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
+#ifdef KDTRACE_HOOKS
+ all_time -= lockstat_nsecs(&m->lock_object);
+#endif
- while (!_mtx_obtain_lock(m, tid)) {
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_MUTEXES
/*
@@ -391,13 +510,16 @@
CTR3(KTR_LOCK,
"%s: spinning on %p held by %p",
__func__, m, owner);
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname((struct thread *)tid),
+ "spinning", "lockname:\"%s\"",
+ m->lock_object.lo_name);
while (mtx_owner(m) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname((struct thread *)tid),
+ "running");
continue;
}
}
@@ -461,14 +583,17 @@
* Block on the turnstile.
*/
#ifdef KDTRACE_HOOKS
- sleep_time -= lockstat_nsecs();
+ sleep_time -= lockstat_nsecs(&m->lock_object);
#endif
turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
- sleep_time += lockstat_nsecs();
+ sleep_time += lockstat_nsecs(&m->lock_object);
sleep_cnt++;
#endif
}
+#ifdef KDTRACE_HOOKS
+ all_time += lockstat_nsecs(&m->lock_object);
+#endif
#ifdef KTR
if (cont_logged) {
CTR4(KTR_CONTENTION,
@@ -485,8 +610,8 @@
/*
* Only record the loops spinning and not sleeping.
*/
- if (spin_cnt > sleep_cnt)
- LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (spin_cnt - sleep_cnt));
+ if (lda.spin_cnt > sleep_cnt)
+ LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (all_time - sleep_time));
#endif
}
@@ -511,33 +636,45 @@
#ifdef SMP
/*
- * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
+ * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock.
*
* This is only called if we need to actually spin for the lock. Recursion
* is handled inline.
*/
void
-_mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file,
- int line)
+_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
+ const char *file, int line)
{
+ struct mtx *m;
int i = 0;
#ifdef LOCK_PROFILING
int contested = 0;
uint64_t waittime = 0;
#endif
+#ifdef KDTRACE_HOOKS
+ int64_t spin_time = 0;
+#endif
if (SCHEDULER_STOPPED())
return;
+ m = mtxlock2mtx(c);
+
if (LOCK_LOG_TEST(&m->lock_object, opts))
CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
+ "spinning", "lockname:\"%s\"", m->lock_object.lo_name);
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
- while (!_mtx_obtain_lock(m, tid)) {
-
+#ifdef KDTRACE_HOOKS
+ spin_time -= lockstat_nsecs(&m->lock_object);
+#endif
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
/* Give interrupts a chance while we spin. */
spinlock_exit();
while (m->mtx_lock != MTX_UNOWNED) {
@@ -553,18 +690,26 @@
}
spinlock_enter();
}
+#ifdef KDTRACE_HOOKS
+ spin_time += lockstat_nsecs(&m->lock_object);
+#endif
if (LOCK_LOG_TEST(&m->lock_object, opts))
CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
+ KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
+ "running");
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m,
contested, waittime, (file), (line));
- LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, i);
+#ifdef KDTRACE_HOOKS
+ if (spin_time != 0)
+ LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, spin_time);
+#endif
}
#endif /* SMP */
void
-_thread_lock_flags(struct thread *td, int opts, const char *file, int line)
+thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
{
struct mtx *m;
uintptr_t tid;
@@ -574,15 +719,25 @@
uint64_t waittime = 0;
#endif
#ifdef KDTRACE_HOOKS
- uint64_t spin_cnt = 0;
+ int64_t spin_time = 0;
#endif
i = 0;
tid = (uintptr_t)curthread;
- if (SCHEDULER_STOPPED())
+ if (SCHEDULER_STOPPED()) {
+ /*
+ * Ensure that spinlock sections are balanced even when the
+ * scheduler is stopped, since we may otherwise inadvertently
+ * re-enable interrupts while dumping core.
+ */
+ spinlock_enter();
return;
+ }
+#ifdef KDTRACE_HOOKS
+ spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
+#endif
for (;;) {
retry:
spinlock_enter();
@@ -598,10 +753,9 @@
m->lock_object.lo_name, file, line));
WITNESS_CHECKORDER(&m->lock_object,
opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
- while (!_mtx_obtain_lock(m, tid)) {
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
if (m->mtx_lock == tid) {
m->mtx_recurse++;
break;
@@ -630,10 +784,10 @@
if (m == td->td_lock)
break;
__mtx_unlock_spin(m); /* does spinlock_exit() */
+ }
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ spin_time += lockstat_nsecs(&m->lock_object);
#endif
- }
if (m->mtx_recurse == 0)
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE,
m, contested, waittime, (file), (line));
@@ -640,7 +794,7 @@
LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
line);
WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
- LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_cnt);
+ LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_time);
}
struct mtx *
@@ -677,19 +831,22 @@
}
/*
- * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
+ * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
*
* We are only called here if the lock is recursed or contested (i.e. we
* need to wake up a blocked thread).
*/
void
-_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
+__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
struct turnstile *ts;
if (SCHEDULER_STOPPED())
return;
+ m = mtxlock2mtx(c);
+
if (mtx_recursed(m)) {
if (--(m->mtx_recurse) == 0)
atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
@@ -728,11 +885,15 @@
*/
#ifdef INVARIANT_SUPPORT
void
-_mtx_assert(struct mtx *m, int what, const char *file, int line)
+__mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
+ const struct mtx *m;
- if (panicstr != NULL || dumping)
+ if (panicstr != NULL || dumping || SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
switch (what) {
case MA_OWNED:
case MA_OWNED | MA_RECURSED:
@@ -799,7 +960,8 @@
{
struct mtx_args *margs = arg;
- mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
+ mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
+ margs->ma_opts);
}
/*
@@ -809,13 +971,16 @@
* witness.
*/
void
-mtx_init(struct mtx *m, const char *name, const char *type, int opts)
+_mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts)
{
+ struct mtx *m;
struct lock_class *class;
int flags;
+ m = mtxlock2mtx(c);
+
MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
- MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE)) == 0);
+ MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0);
ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
("%s: mtx_lock not aligned for %s: %p", __func__, name,
&m->mtx_lock));
@@ -841,12 +1006,14 @@
flags |= LO_DUPOK;
if (opts & MTX_NOPROFILE)
flags |= LO_NOPROFILE;
+ if (opts & MTX_NEW)
+ flags |= LO_NEW;
/* Initialize mutex. */
+ lock_init(&m->lock_object, class, name, type, flags);
+
m->mtx_lock = MTX_UNOWNED;
m->mtx_recurse = 0;
-
- lock_init(&m->lock_object, class, name, type, flags);
}
/*
@@ -856,9 +1023,12 @@
* flags.
*/
void
-mtx_destroy(struct mtx *m)
+_mtx_destroy(volatile uintptr_t *c)
{
+ struct mtx *m;
+ m = mtxlock2mtx(c);
+
if (!mtx_owned(m))
MPASS(mtx_unowned(m));
else {
@@ -906,12 +1076,12 @@
#ifdef DDB
void
-db_show_mtx(struct lock_object *lock)
+db_show_mtx(const struct lock_object *lock)
{
struct thread *td;
- struct mtx *m;
+ const struct mtx *m;
- m = (struct mtx *)lock;
+ m = (const struct mtx *)lock;
db_printf(" flags: {");
if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
Modified: trunk/sys/kern/kern_ntptime.c
===================================================================
--- trunk/sys/kern/kern_ntptime.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_ntptime.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
***********************************************************************
* *
@@ -31,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_ntptime.c 285611 2015-07-15 19:11:43Z delphij $");
#include "opt_ntp.h"
@@ -148,14 +149,14 @@
#define SHIFT_FLL 2 /* FLL loop gain (shift) */
static int time_state = TIME_OK; /* clock state */
-static int time_status = STA_UNSYNC; /* clock status bits */
+int time_status = STA_UNSYNC; /* clock status bits */
static long time_tai; /* TAI offset (s) */
static long time_monitor; /* last time offset scaled (ns) */
static long time_constant; /* poll interval (shift) (s) */
static long time_precision = 1; /* clock precision (ns) */
static long time_maxerror = MAXPHASE / 1000; /* maximum error (us) */
-static long time_esterror = MAXPHASE / 1000; /* estimated error (us) */
-static long time_reftime; /* time at last adjustment (s) */
+long time_esterror = MAXPHASE / 1000; /* estimated error (us) */
+static long time_reftime; /* uptime at last adjustment (s) */
static l_fp time_offset; /* time offset (ns) */
static l_fp time_freq; /* frequency offset (ns/s) */
static l_fp time_adj; /* tick adjust (ns/s) */
@@ -301,13 +302,17 @@
0, sizeof(struct ntptimeval) , ntp_sysctl, "S,ntptimeval", "");
#ifdef PPS_SYNC
-SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shiftmax, CTLFLAG_RW, &pps_shiftmax, 0, "");
-SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shift, CTLFLAG_RW, &pps_shift, 0, "");
+SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shiftmax, CTLFLAG_RW,
+ &pps_shiftmax, 0, "Max interval duration (sec) (shift)");
+SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shift, CTLFLAG_RW,
+ &pps_shift, 0, "Interval duration (sec) (shift)");
SYSCTL_LONG(_kern_ntp_pll, OID_AUTO, time_monitor, CTLFLAG_RD,
- &time_monitor, 0, "");
+ &time_monitor, 0, "Last time offset scaled (ns)");
-SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD, &pps_freq, sizeof(pps_freq), "I", "");
-SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD, &time_freq, sizeof(time_freq), "I", "");
+SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD,
+ &pps_freq, sizeof(pps_freq), "I", "Scaled frequency offset (ns/sec)");
+SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD,
+ &time_freq, sizeof(time_freq), "I", "Frequency offset (ns/sec)");
#endif
/*
@@ -692,12 +697,12 @@
* otherwise, the argument offset is used to compute it.
*/
if (time_status & STA_PPSFREQ && time_status & STA_PPSSIGNAL) {
- time_reftime = time_second;
+ time_reftime = time_uptime;
return;
}
if (time_status & STA_FREQHOLD || time_reftime == 0)
- time_reftime = time_second;
- mtemp = time_second - time_reftime;
+ time_reftime = time_uptime;
+ mtemp = time_uptime - time_reftime;
L_LINT(ftemp, time_monitor);
L_RSHIFT(ftemp, (SHIFT_PLL + 2 + time_constant) << 1);
L_MPY(ftemp, mtemp);
@@ -710,7 +715,7 @@
L_ADD(time_freq, ftemp);
time_status |= STA_MODE;
}
- time_reftime = time_second;
+ time_reftime = time_uptime;
if (L_GINT(time_freq) > MAXFREQ)
L_LINT(time_freq, MAXFREQ);
else if (L_GINT(time_freq) < -MAXFREQ)
@@ -828,8 +833,15 @@
* discarded. otherwise, if so enabled, the time offset is
* updated. We can tolerate a modest loss of data here without
* much degrading time accuracy.
- */
- if (u_nsec > (pps_jitter << PPS_POPCORN)) {
+ *
+ * The measurements being checked here were made with the system
+ * timecounter, so the popcorn threshold is not allowed to fall below
+ * the number of nanoseconds in two ticks of the timecounter. For a
+ * timecounter running faster than 1 GHz the lower bound is 2ns, just
+ * to avoid a nonsensical threshold of zero.
+ */
+ if (u_nsec > lmax(pps_jitter << PPS_POPCORN,
+ 2 * (NANOSECOND / (long)qmin(NANOSECOND, tc_getfrequency())))) {
time_status |= STA_PPSJITTER;
pps_jitcnt++;
} else if (time_status & STA_PPSTIME) {
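
The rewritten popcorn check no longer compares the pulse against pps_jitter << PPS_POPCORN alone; the threshold is clamped to at least two ticks of the system timecounter, and to at least 2 ns for counters at or above 1 GHz, so a coarse timecounter cannot drive the threshold to zero and reject every sample. A standalone sketch of just that floor computation, with illustrative frequencies:

#include <stdio.h>

#define	NANOSECOND	1000000000L

/* Lower bound on the popcorn threshold: two timecounter ticks, >= 2 ns. */
static long
popcorn_floor(long tc_freq)
{
	long hz = tc_freq < NANOSECOND ? tc_freq : NANOSECOND;

	return (2 * (NANOSECOND / hz));
}

int
main(void)
{
	printf("i8254, 14318182 Hz: %ld ns\n", popcorn_floor(14318182));   /* 138 ns */
	printf("1 GHz or faster:    %ld ns\n", popcorn_floor(NANOSECOND)); /* 2 ns */
	return (0);
}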
@@ -1040,5 +1052,5 @@
periodic_resettodr, NULL);
}
-SYSINIT(periodic_resettodr, SI_SUB_RUN_SCHEDULER, SI_ORDER_MIDDLE,
+SYSINIT(periodic_resettodr, SI_SUB_LAST, SI_ORDER_MIDDLE,
start_periodic_resettodr, NULL);
Modified: trunk/sys/kern/kern_osd.c
===================================================================
--- trunk/sys/kern/kern_osd.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_osd.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_osd.c 298834 2016-04-30 04:01:22Z jamie $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -44,6 +45,23 @@
/* OSD (Object Specific Data) */
+/*
+ * Lock key:
+ * (m) osd_module_lock
+ * (o) osd_object_lock
+ * (l) osd_list_lock
+ */
+struct osd_master {
+ struct sx osd_module_lock;
+ struct rmlock osd_object_lock;
+ struct mtx osd_list_lock;
+ LIST_HEAD(, osd) osd_list; /* (l) */
+ osd_destructor_t *osd_destructors; /* (o) */
+ osd_method_t *osd_methods; /* (m) */
+ u_int osd_ntslots; /* (m) */
+ const u_int osd_nmethods;
+};
+
static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data");
static int osd_debug = 0;
@@ -62,25 +80,12 @@
int list_locked);
/*
- * Lists of objects with OSD.
- *
- * Lock key:
- * (m) osd_module_lock
- * (o) osd_object_lock
- * (l) osd_list_lock
+ * List of objects with OSD.
*/
-static LIST_HEAD(, osd) osd_list[OSD_LAST + 1]; /* (m) */
-static osd_method_t *osd_methods[OSD_LAST + 1]; /* (m) */
-static u_int osd_nslots[OSD_LAST + 1]; /* (m) */
-static osd_destructor_t *osd_destructors[OSD_LAST + 1]; /* (o) */
-static const u_int osd_nmethods[OSD_LAST + 1] = {
- [OSD_JAIL] = PR_MAXMETHOD,
+struct osd_master osdm[OSD_LAST + 1] = {
+ [OSD_JAIL] = { .osd_nmethods = PR_MAXMETHOD },
};
-static struct sx osd_module_lock[OSD_LAST + 1];
-static struct rmlock osd_object_lock[OSD_LAST + 1];
-static struct mtx osd_list_lock[OSD_LAST + 1];
-
static void
osd_default_destructor(void *value __unused)
{
@@ -102,12 +107,12 @@
if (destructor == NULL)
destructor = osd_default_destructor;
- sx_xlock(&osd_module_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
/*
* First, we try to find unused slot.
*/
- for (i = 0; i < osd_nslots[type]; i++) {
- if (osd_destructors[type][i] == NULL) {
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ if (osdm[type].osd_destructors[i] == NULL) {
OSD_DEBUG("Unused slot found (type=%u, slot=%u).",
type, i);
break;
@@ -116,31 +121,31 @@
/*
* If no unused slot was found, allocate one.
*/
- if (i == osd_nslots[type]) {
- osd_nslots[type]++;
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_WAITOK);
- newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type],
- M_OSD, M_WAITOK);
- rm_wlock(&osd_object_lock[type]);
- bcopy(osd_destructors[type], newptr,
+ if (i == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots++;
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_WAITOK);
+ newptr = malloc(sizeof(osd_destructor_t) *
+ osdm[type].osd_ntslots, M_OSD, M_WAITOK);
+ rm_wlock(&osdm[type].osd_object_lock);
+ bcopy(osdm[type].osd_destructors, newptr,
sizeof(osd_destructor_t) * i);
- free(osd_destructors[type], M_OSD);
- osd_destructors[type] = newptr;
- rm_wunlock(&osd_object_lock[type]);
+ free(osdm[type].osd_destructors, M_OSD);
+ osdm[type].osd_destructors = newptr;
+ rm_wunlock(&osdm[type].osd_object_lock);
OSD_DEBUG("New slot allocated (type=%u, slot=%u).",
type, i + 1);
}
- osd_destructors[type][i] = destructor;
- if (osd_nmethods[type] != 0) {
- for (m = 0; m < osd_nmethods[type]; m++)
- osd_methods[type][i * osd_nmethods[type] + m] =
- methods != NULL ? methods[m] : NULL;
+ osdm[type].osd_destructors[i] = destructor;
+ if (osdm[type].osd_nmethods != 0) {
+ for (m = 0; m < osdm[type].osd_nmethods; m++)
+ osdm[type].osd_methods[i * osdm[type].osd_nmethods + m]
+ = methods != NULL ? methods[m] : NULL;
}
- sx_xunlock(&osd_module_lock[type]);
+ sx_xunlock(&osdm[type].osd_module_lock);
return (i + 1);
}
@@ -151,37 +156,37 @@
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- sx_xlock(&osd_module_lock[type]);
- rm_wlock(&osd_object_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
+ rm_wlock(&osdm[type].osd_object_lock);
/*
* Free all OSD for the given slot.
*/
- mtx_lock(&osd_list_lock[type]);
- LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd)
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_FOREACH_SAFE(osd, &osdm[type].osd_list, osd_next, tosd)
do_osd_del(type, osd, slot, 1);
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
/*
* Set destructor to NULL to free the slot.
*/
- osd_destructors[type][slot - 1] = NULL;
- if (slot == osd_nslots[type]) {
- osd_nslots[type]--;
- osd_destructors[type] = realloc(osd_destructors[type],
- sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+ osdm[type].osd_destructors[slot - 1] = NULL;
+ if (slot == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots--;
+ osdm[type].osd_destructors = realloc(osdm[type].osd_destructors,
+ sizeof(osd_destructor_t) * osdm[type].osd_ntslots, M_OSD,
M_NOWAIT | M_ZERO);
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO);
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_NOWAIT | M_ZERO);
/*
* We always reallocate to smaller size, so we assume it will
* always succeed.
*/
- KASSERT(osd_destructors[type] != NULL &&
- (osd_nmethods[type] == 0 || osd_methods[type] != NULL),
- ("realloc() failed"));
+ KASSERT(osdm[type].osd_destructors != NULL &&
+ (osdm[type].osd_nmethods == 0 ||
+ osdm[type].osd_methods != NULL), ("realloc() failed"));
OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).",
type, slot);
} else {
@@ -188,68 +193,105 @@
OSD_DEBUG("Slot deregistration (type=%u, slot=%u).",
type, slot);
}
- rm_wunlock(&osd_object_lock[type]);
- sx_xunlock(&osd_module_lock[type]);
+ rm_wunlock(&osdm[type].osd_object_lock);
+ sx_xunlock(&osdm[type].osd_module_lock);
}
int
osd_set(u_int type, struct osd *osd, u_int slot, void *value)
{
+
+ return (osd_set_reserved(type, osd, slot, NULL, value));
+}
+
+void *
+osd_reserve(u_int slot)
+{
+
+ KASSERT(slot > 0, ("Invalid slot."));
+
+ OSD_DEBUG("Reserving slot array (slot=%u).", slot);
+ return (malloc(sizeof(void *) * slot, M_OSD, M_WAITOK | M_ZERO));
+}
+
+int
+osd_set_reserved(u_int type, struct osd *osd, u_int slot, void *rsv,
+ void *value)
+{
struct rm_priotracker tracker;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
+ void *newptr;
+
if (value == NULL) {
OSD_DEBUG(
"Not allocating null slot (type=%u, slot=%u).",
type, slot);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
+ if (rsv)
+ osd_free_reserved(rsv);
return (0);
- } else if (osd->osd_nslots == 0) {
+ }
+
+ /*
+ * Too few slots allocated here, so we need to extend or create
+ * the array.
+ */
+ if (rsv) {
/*
- * First OSD for this object, so we need to allocate
- * space and put it onto the list.
+ * Use the reserve passed in (assumed to be
+ * the right size).
*/
- osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD,
- M_NOWAIT | M_ZERO);
- if (osd->osd_slots == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
- return (ENOMEM);
+ newptr = rsv;
+ if (osd->osd_nslots != 0) {
+ memcpy(newptr, osd->osd_slots,
+ sizeof(void *) * osd->osd_nslots);
+ free(osd->osd_slots, M_OSD);
}
- osd->osd_nslots = slot;
- mtx_lock(&osd_list_lock[type]);
- LIST_INSERT_HEAD(&osd_list[type], osd, osd_next);
- mtx_unlock(&osd_list_lock[type]);
- OSD_DEBUG("Setting first slot (type=%u).", type);
} else {
- void *newptr;
-
- /*
- * Too few slots allocated here, needs to extend
- * the array.
- */
newptr = realloc(osd->osd_slots, sizeof(void *) * slot,
M_OSD, M_NOWAIT | M_ZERO);
if (newptr == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock,
+ &tracker);
return (ENOMEM);
}
- osd->osd_slots = newptr;
- osd->osd_nslots = slot;
+ }
+ if (osd->osd_nslots == 0) {
+ /*
+ * First OSD for this object, so we need to put it
+ * onto the list.
+ */
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_INSERT_HEAD(&osdm[type].osd_list, osd, osd_next);
+ mtx_unlock(&osdm[type].osd_list_lock);
+ OSD_DEBUG("Setting first slot (type=%u).", type);
+ } else
OSD_DEBUG("Growing slots array (type=%u).", type);
- }
- }
+ osd->osd_slots = newptr;
+ osd->osd_nslots = slot;
+ } else if (rsv)
+ osd_free_reserved(rsv);
OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type,
slot, value);
osd->osd_slots[slot - 1] = value;
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (0);
}
+void
+osd_free_reserved(void *rsv)
+{
+
+ OSD_DEBUG("Discarding reserved slot array.");
+ free(rsv, M_OSD);
+}
+
void *
osd_get(u_int type, struct osd *osd, u_int slot)
{
@@ -258,9 +300,9 @@
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
value = NULL;
OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
@@ -269,7 +311,7 @@
OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).",
type, slot, value);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (value);
}
@@ -278,9 +320,9 @@
{
struct rm_priotracker tracker;
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
do_osd_del(type, osd, slot, 0);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
}
static void
@@ -290,7 +332,7 @@
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot);
@@ -299,7 +341,7 @@
return;
}
if (osd->osd_slots[slot - 1] != NULL) {
- osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]);
+ osdm[type].osd_destructors[slot - 1](osd->osd_slots[slot - 1]);
osd->osd_slots[slot - 1] = NULL;
}
for (i = osd->osd_nslots - 1; i >= 0; i--) {
@@ -313,10 +355,10 @@
/* No values left for this object. */
OSD_DEBUG("No more slots left (type=%u).", type);
if (!list_locked)
- mtx_lock(&osd_list_lock[type]);
+ mtx_lock(&osdm[type].osd_list_lock);
LIST_REMOVE(osd, osd_next);
if (!list_locked)
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
free(osd->osd_slots, M_OSD);
osd->osd_slots = NULL;
osd->osd_nslots = 0;
@@ -342,7 +384,7 @@
int error, i;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
- KASSERT(method < osd_nmethods[type], ("Invalid method."));
+ KASSERT(method < osdm[type].osd_nmethods, ("Invalid method."));
/*
* Call this method for every slot that defines it, stopping if an
@@ -349,14 +391,14 @@
* error is encountered.
*/
error = 0;
- sx_slock(&osd_module_lock[type]);
- for (i = 0; i < osd_nslots[type]; i++) {
- methodfun =
- osd_methods[type][i * osd_nmethods[type] + method];
+ sx_slock(&osdm[type].osd_module_lock);
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ methodfun = osdm[type].osd_methods[i * osdm[type].osd_nmethods +
+ method];
if (methodfun != NULL && (error = methodfun(obj, data)) != 0)
break;
}
- sx_sunlock(&osd_module_lock[type]);
+ sx_sunlock(&osdm[type].osd_module_lock);
return (error);
}
@@ -374,14 +416,14 @@
return;
}
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
for (i = 1; i <= osd->osd_nslots; i++) {
- if (osd_destructors[type][i - 1] != NULL)
+ if (osdm[type].osd_destructors[i - 1] != NULL)
do_osd_del(type, osd, i, 0);
else
OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
OSD_DEBUG("Object exit (type=%u).", type);
}
@@ -391,13 +433,13 @@
u_int i;
for (i = OSD_FIRST; i <= OSD_LAST; i++) {
- osd_nslots[i] = 0;
- LIST_INIT(&osd_list[i]);
- sx_init(&osd_module_lock[i], "osd_module");
- rm_init(&osd_object_lock[i], "osd_object");
- mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF);
- osd_destructors[i] = NULL;
- osd_methods[i] = NULL;
+ sx_init(&osdm[i].osd_module_lock, "osd_module");
+ rm_init(&osdm[i].osd_object_lock, "osd_object");
+ mtx_init(&osdm[i].osd_list_lock, "osd_list", NULL, MTX_DEF);
+ LIST_INIT(&osdm[i].osd_list);
+ osdm[i].osd_destructors = NULL;
+ osdm[i].osd_ntslots = 0;
+ osdm[i].osd_methods = NULL;
}
}
SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL);
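
Besides folding the per-type arrays into struct osd_master, this revision adds osd_reserve(), osd_set_reserved() and osd_free_reserved(), so a caller can allocate the slot array with M_WAITOK while sleeping is still safe and install the value later without hitting the M_NOWAIT failure path of plain osd_set(). A hedged sketch of the intended pattern (obj, slot, value and the surrounding condition are placeholders, and locking is omitted):

void *rsv;

rsv = osd_reserve(slot);	/* allocates the slot array; may sleep */

/* ... later, possibly in a context that must not sleep ... */
if (still_wanted) {
	/*
	 * osd_set_reserved() consumes the reservation: it either becomes
	 * the object's slot array or is freed internally.
	 */
	(void)osd_set_reserved(OSD_JAIL, &obj->osd, slot, rsv, value);
} else {
	/* Never handed to osd_set_reserved(), so return it here. */
	osd_free_reserved(rsv);
}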