[Midnightbsd-cvs] src [9945] trunk/sys/kern: sync with freebsd 10-stable
laffer1 at midnightbsd.org
Fri May 25 16:53:39 EDT 2018
Revision: 9945
http://svnweb.midnightbsd.org/src/?rev=9945
Author: laffer1
Date: 2018-05-25 16:53:39 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd 10-stable
Modified Paths:
--------------
trunk/sys/kern/kern_dtrace.c
trunk/sys/kern/kern_idle.c
trunk/sys/kern/kern_ktrace.c
trunk/sys/kern/kern_linker.c
trunk/sys/kern/kern_lock.c
trunk/sys/kern/kern_lockf.c
trunk/sys/kern/kern_lockstat.c
trunk/sys/kern/kern_loginclass.c
trunk/sys/kern/kern_malloc.c
trunk/sys/kern/kern_mbuf.c
trunk/sys/kern/kern_mib.c
trunk/sys/kern/kern_module.c
trunk/sys/kern/kern_mtxpool.c
trunk/sys/kern/kern_mutex.c
trunk/sys/kern/kern_ntptime.c
trunk/sys/kern/kern_osd.c
Added Paths:
-----------
trunk/sys/kern/imgact_binmisc.c
trunk/sys/kern/kern_ffclock.c
Added: trunk/sys/kern/imgact_binmisc.c
===================================================================
--- trunk/sys/kern/imgact_binmisc.c (rev 0)
+++ trunk/sys/kern/imgact_binmisc.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -0,0 +1,766 @@
+/* $MidnightBSD$ */
+/*
+ * Copyright (c) 2013-16, Stacey D. Son
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/imgact_binmisc.c 302234 2016-06-27 21:50:30Z bdrewery $");
+
+#include <sys/param.h>
+#include <sys/ctype.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/imgact_binmisc.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+
+/**
+ * Miscellaneous binary interpreter image activator.
+ *
+ * If the given target executable's header matches the 'xbe_magic' field of an
+ * 'interpreter_list' entry, the user-level interpreter named in that entry's
+ * 'xbe_interpreter' field is used to execute the binary. The magic comparison
+ * may be applied at a given offset into the header using the value in the
+ * 'xbe_moffset' field, and bits of the header may be masked using the
+ * 'xbe_mask' field. The 'interpreter_list' entries are managed using sysctl(3)
+ * as described in the <sys/imgact_binmisc.h> file.
+ */
+
+/*
+ * Node of the interpreter list.
+ */
+typedef struct imgact_binmisc_entry {
+ char *ibe_name;
+ uint8_t *ibe_magic;
+ uint32_t ibe_moffset;
+ uint32_t ibe_msize;
+ uint8_t *ibe_mask;
+ uint8_t *ibe_interpreter;
+ uint32_t ibe_interp_argcnt;
+ uint32_t ibe_interp_length;
+ uint32_t ibe_flags;
+ SLIST_ENTRY(imgact_binmisc_entry) link;
+} imgact_binmisc_entry_t;
+
+/*
+ * sysctl() commands.
+ */
+#define IBC_ADD 1 /* Add given entry. */
+#define IBC_REMOVE 2 /* Remove entry for a given name. */
+#define IBC_DISABLE 3 /* Disable entry for a given name. */
+#define IBC_ENABLE 4 /* Enable entry for a given name. */
+#define IBC_LOOKUP 5 /* Lookup and return entry for given name. */
+#define IBC_LIST	6	/* Get a snapshot of the interpreter list. */
+
+/*
+ * Interpreter string macros.
+ *
+ * They all start with '#' followed by a single letter:
+ */
+#define ISM_POUND '#' /* "##" is the escape sequence for single #. */
+#define ISM_OLD_ARGV0 'a' /* "#a" is replaced with the old argv0. */
+
+MALLOC_DEFINE(M_BINMISC, KMOD_NAME, "misc binary image activator");
+
+/* The interpreter list. */
+static SLIST_HEAD(, imgact_binmisc_entry) interpreter_list =
+ SLIST_HEAD_INITIALIZER(interpreter_list);
+
+static int interp_list_entry_count = 0;
+
+static struct mtx interp_list_mtx;
+
+int imgact_binmisc_exec(struct image_params *imgp);
+
+
+/*
+ * Populate the entry with the information about the interpreter.
+ */
+static void
+imgact_binmisc_populate_interp(char *str, imgact_binmisc_entry_t *ibe)
+{
+ uint32_t len = 0, argc = 1;
+ char t[IBE_INTERP_LEN_MAX];
+ char *sp, *tp;
+
+ bzero(t, sizeof(t));
+
+ /*
+ * Normalize interpreter string. Replace white space between args with
+ * single space.
+ */
+ sp = str; tp = t;
+ while (*sp != '\0') {
+ if (*sp == ' ' || *sp == '\t') {
+ if (++len > IBE_INTERP_LEN_MAX)
+ break;
+ *tp++ = ' ';
+ argc++;
+ while (*sp == ' ' || *sp == '\t')
+ sp++;
+ continue;
+ } else {
+ *tp++ = *sp++;
+ len++;
+ }
+ }
+ *tp = '\0';
+ len++;
+
+ ibe->ibe_interpreter = malloc(len, M_BINMISC, M_WAITOK|M_ZERO);
+
+ /* Populate all the ibe fields for the interpreter. */
+ memcpy(ibe->ibe_interpreter, t, len);
+ ibe->ibe_interp_argcnt = argc;
+ ibe->ibe_interp_length = len;
+}
+
+/*
+ * Allocate memory and populate a new entry for the interpreter table.
+ */
+static imgact_binmisc_entry_t *
+imgact_binmisc_new_entry(ximgact_binmisc_entry_t *xbe)
+{
+ imgact_binmisc_entry_t *ibe = NULL;
+ size_t namesz = min(strlen(xbe->xbe_name) + 1, IBE_NAME_MAX);
+
+ mtx_assert(&interp_list_mtx, MA_NOTOWNED);
+
+ ibe = malloc(sizeof(*ibe), M_BINMISC, M_WAITOK|M_ZERO);
+
+ ibe->ibe_name = malloc(namesz, M_BINMISC, M_WAITOK|M_ZERO);
+ strlcpy(ibe->ibe_name, xbe->xbe_name, namesz);
+
+ imgact_binmisc_populate_interp(xbe->xbe_interpreter, ibe);
+
+ ibe->ibe_magic = malloc(xbe->xbe_msize, M_BINMISC, M_WAITOK|M_ZERO);
+ memcpy(ibe->ibe_magic, xbe->xbe_magic, xbe->xbe_msize);
+
+ ibe->ibe_mask = malloc(xbe->xbe_msize, M_BINMISC, M_WAITOK|M_ZERO);
+ memcpy(ibe->ibe_mask, xbe->xbe_mask, xbe->xbe_msize);
+
+ ibe->ibe_moffset = xbe->xbe_moffset;
+ ibe->ibe_msize = xbe->xbe_msize;
+ ibe->ibe_flags = xbe->xbe_flags;
+
+ return (ibe);
+}
+
+/*
+ * Free the allocated memory for a given list item.
+ */
+static void
+imgact_binmisc_destroy_entry(imgact_binmisc_entry_t *ibe)
+{
+ if (!ibe)
+ return;
+ if (ibe->ibe_magic)
+ free(ibe->ibe_magic, M_BINMISC);
+ if (ibe->ibe_mask)
+ free(ibe->ibe_mask, M_BINMISC);
+ if (ibe->ibe_interpreter)
+ free(ibe->ibe_interpreter, M_BINMISC);
+ if (ibe->ibe_name)
+ free(ibe->ibe_name, M_BINMISC);
+ if (ibe)
+ free(ibe, M_BINMISC);
+}
+
+/*
+ * Find the interpreter in the list by the given name. Return NULL if not
+ * found.
+ */
+static imgact_binmisc_entry_t *
+imgact_binmisc_find_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_assert(&interp_list_mtx, MA_OWNED);
+
+ SLIST_FOREACH(ibe, &interpreter_list, link) {
+ if (strncmp(name, ibe->ibe_name, IBE_NAME_MAX) == 0)
+ return (ibe);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Add the given interpreter if it doesn't already exist. Return EEXIST
+ * if the name already exists in the interpreter list.
+ */
+static int
+imgact_binmisc_add_entry(ximgact_binmisc_entry_t *xbe)
+{
+ imgact_binmisc_entry_t *ibe;
+ char *p;
+ int cnt;
+
+ if (xbe->xbe_msize > IBE_MAGIC_MAX)
+ return (EINVAL);
+
+ for(cnt = 0, p = xbe->xbe_name; *p != 0; cnt++, p++)
+ if (cnt >= IBE_NAME_MAX || !isascii((int)*p))
+ return (EINVAL);
+
+ for(cnt = 0, p = xbe->xbe_interpreter; *p != 0; cnt++, p++)
+ if (cnt >= IBE_INTERP_LEN_MAX || !isascii((int)*p))
+ return (EINVAL);
+
+ /* Make sure we don't have any invalid #'s. */
+ p = xbe->xbe_interpreter;
+ while (1) {
+ p = strchr(p, '#');
+ if (!p)
+ break;
+
+ p++;
+ switch(*p) {
+ case ISM_POUND:
+ /* "##" */
+ p++;
+ break;
+
+ case ISM_OLD_ARGV0:
+ /* "#a" */
+ p++;
+ break;
+
+ case 0:
+ default:
+ /* Anything besides the above is invalid. */
+ return (EINVAL);
+ }
+ }
+
+ mtx_lock(&interp_list_mtx);
+ if (imgact_binmisc_find_entry(xbe->xbe_name) != NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (EEXIST);
+ }
+ mtx_unlock(&interp_list_mtx);
+
+ ibe = imgact_binmisc_new_entry(xbe);
+
+ mtx_lock(&interp_list_mtx);
+ SLIST_INSERT_HEAD(&interpreter_list, ibe, link);
+ interp_list_entry_count++;
+ mtx_unlock(&interp_list_mtx);
+
+ return (0);
+}
+
+/*
+ * Remove the interpreter in the list with the given name. Return ENOENT
+ * if not found.
+ */
+static int
+imgact_binmisc_remove_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+ SLIST_REMOVE(&interpreter_list, ibe, imgact_binmisc_entry, link);
+ interp_list_entry_count--;
+ mtx_unlock(&interp_list_mtx);
+
+ imgact_binmisc_destroy_entry(ibe);
+
+ return (0);
+}
+
+/*
+ * Disable the interpreter in the list with the given name. Return ENOENT
+ * if not found.
+ */
+static int
+imgact_binmisc_disable_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+
+ ibe->ibe_flags &= ~IBF_ENABLED;
+ mtx_unlock(&interp_list_mtx);
+
+ return (0);
+}
+
+/*
+ * Enable the interpreter in the list with the given name. Return ENOENT
+ * if not found.
+ */
+static int
+imgact_binmisc_enable_entry(char *name)
+{
+ imgact_binmisc_entry_t *ibe;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+
+ ibe->ibe_flags |= IBF_ENABLED;
+ mtx_unlock(&interp_list_mtx);
+
+ return (0);
+}
+
+static int
+imgact_binmisc_populate_xbe(ximgact_binmisc_entry_t *xbe,
+ imgact_binmisc_entry_t *ibe)
+{
+ uint32_t i;
+
+ mtx_assert(&interp_list_mtx, MA_OWNED);
+
+ bzero(xbe, sizeof(*xbe));
+ strlcpy(xbe->xbe_name, ibe->ibe_name, IBE_NAME_MAX);
+
+ /* Copy interpreter string. Replace NULL breaks with space. */
+ memcpy(xbe->xbe_interpreter, ibe->ibe_interpreter,
+ ibe->ibe_interp_length);
+ for(i = 0; i < (ibe->ibe_interp_length - 1); i++)
+ if (xbe->xbe_interpreter[i] == '\0')
+ xbe->xbe_interpreter[i] = ' ';
+
+ memcpy(xbe->xbe_magic, ibe->ibe_magic, ibe->ibe_msize);
+ memcpy(xbe->xbe_mask, ibe->ibe_mask, ibe->ibe_msize);
+ xbe->xbe_version = IBE_VERSION;
+ xbe->xbe_flags = ibe->ibe_flags;
+ xbe->xbe_moffset = ibe->ibe_moffset;
+ xbe->xbe_msize = ibe->ibe_msize;
+
+ return (0);
+}
+
+/*
+ * Retrieve the interpreter with the given name and populate the
+ * ximgact_binmisc_entry structure. Return ENOENT if not found.
+ */
+static int
+imgact_binmisc_lookup_entry(char *name, ximgact_binmisc_entry_t *xbe)
+{
+ imgact_binmisc_entry_t *ibe;
+ int error = 0;
+
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_entry(name)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOENT);
+ }
+
+ error = imgact_binmisc_populate_xbe(xbe, ibe);
+ mtx_unlock(&interp_list_mtx);
+
+ return (error);
+}
+
+/*
+ * Get a snapshot of all the interpreter entries in the list.
+ */
+static int
+imgact_binmisc_get_all_entries(struct sysctl_req *req)
+{
+ ximgact_binmisc_entry_t *xbe, *xbep;
+ imgact_binmisc_entry_t *ibe;
+ int error = 0, count;
+
+ mtx_lock(&interp_list_mtx);
+ count = interp_list_entry_count;
+ /* Don't block in malloc() while holding lock. */
+ xbe = malloc(sizeof(*xbe) * count, M_BINMISC, M_NOWAIT|M_ZERO);
+ if (!xbe) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOMEM);
+ }
+
+ xbep = xbe;
+ SLIST_FOREACH(ibe, &interpreter_list, link) {
+ error = imgact_binmisc_populate_xbe(xbep++, ibe);
+ if (error)
+ break;
+ }
+ mtx_unlock(&interp_list_mtx);
+
+ if (!error)
+ error = SYSCTL_OUT(req, xbe, sizeof(*xbe) * count);
+
+ free(xbe, M_BINMISC);
+ return (error);
+}
+
+/*
+ * sysctl() handler for manipulating the interpreter table.
+ * Not MP safe (locked by sysctl).
+ */
+static int
+sysctl_kern_binmisc(SYSCTL_HANDLER_ARGS)
+{
+ ximgact_binmisc_entry_t xbe;
+ int error = 0;
+
+ switch(arg2) {
+ case IBC_ADD:
+ /* Add an entry. Limited to IBE_MAX_ENTRIES. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ if (interp_list_entry_count == IBE_MAX_ENTRIES)
+ return (ENOSPC);
+ error = imgact_binmisc_add_entry(&xbe);
+ break;
+
+ case IBC_REMOVE:
+ /* Remove an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_remove_entry(xbe.xbe_name);
+ break;
+
+ case IBC_DISABLE:
+ /* Disable an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_disable_entry(xbe.xbe_name);
+ break;
+
+ case IBC_ENABLE:
+ /* Enable an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_enable_entry(xbe.xbe_name);
+ break;
+
+ case IBC_LOOKUP:
+ /* Lookup an entry. */
+ error = SYSCTL_IN(req, &xbe, sizeof(xbe));
+ if (error)
+ return (error);
+ if (IBE_VERSION != xbe.xbe_version)
+ return (EINVAL);
+ error = imgact_binmisc_lookup_entry(xbe.xbe_name, &xbe);
+ if (!error)
+ error = SYSCTL_OUT(req, &xbe, sizeof(xbe));
+ break;
+
+ case IBC_LIST:
+		/* Return a snapshot of the interpreter list. */
+
+ if (!req->oldptr) {
+ /* No pointer then just return the list size. */
+ error = SYSCTL_OUT(req, 0, interp_list_entry_count *
+ sizeof(ximgact_binmisc_entry_t));
+ return (error);
+ } else
+ if (!req->oldlen)
+ return (EINVAL);
+
+ error = imgact_binmisc_get_all_entries(req);
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ return (error);
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, binmisc, CTLFLAG_RW, 0,
+ "Image activator for miscellaneous binaries");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, add,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_ADD,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Add an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, remove,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_REMOVE,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Remove an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, disable,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_DISABLE,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Disable an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, enable,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_WR, NULL, IBC_ENABLE,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Enable an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, lookup,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_RW|CTLFLAG_ANYBODY, NULL, IBC_LOOKUP,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Lookup an activator entry");
+
+SYSCTL_PROC(_kern_binmisc, OID_AUTO, list,
+ CTLFLAG_MPSAFE|CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_ANYBODY, NULL, IBC_LIST,
+ sysctl_kern_binmisc, "S,ximgact_binmisc_entry",
+ "Get snapshot of all the activator entries");
+
+static imgact_binmisc_entry_t *
+imgact_binmisc_find_interpreter(const char *image_header)
+{
+ imgact_binmisc_entry_t *ibe;
+ const char *p;
+ int i;
+ size_t sz;
+
+ mtx_assert(&interp_list_mtx, MA_OWNED);
+
+ SLIST_FOREACH(ibe, &interpreter_list, link) {
+ if (!(IBF_ENABLED & ibe->ibe_flags))
+ continue;
+
+ p = image_header + ibe->ibe_moffset;
+ sz = ibe->ibe_msize;
+ if (IBF_USE_MASK & ibe->ibe_flags) {
+ /* Compare using mask. */
+ for (i = 0; i < sz; i++)
+ if ((*p++ ^ ibe->ibe_magic[i]) &
+ ibe->ibe_mask[i])
+ break;
+ } else {
+ for (i = 0; i < sz; i++)
+ if (*p++ ^ ibe->ibe_magic[i])
+ break;
+ }
+ if (i == ibe->ibe_msize)
+ return (ibe);
+ }
+ return (NULL);
+}
+
+int
+imgact_binmisc_exec(struct image_params *imgp)
+{
+ const char *image_header = imgp->image_header;
+ const char *fname = NULL;
+ int error = 0;
+ size_t offset, l;
+ imgact_binmisc_entry_t *ibe;
+ struct sbuf *sname;
+ char *s, *d;
+
+ /* Do we have an interpreter for the given image header? */
+ mtx_lock(&interp_list_mtx);
+ if ((ibe = imgact_binmisc_find_interpreter(image_header)) == NULL) {
+ mtx_unlock(&interp_list_mtx);
+ return (-1);
+ }
+
+ /* No interpreter nesting allowed. */
+ if (imgp->interpreted & IMGACT_BINMISC) {
+ mtx_unlock(&interp_list_mtx);
+ return (ENOEXEC);
+ }
+
+ imgp->interpreted |= IMGACT_BINMISC;
+
+ if (imgp->args->fname != NULL) {
+ fname = imgp->args->fname;
+ sname = NULL;
+ } else {
+ /* Use the fdescfs(5) path for fexecve(2). */
+ sname = sbuf_new_auto();
+ sbuf_printf(sname, "/dev/fd/%d", imgp->args->fd);
+ sbuf_finish(sname);
+ fname = sbuf_data(sname);
+ }
+
+
+ /*
+ * We need to "push" the interpreter in the arg[] list. To do this,
+ * we first shift all the other values in the `begin_argv' area to
+ * provide the exact amount of room for the values added. Set up
+ * `offset' as the number of bytes to be added to the `begin_argv'
+ * area.
+ */
+ offset = ibe->ibe_interp_length;
+
+ /* Adjust the offset for #'s. */
+ s = ibe->ibe_interpreter;
+ while (1) {
+ s = strchr(s, '#');
+ if (!s)
+ break;
+
+ s++;
+ switch(*s) {
+ case ISM_POUND:
+ /* "##" -> "#": reduce offset by one. */
+ offset--;
+ break;
+
+ case ISM_OLD_ARGV0:
+ /* "#a" -> (old argv0): increase offset to fit fname */
+ offset += strlen(fname) - 2;
+ break;
+
+ default:
+ /* Hmm... This shouldn't happen. */
+ mtx_unlock(&interp_list_mtx);
+ printf("%s: Unknown macro #%c sequence in "
+ "interpreter string\n", KMOD_NAME, *(s + 1));
+ error = EINVAL;
+ goto done;
+ }
+ s++;
+ }
+
+ /* Check to make sure we won't overrun the stringspace. */
+ if (offset > imgp->args->stringspace) {
+ mtx_unlock(&interp_list_mtx);
+ error = E2BIG;
+ goto done;
+ }
+
+ /* Make room for the interpreter */
+ bcopy(imgp->args->begin_argv, imgp->args->begin_argv + offset,
+ imgp->args->endp - imgp->args->begin_argv);
+
+ /* Adjust everything by the offset. */
+ imgp->args->begin_envv += offset;
+ imgp->args->endp += offset;
+ imgp->args->stringspace -= offset;
+
+ /* Add the new argument(s) in the count. */
+ imgp->args->argc += ibe->ibe_interp_argcnt;
+
+ /*
+ * The original arg[] list has been shifted appropriately. Copy in
+ * the interpreter path.
+ */
+ s = ibe->ibe_interpreter;
+ d = imgp->args->begin_argv;
+ while(*s != '\0') {
+ switch (*s) {
+ case '#':
+ /* Handle "#" in interpreter string. */
+ s++;
+ switch(*s) {
+ case ISM_POUND:
+ /* "##": Replace with a single '#' */
+ *d++ = '#';
+ break;
+
+ case ISM_OLD_ARGV0:
+ /* "#a": Replace with old arg0 (fname). */
+ if ((l = strlen(fname)) != 0) {
+ memcpy(d, fname, l);
+ d += l;
+ }
+ break;
+
+ default:
+ /* Shouldn't happen but skip it if it does. */
+ break;
+ }
+ break;
+
+ case ' ':
+ /* Replace space with NUL to separate arguments. */
+ *d++ = '\0';
+ break;
+
+ default:
+ *d++ = *s;
+ break;
+ }
+ s++;
+ }
+ *d = '\0';
+ mtx_unlock(&interp_list_mtx);
+
+ if (!error)
+ imgp->interpreter_name = imgp->args->begin_argv;
+
+
+done:
+ if (sname)
+ sbuf_delete(sname);
+ return (error);
+}
+
+static void
+imgact_binmisc_init(void *arg)
+{
+
+ mtx_init(&interp_list_mtx, KMOD_NAME, NULL, MTX_DEF);
+}
+
+static void
+imgact_binmisc_fini(void *arg)
+{
+ imgact_binmisc_entry_t *ibe, *ibe_tmp;
+
+ /* Free all the interpreters. */
+ mtx_lock(&interp_list_mtx);
+ SLIST_FOREACH_SAFE(ibe, &interpreter_list, link, ibe_tmp) {
+ SLIST_REMOVE(&interpreter_list, ibe, imgact_binmisc_entry,
+ link);
+ imgact_binmisc_destroy_entry(ibe);
+ }
+ mtx_unlock(&interp_list_mtx);
+
+ mtx_destroy(&interp_list_mtx);
+}
+
+SYSINIT(imgact_binmisc, SI_SUB_EXEC, SI_ORDER_MIDDLE, imgact_binmisc_init, 0);
+SYSUNINIT(imgact_binmisc, SI_SUB_EXEC, SI_ORDER_MIDDLE, imgact_binmisc_fini, 0);
+
+/*
+ * Tell kern_execve.c about it, with a little help from the linker.
+ */
+static struct execsw imgact_binmisc_execsw = { imgact_binmisc_exec, KMOD_NAME };
+EXEC_SET(imgact_binmisc, imgact_binmisc_execsw);
Property changes on: trunk/sys/kern/imgact_binmisc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
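
For reference, the interpreter table maintained by imgact_binmisc.c is driven entirely through the new kern.binmisc.* sysctl nodes defined above (FreeBSD wraps them in the binmiscctl(8) utility). Below is a minimal userland sketch of an IBC_ADD request; it assumes the fixed-size array layout of ximgact_binmisc_entry_t from <sys/imgact_binmisc.h>, and the entry name, interpreter path, and magic bytes are purely illustrative. It must run as root, since the add node is not flagged CTLFLAG_ANYBODY.

/*
 * Illustrative only: register a hypothetical "llvmbc" entry that hands
 * binaries starting with the 4-byte magic "BC\xc0\xde" to an interpreter,
 * passing the original argv[0] via the "#a" macro handled above.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/imgact_binmisc.h>

#include <err.h>
#include <string.h>

int
main(void)
{
	ximgact_binmisc_entry_t xbe;

	memset(&xbe, 0, sizeof(xbe));
	xbe.xbe_version = IBE_VERSION;
	xbe.xbe_flags = IBF_ENABLED | IBF_USE_MASK;
	strlcpy(xbe.xbe_name, "llvmbc", sizeof(xbe.xbe_name));
	strlcpy(xbe.xbe_interpreter, "/usr/local/bin/lli #a",
	    sizeof(xbe.xbe_interpreter));
	xbe.xbe_moffset = 0;			/* match at start of the header */
	xbe.xbe_msize = 4;			/* compare 4 magic bytes */
	memcpy(xbe.xbe_magic, "BC\xc0\xde", 4);
	memset(xbe.xbe_mask, 0xff, 4);		/* all magic bits significant */

	/* Write-only request: the kernel copies the struct in via SYSCTL_IN(). */
	if (sysctlbyname("kern.binmisc.add", NULL, NULL, &xbe, sizeof(xbe)) == -1)
		err(1, "kern.binmisc.add");
	return (0);
}

Entries added this way are matched by imgact_binmisc_find_interpreter() on each exec attempt, and can later be disabled, re-enabled, or removed through the sibling kern.binmisc.{disable,enable,remove} nodes.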
Modified: trunk/sys/kern/kern_dtrace.c
===================================================================
--- trunk/sys/kern/kern_dtrace.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_dtrace.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007-2008 John Birrell <jb at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_dtrace.c 269752 2014-08-09 14:05:01Z markj $");
#include "opt_kdb.h"
@@ -38,6 +39,7 @@
#include <sys/proc.h>
#include <sys/dtrace_bsd.h>
#include <sys/sysctl.h>
+#include <sys/sysent.h>
#define KDTRACE_PROC_SIZE 64
#define KDTRACE_THREAD_SIZE 256
@@ -47,6 +49,14 @@
static MALLOC_DEFINE(M_KDTRACE, "kdtrace", "DTrace hooks");
+/* Hooks used in the machine-dependent trap handlers. */
+dtrace_trap_func_t dtrace_trap_func;
+dtrace_doubletrap_func_t dtrace_doubletrap_func;
+dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr;
+dtrace_return_probe_ptr_t dtrace_return_probe_ptr;
+
+systrace_probe_func_t systrace_probe_func;
+
/* Return the DTrace process data size compiled in the kernel hooks. */
size_t
kdtrace_proc_size()
Added: trunk/sys/kern/kern_ffclock.c
===================================================================
--- trunk/sys/kern/kern_ffclock.c (rev 0)
+++ trunk/sys/kern/kern_ffclock.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -0,0 +1,483 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2011 The University of Melbourne
+ * All rights reserved.
+ *
+ * This software was developed by Julien Ridoux at the University of Melbourne
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_ffclock.c 273847 2014-10-30 08:04:48Z hselasky $");
+
+#include "opt_ffclock.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/sbuf.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/timeffc.h>
+
+#ifdef FFCLOCK
+
+FEATURE(ffclock, "Feed-forward clock support");
+
+extern struct ffclock_estimate ffclock_estimate;
+extern struct bintime ffclock_boottime;
+extern int8_t ffclock_updated;
+extern struct mtx ffclock_mtx;
+
+/*
+ * Feed-forward clock absolute time. This should be the preferred way to read
+ * the feed-forward clock for "wall-clock" type time. The flags allow composing
+ * various flavours of absolute time (e.g. with or without leap seconds taken
+ * into account). If valid pointers are passed, the ffcounter value and an
+ * upper bound on the clock error associated with the bintime are also returned.
+ * NOTE: use ffclock_convert_abs() to defer the conversion of an ffcounter value
+ * read earlier.
+ */
+void
+ffclock_abstime(ffcounter *ffcount, struct bintime *bt,
+ struct bintime *error_bound, uint32_t flags)
+{
+ struct ffclock_estimate cest;
+ ffcounter ffc;
+ ffcounter update_ffcount;
+ ffcounter ffdelta_error;
+
+ /* Get counter and corresponding time. */
+ if ((flags & FFCLOCK_FAST) == FFCLOCK_FAST)
+ ffclock_last_tick(&ffc, bt, flags);
+ else {
+ ffclock_read_counter(&ffc);
+ ffclock_convert_abs(ffc, bt, flags);
+ }
+
+ /* Current ffclock estimate, use update_ffcount as generation number. */
+ do {
+ update_ffcount = ffclock_estimate.update_ffcount;
+ bcopy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate));
+ } while (update_ffcount != ffclock_estimate.update_ffcount);
+
+ /*
+ * Leap second adjustment. Total as seen by synchronisation algorithm
+ * since it started. cest.leapsec_next is the ffcounter prediction of
+ * when the next leapsecond occurs.
+ */
+ if ((flags & FFCLOCK_LEAPSEC) == FFCLOCK_LEAPSEC) {
+ bt->sec -= cest.leapsec_total;
+ if (ffc > cest.leapsec_next)
+ bt->sec -= cest.leapsec;
+ }
+
+ /* Boot time adjustment, for uptime/monotonic clocks. */
+ if ((flags & FFCLOCK_UPTIME) == FFCLOCK_UPTIME) {
+ bintime_sub(bt, &ffclock_boottime);
+ }
+
+ /* Compute error bound if a valid pointer has been passed. */
+ if (error_bound) {
+ ffdelta_error = ffc - cest.update_ffcount;
+ ffclock_convert_diff(ffdelta_error, error_bound);
+ /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s] */
+ bintime_mul(error_bound, cest.errb_rate *
+ (uint64_t)18446744073709LL);
+ /* 18446744073 = int(2^64 / 1e9), since err_abs in [ns] */
+ bintime_addx(error_bound, cest.errb_abs *
+ (uint64_t)18446744073LL);
+ }
+
+ if (ffcount)
+ *ffcount = ffc;
+}
+
+/*
+ * Feed-forward difference clock. This should be the preferred way to convert a
+ * time interval in ffcounter values into a time interval in seconds. If a valid
+ * pointer is passed, an upper bound on the error in computing the time interval
+ * in seconds is provided.
+ */
+void
+ffclock_difftime(ffcounter ffdelta, struct bintime *bt,
+ struct bintime *error_bound)
+{
+ ffcounter update_ffcount;
+ uint32_t err_rate;
+
+ ffclock_convert_diff(ffdelta, bt);
+
+ if (error_bound) {
+ do {
+ update_ffcount = ffclock_estimate.update_ffcount;
+ err_rate = ffclock_estimate.errb_rate;
+ } while (update_ffcount != ffclock_estimate.update_ffcount);
+
+ ffclock_convert_diff(ffdelta, error_bound);
+ /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s] */
+ bintime_mul(error_bound, err_rate * (uint64_t)18446744073709LL);
+ }
+}
+
+/*
+ * Create a new kern.sysclock sysctl node, which will be home to some generic
+ * sysclock configuration variables. Feed-forward clock specific variables will
+ * live under the ffclock subnode.
+ */
+
+SYSCTL_NODE(_kern, OID_AUTO, sysclock, CTLFLAG_RW, 0,
+ "System clock related configuration");
+SYSCTL_NODE(_kern_sysclock, OID_AUTO, ffclock, CTLFLAG_RW, 0,
+ "Feed-forward clock configuration");
+
+static char *sysclocks[] = {"feedback", "feed-forward"};
+#define MAX_SYSCLOCK_NAME_LEN 16
+#define NUM_SYSCLOCKS (sizeof(sysclocks) / sizeof(*sysclocks))
+
+static int ffclock_version = 2;
+SYSCTL_INT(_kern_sysclock_ffclock, OID_AUTO, version, CTLFLAG_RD,
+ &ffclock_version, 0, "Feed-forward clock kernel version");
+
+/* List available sysclocks. */
+static int
+sysctl_kern_sysclock_available(SYSCTL_HANDLER_ARGS)
+{
+ struct sbuf *s;
+ int clk, error;
+
+ s = sbuf_new_for_sysctl(NULL, NULL,
+ MAX_SYSCLOCK_NAME_LEN * NUM_SYSCLOCKS, req);
+ if (s == NULL)
+ return (ENOMEM);
+
+ for (clk = 0; clk < NUM_SYSCLOCKS; clk++) {
+ sbuf_cat(s, sysclocks[clk]);
+ if (clk + 1 < NUM_SYSCLOCKS)
+ sbuf_cat(s, " ");
+ }
+ error = sbuf_finish(s);
+ sbuf_delete(s);
+
+ return (error);
+}
+
+SYSCTL_PROC(_kern_sysclock, OID_AUTO, available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, sysctl_kern_sysclock_available, "A",
+ "List of available system clocks");
+
+/*
+ * Return the name of the active system clock if read, or attempt to change
+ * the active system clock to the user specified one if written to. The active
+ * system clock is read when calling any of the [get]{bin,nano,micro}[up]time()
+ * functions.
+ */
+static int
+sysctl_kern_sysclock_active(SYSCTL_HANDLER_ARGS)
+{
+ char newclock[MAX_SYSCLOCK_NAME_LEN];
+ int error;
+ int clk;
+
+ /* Return the name of the current active sysclock. */
+ strlcpy(newclock, sysclocks[sysclock_active], sizeof(newclock));
+ error = sysctl_handle_string(oidp, newclock, sizeof(newclock), req);
+
+ /* Check for error or no change */
+ if (error != 0 || req->newptr == NULL)
+ goto done;
+
+ /* Change the active sysclock to the user specified one: */
+ error = EINVAL;
+ for (clk = 0; clk < NUM_SYSCLOCKS; clk++) {
+ if (strncmp(newclock, sysclocks[clk],
+ MAX_SYSCLOCK_NAME_LEN - 1)) {
+ continue;
+ }
+ sysclock_active = clk;
+ error = 0;
+ break;
+ }
+done:
+ return (error);
+}
+
+SYSCTL_PROC(_kern_sysclock, OID_AUTO, active, CTLTYPE_STRING | CTLFLAG_RW,
+ 0, 0, sysctl_kern_sysclock_active, "A",
+ "Name of the active system clock which is currently serving time");
+
+static int sysctl_kern_ffclock_ffcounter_bypass = 0;
+SYSCTL_INT(_kern_sysclock_ffclock, OID_AUTO, ffcounter_bypass, CTLFLAG_RW,
+ &sysctl_kern_ffclock_ffcounter_bypass, 0,
+ "Use reliable hardware timecounter as the feed-forward counter");
+
+/*
+ * High level functions to access the Feed-Forward Clock.
+ */
+void
+ffclock_bintime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
+}
+
+void
+ffclock_nanotime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_microtime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_LEAPSEC);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_getbintime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
+}
+
+void
+ffclock_getnanotime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_getmicrotime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_LEAPSEC | FFCLOCK_FAST);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_binuptime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
+}
+
+void
+ffclock_nanouptime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_microuptime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL, FFCLOCK_LERP | FFCLOCK_UPTIME);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_getbinuptime(struct bintime *bt)
+{
+
+ ffclock_abstime(NULL, bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
+}
+
+void
+ffclock_getnanouptime(struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_getmicrouptime(struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_abstime(NULL, &bt, NULL,
+ FFCLOCK_LERP | FFCLOCK_UPTIME | FFCLOCK_FAST);
+ bintime2timeval(&bt, tvp);
+}
+
+void
+ffclock_bindifftime(ffcounter ffdelta, struct bintime *bt)
+{
+
+ ffclock_difftime(ffdelta, bt, NULL);
+}
+
+void
+ffclock_nanodifftime(ffcounter ffdelta, struct timespec *tsp)
+{
+ struct bintime bt;
+
+ ffclock_difftime(ffdelta, &bt, NULL);
+ bintime2timespec(&bt, tsp);
+}
+
+void
+ffclock_microdifftime(ffcounter ffdelta, struct timeval *tvp)
+{
+ struct bintime bt;
+
+ ffclock_difftime(ffdelta, &bt, NULL);
+ bintime2timeval(&bt, tvp);
+}
+
+/*
+ * System call allowing userland applications to retrieve the current value of
+ * the Feed-Forward Clock counter.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct ffclock_getcounter_args {
+ ffcounter *ffcount;
+};
+#endif
+/* ARGSUSED */
+int
+sys_ffclock_getcounter(struct thread *td, struct ffclock_getcounter_args *uap)
+{
+ ffcounter ffcount;
+ int error;
+
+ ffcount = 0;
+ ffclock_read_counter(&ffcount);
+ if (ffcount == 0)
+ return (EAGAIN);
+ error = copyout(&ffcount, uap->ffcount, sizeof(ffcounter));
+
+ return (error);
+}
+
+/*
+ * System call allowing the synchronisation daemon to push new feed-forward clock
+ * estimates to the kernel. Acquire ffclock_mtx to prevent concurrent updates
+ * and ensure data consistency.
+ * NOTE: ffclock_updated signals the fftimehands that new estimates are
+ * available. The updated estimates are picked up by the fftimehands on next
+ * tick, which could take as long as 1/hz seconds (if ticks are not missed).
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct ffclock_setestimate_args {
+ struct ffclock_estimate *cest;
+};
+#endif
+/* ARGSUSED */
+int
+sys_ffclock_setestimate(struct thread *td, struct ffclock_setestimate_args *uap)
+{
+ struct ffclock_estimate cest;
+ int error;
+
+ /* Reuse of PRIV_CLOCK_SETTIME. */
+ if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
+ return (error);
+
+ if ((error = copyin(uap->cest, &cest, sizeof(struct ffclock_estimate)))
+ != 0)
+ return (error);
+
+ mtx_lock(&ffclock_mtx);
+ memcpy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate));
+ ffclock_updated++;
+ mtx_unlock(&ffclock_mtx);
+ return (error);
+}
+
+/*
+ * System call allowing userland applications to retrieve the clock estimates
+ * stored within the kernel. It is useful to kickstart the synchronisation
+ * daemon with the kernel's knowledge of the hardware timecounter.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct ffclock_getestimate_args {
+ struct ffclock_estimate *cest;
+};
+#endif
+/* ARGSUSED */
+int
+sys_ffclock_getestimate(struct thread *td, struct ffclock_getestimate_args *uap)
+{
+ struct ffclock_estimate cest;
+ int error;
+
+ mtx_lock(&ffclock_mtx);
+ memcpy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
+ mtx_unlock(&ffclock_mtx);
+ error = copyout(&cest, uap->cest, sizeof(struct ffclock_estimate));
+ return (error);
+}
+
+#else /* !FFCLOCK */
+
+int
+sys_ffclock_getcounter(struct thread *td, struct ffclock_getcounter_args *uap)
+{
+
+ return (ENOSYS);
+}
+
+int
+sys_ffclock_setestimate(struct thread *td, struct ffclock_setestimate_args *uap)
+{
+
+ return (ENOSYS);
+}
+
+int
+sys_ffclock_getestimate(struct thread *td, struct ffclock_getestimate_args *uap)
+{
+
+ return (ENOSYS);
+}
+
+#endif /* FFCLOCK */
Property changes on: trunk/sys/kern/kern_ffclock.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
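
The new kern_ffclock.c only compiles its body under "options FFCLOCK"; otherwise the three system calls return ENOSYS, as the #else branch above shows. With the option enabled, a userland process can sample the raw feed-forward counter and fetch the daemon-maintained estimate roughly as follows. This is a sketch that assumes the userland prototypes ffclock_getcounter() and ffclock_getestimate() exported through <sys/timeffc.h>, which are not part of this diff.

/*
 * Sample the feed-forward counter around a workload and report the raw
 * tick delta plus the generation counter of the current kernel estimate.
 */
#include <sys/types.h>
#include <sys/time.h>
#include <sys/timeffc.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct ffclock_estimate cest;
	ffcounter before, after;

	if (ffclock_getcounter(&before) == -1)
		err(1, "ffclock_getcounter");	/* ENOSYS without FFCLOCK */
	/* ... workload being timed ... */
	if (ffclock_getcounter(&after) == -1)
		err(1, "ffclock_getcounter");

	if (ffclock_getestimate(&cest) == -1)
		err(1, "ffclock_getestimate");

	printf("elapsed %ju ffcounter ticks, estimate generation %ju\n",
	    (uintmax_t)(after - before), (uintmax_t)cest.update_ffcount);
	return (0);
}

The system-wide clock flavour used by the [get]{bin,nano,micro}[up]time() calls can also be switched at run time with sysctl kern.sysclock.active=feed-forward (or back to feedback), per the sysctl_kern_sysclock_active handler above.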
Modified: trunk/sys/kern/kern_idle.c
===================================================================
--- trunk/sys/kern/kern_idle.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_idle.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (C) 2000-2004 The FreeBSD Project. All rights reserved.
*
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_idle.c 222531 2011-05-31 15:11:43Z nwhitehorn $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/kern/kern_ktrace.c
===================================================================
--- trunk/sys/kern/kern_ktrace.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_ktrace.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1989, 1993
* The Regents of the University of California.
@@ -32,11 +33,12 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_ktrace.c 315562 2017-03-19 15:56:06Z kib $");
#include "opt_ktrace.h"
#include <sys/param.h>
+#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
@@ -95,6 +97,7 @@
void *ktr_buffer;
union {
struct ktr_proc_ctor ktr_proc_ctor;
+ struct ktr_cap_fail ktr_cap_fail;
struct ktr_syscall ktr_syscall;
struct ktr_sysret ktr_sysret;
struct ktr_genio ktr_genio;
@@ -107,21 +110,20 @@
};
static int data_lengths[] = {
- 0, /* none */
- offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */
- sizeof(struct ktr_sysret), /* KTR_SYSRET */
- 0, /* KTR_NAMEI */
- sizeof(struct ktr_genio), /* KTR_GENIO */
- sizeof(struct ktr_psig), /* KTR_PSIG */
- sizeof(struct ktr_csw), /* KTR_CSW */
- 0, /* KTR_USER */
- 0, /* KTR_STRUCT */
- 0, /* KTR_SYSCTL */
- sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */
- 0, /* KTR_PROCDTOR */
- 0, /* unused */
- sizeof(struct ktr_fault), /* KTR_FAULT */
- sizeof(struct ktr_faultend), /* KTR_FAULTEND */
+ [KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
+ [KTR_SYSRET] = sizeof(struct ktr_sysret),
+ [KTR_NAMEI] = 0,
+ [KTR_GENIO] = sizeof(struct ktr_genio),
+ [KTR_PSIG] = sizeof(struct ktr_psig),
+ [KTR_CSW] = sizeof(struct ktr_csw),
+ [KTR_USER] = 0,
+ [KTR_STRUCT] = 0,
+ [KTR_SYSCTL] = 0,
+ [KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
+ [KTR_PROCDTOR] = 0,
+ [KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
+ [KTR_FAULT] = sizeof(struct ktr_fault),
+ [KTR_FAULTEND] = sizeof(struct ktr_faultend),
};
static STAILQ_HEAD(, ktr_request) ktr_free;
@@ -131,7 +133,7 @@
static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);
-static u_int ktr_geniosize = PAGE_SIZE;
+u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
0, "Maximum size of genio event payload");
@@ -511,7 +513,6 @@
struct proc *p;
struct ucred *cred;
struct vnode *vp;
- int vfslocked;
p = td->td_proc;
if (p->p_traceflag == 0)
@@ -529,11 +530,8 @@
ktr_freeproc(p, &cred, &vp);
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
- if (vp != NULL) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if (vp != NULL)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (cred != NULL)
crfree(cred);
ktrace_exit(td);
@@ -780,6 +778,33 @@
}
void
+ktrcapfail(type, needed, held)
+ enum ktr_cap_fail_type type;
+ const cap_rights_t *needed;
+ const cap_rights_t *held;
+{
+ struct thread *td = curthread;
+ struct ktr_request *req;
+ struct ktr_cap_fail *kcf;
+
+ req = ktr_getrequest(KTR_CAPFAIL);
+ if (req == NULL)
+ return;
+ kcf = &req->ktr_data.ktr_cap_fail;
+ kcf->cap_type = type;
+ if (needed != NULL)
+ kcf->cap_needed = *needed;
+ else
+ cap_rights_init(&kcf->cap_needed);
+ if (held != NULL)
+ kcf->cap_held = *held;
+ else
+ cap_rights_init(&kcf->cap_held);
+ ktr_enqueuerequest(td, req);
+ ktrace_exit(td);
+}
+
+void
ktrfault(vaddr, type)
vm_offset_t vaddr;
int type;
@@ -840,7 +865,7 @@
int ops = KTROP(uap->ops);
int descend = uap->ops & KTRFLAG_DESCEND;
int nfound, ret = 0;
- int flags, error = 0, vfslocked;
+ int flags, error = 0;
struct nameidata nd;
struct ucred *cred;
@@ -855,8 +880,7 @@
/*
* an operation which requires a file argument.
*/
- NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
- uap->fname, td);
+ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
flags = FREAD | FWRITE | O_NOFOLLOW;
error = vn_open(&nd, &flags, 0, NULL);
if (error) {
@@ -863,17 +887,14 @@
ktrace_exit(td);
return (error);
}
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
VOP_UNLOCK(vp, 0);
if (vp->v_type != VREG) {
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
ktrace_exit(td);
return (EACCES);
}
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
* Clear all uses of the tracefile.
@@ -899,10 +920,8 @@
}
sx_sunlock(&allproc_lock);
if (vrele_count > 0) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
while (vrele_count-- > 0)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
}
goto done;
}
@@ -968,11 +987,8 @@
if (!ret)
error = EPERM;
done:
- if (vp != NULL) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if (vp != NULL)
(void) vn_close(vp, FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
- }
ktrace_exit(td);
return (error);
#else /* !KTRACE */
@@ -1064,13 +1080,8 @@
if ((p->p_traceflag & KTRFAC_MASK) != 0)
ktrprocctor_entered(td, p);
PROC_UNLOCK(p);
- if (tracevp != NULL) {
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
+ if (tracevp != NULL)
vrele(tracevp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (tracecred != NULL)
crfree(tracecred);
@@ -1124,7 +1135,7 @@
struct iovec aiov[3];
struct mount *mp;
int datalen, buflen, vrele_count;
- int error, vfslocked;
+ int error;
/*
* We hold the vnode and credential for use in I/O in case ktrace is
@@ -1182,7 +1193,6 @@
auio.uio_iovcnt++;
}
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_start_write(vp, &mp, V_WAIT);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
@@ -1195,10 +1205,8 @@
crfree(cred);
if (!error) {
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
return;
}
- VFS_UNLOCK_GIANT(vfslocked);
/*
* If error encountered, give up tracing on this vnode. We defer
@@ -1237,10 +1245,8 @@
}
sx_sunlock(&allproc_lock);
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
while (vrele_count-- > 0)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
Modified: trunk/sys/kern/kern_linker.c
===================================================================
--- trunk/sys/kern/kern_linker.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_linker.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997-2000 Doug Rabson
* All rights reserved.
@@ -25,9 +26,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_linker.c 325867 2017-11-15 22:35:16Z gordon $");
#include "opt_ddb.h"
+#include "opt_kld.h"
#include "opt_hwpmc_hooks.h"
#include <sys/param.h>
@@ -44,6 +46,7 @@
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/linker.h>
+#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/libkern.h>
@@ -64,20 +67,16 @@
#ifdef KLD_DEBUG
int kld_debug = 0;
-SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW,
- &kld_debug, 0, "Set various levels of KLD debug");
+SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW | CTLFLAG_TUN,
+ &kld_debug, 0, "Set various levels of KLD debug");
+TUNABLE_INT("debug.kld_debug", &kld_debug);
#endif
-#define KLD_LOCK() sx_xlock(&kld_sx)
-#define KLD_UNLOCK() sx_xunlock(&kld_sx)
-#define KLD_DOWNGRADE() sx_downgrade(&kld_sx)
-#define KLD_LOCK_READ() sx_slock(&kld_sx)
-#define KLD_UNLOCK_READ() sx_sunlock(&kld_sx)
-#define KLD_LOCKED() sx_xlocked(&kld_sx)
-#define KLD_LOCK_ASSERT() do { \
- if (!cold) \
- sx_assert(&kld_sx, SX_XLOCKED); \
-} while (0)
+/* These variables are used by kernel debuggers to enumerate loaded files. */
+const int kld_off_address = offsetof(struct linker_file, address);
+const int kld_off_filename = offsetof(struct linker_file, filename);
+const int kld_off_pathname = offsetof(struct linker_file, pathname);
+const int kld_off_next = offsetof(struct linker_file, link.tqe_next);
/*
* static char *linker_search_path(const char *name, struct mod_depend
@@ -118,7 +117,8 @@
#define LINKER_GET_NEXT_FILE_ID(a) do { \
linker_file_t lftmp; \
\
- KLD_LOCK_ASSERT(); \
+ if (!cold) \
+ sx_assert(&kld_sx, SA_XLOCKED); \
retry: \
TAILQ_FOREACH(lftmp, &linker_files, link) { \
if (next_file_id == lftmp->id) { \
@@ -150,16 +150,6 @@
struct mod_depend *verinfo, struct linker_file **lfpp);
static modlist_t modlist_lookup2(const char *name, struct mod_depend *verinfo);
-static char *
-linker_strdup(const char *str)
-{
- char *result;
-
- if ((result = malloc((strlen(str) + 1), M_LINKER, M_WAITOK)) != NULL)
- strcpy(result, str);
- return (result);
-}
-
static void
linker_init(void *arg)
{
@@ -205,6 +195,8 @@
KLD_DPF(FILE, ("linker_file_sysinit: calling SYSINITs for %s\n",
lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysinit_set", &start, &stop, NULL) != 0)
return;
/*
@@ -230,6 +222,7 @@
* Traverse the (now) ordered list of system initialization tasks.
* Perform each task, and continue on to the next task.
*/
+ sx_xunlock(&kld_sx);
mtx_lock(&Giant);
for (sipp = start; sipp < stop; sipp++) {
if ((*sipp)->subsystem == SI_SUB_DUMMY)
@@ -239,6 +232,7 @@
(*((*sipp)->func)) ((*sipp)->udata);
}
mtx_unlock(&Giant);
+ sx_xlock(&kld_sx);
}
static void
@@ -249,6 +243,8 @@
KLD_DPF(FILE, ("linker_file_sysuninit: calling SYSUNINITs for %s\n",
lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysuninit_set", &start, &stop,
NULL) != 0)
return;
@@ -276,6 +272,7 @@
* Traverse the (now) ordered list of system initialization tasks.
* Perform each task, and continue on to the next task.
*/
+ sx_xunlock(&kld_sx);
mtx_lock(&Giant);
for (sipp = start; sipp < stop; sipp++) {
if ((*sipp)->subsystem == SI_SUB_DUMMY)
@@ -285,10 +282,11 @@
(*((*sipp)->func)) ((*sipp)->udata);
}
mtx_unlock(&Giant);
+ sx_xlock(&kld_sx);
}
static void
-linker_file_register_sysctls(linker_file_t lf)
+linker_file_register_sysctls(linker_file_t lf, bool enable)
{
struct sysctl_oid **start, **stop, **oidp;
@@ -296,13 +294,43 @@
("linker_file_register_sysctls: registering SYSCTLs for %s\n",
lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0)
return;
+ sx_xunlock(&kld_sx);
sysctl_lock();
+ for (oidp = start; oidp < stop; oidp++) {
+ if (enable)
+ sysctl_register_oid(*oidp);
+ else
+ sysctl_register_disabled_oid(*oidp);
+ }
+ sysctl_unlock();
+ sx_xlock(&kld_sx);
+}
+
+static void
+linker_file_enable_sysctls(linker_file_t lf)
+{
+ struct sysctl_oid **start, **stop, **oidp;
+
+ KLD_DPF(FILE,
+ ("linker_file_enable_sysctls: enable SYSCTLs for %s\n",
+ lf->filename));
+
+ sx_assert(&kld_sx, SA_XLOCKED);
+
+ if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0)
+ return;
+
+ sx_xunlock(&kld_sx);
+ sysctl_lock();
for (oidp = start; oidp < stop; oidp++)
- sysctl_register_oid(*oidp);
+ sysctl_enable_oid(*oidp);
sysctl_unlock();
+ sx_xlock(&kld_sx);
}
static void
@@ -310,16 +338,20 @@
{
struct sysctl_oid **start, **stop, **oidp;
- KLD_DPF(FILE, ("linker_file_unregister_sysctls: registering SYSCTLs"
+ KLD_DPF(FILE, ("linker_file_unregister_sysctls: unregistering SYSCTLs"
" for %s\n", lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0)
return;
+ sx_xunlock(&kld_sx);
sysctl_lock();
for (oidp = start; oidp < stop; oidp++)
sysctl_unregister_oid(*oidp);
sysctl_unlock();
+ sx_xlock(&kld_sx);
}
static int
@@ -332,6 +364,8 @@
KLD_DPF(FILE, ("linker_file_register_modules: registering modules"
" in %s\n", lf->filename));
+ sx_assert(&kld_sx, SA_XLOCKED);
+
if (linker_file_lookup_set(lf, "modmetadata_set", &start,
&stop, NULL) != 0) {
/*
@@ -367,7 +401,9 @@
linker_init_kernel_modules(void)
{
+ sx_xlock(&kld_sx);
linker_file_register_modules(linker_kernel_file);
+ sx_xunlock(&kld_sx);
}
SYSINIT(linker_kernel, SI_SUB_KLD, SI_ORDER_ANY, linker_init_kernel_modules,
@@ -384,7 +420,7 @@
if (prison0.pr_securelevel > 0)
return (EPERM);
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
lf = linker_find_file_by_name(filename);
if (lf) {
KLD_DPF(FILE, ("linker_load_file: file %s is already loaded,"
@@ -418,10 +454,8 @@
return (error);
}
modules = !TAILQ_EMPTY(&lf->modules);
- KLD_UNLOCK();
- linker_file_register_sysctls(lf);
+ linker_file_register_sysctls(lf, false);
linker_file_sysinit(lf);
- KLD_LOCK();
lf->flags |= LINKER_FILE_LINKED;
/*
@@ -433,6 +467,8 @@
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
return (ENOEXEC);
}
+ linker_file_enable_sysctls(lf);
+ EVENTHANDLER_INVOKE(kld_load, lf);
*result = lf;
return (0);
}
@@ -472,16 +508,16 @@
modlist_t mod;
int error;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if ((mod = modlist_lookup2(modname, verinfo)) != NULL) {
*result = mod->container;
(*result)->refs++;
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (0);
}
error = linker_load_module(NULL, modname, NULL, verinfo, result);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -492,13 +528,13 @@
modlist_t mod;
int error;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if (lf == NULL) {
KASSERT(modname != NULL,
("linker_release_module: no file or name"));
mod = modlist_lookup2(modname, verinfo);
if (mod == NULL) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (ESRCH);
}
lf = mod->container;
@@ -506,7 +542,7 @@
KASSERT(modname == NULL && verinfo == NULL,
("linker_release_module: both file and name"));
error = linker_file_unload(lf, LINKER_UNLOAD_NORMAL);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -519,7 +555,7 @@
koname = malloc(strlen(filename) + 4, M_LINKER, M_WAITOK);
sprintf(koname, "%s.ko", filename);
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
TAILQ_FOREACH(lf, &linker_files, link) {
if (strcmp(lf->filename, koname) == 0)
break;
@@ -535,7 +571,7 @@
{
linker_file_t lf;
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
TAILQ_FOREACH(lf, &linker_files, link)
if (lf->id == fileid && lf->flags & LINKER_FILE_LINKED)
break;
@@ -548,13 +584,13 @@
linker_file_t lf;
int retval = 0;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
TAILQ_FOREACH(lf, &linker_files, link) {
retval = predicate(lf, context);
if (retval != 0)
break;
}
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (retval);
}
@@ -564,7 +600,8 @@
linker_file_t lf;
const char *filename;
- KLD_LOCK_ASSERT();
+ if (!cold)
+ sx_assert(&kld_sx, SA_XLOCKED);
filename = linker_basename(pathname);
KLD_DPF(FILE, ("linker_make_file: new file, filename='%s' for pathname='%s'\n", filename, pathname));
@@ -574,14 +611,12 @@
lf->refs = 1;
lf->userrefs = 0;
lf->flags = 0;
- lf->filename = linker_strdup(filename);
- lf->pathname = linker_strdup(pathname);
+ lf->filename = strdup(filename, M_LINKER);
+ lf->pathname = strdup(pathname, M_LINKER);
LINKER_GET_NEXT_FILE_ID(lf->id);
lf->ndeps = 0;
lf->deps = NULL;
lf->loadcnt = ++loadcnt;
- lf->sdt_probes = NULL;
- lf->sdt_nprobes = 0;
STAILQ_INIT(&lf->common);
TAILQ_INIT(&lf->modules);
TAILQ_INSERT_TAIL(&linker_files, lf, link);
@@ -600,7 +635,7 @@
if (prison0.pr_securelevel > 0)
return (EPERM);
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
KLD_DPF(FILE, ("linker_file_unload: lf->refs=%d\n", file->refs));
/* Easy case of just dropping a reference. */
@@ -609,6 +644,12 @@
return (0);
}
+ /* Give eventhandlers a chance to prevent the unload. */
+ error = 0;
+ EVENTHANDLER_INVOKE(kld_unload_try, file, &error);
+ if (error != 0)
+ return (EBUSY);
+
KLD_DPF(FILE, ("linker_file_unload: file is unloading,"
" informing modules\n"));
@@ -673,10 +714,8 @@
*/
if (file->flags & LINKER_FILE_LINKED) {
file->flags &= ~LINKER_FILE_LINKED;
- KLD_UNLOCK();
+ linker_file_unregister_sysctls(file);
linker_file_sysuninit(file);
- linker_file_unregister_sysctls(file);
- KLD_LOCK();
}
TAILQ_REMOVE(&linker_files, file, link);
@@ -692,6 +731,10 @@
}
LINKER_UNLOAD(file);
+
+ EVENTHANDLER_INVOKE(kld_unload, file->filename, file->address,
+ file->size);
+
if (file->filename) {
free(file->filename, M_LINKER);
file->filename = NULL;
@@ -715,18 +758,9 @@
{
linker_file_t *newdeps;
- KLD_LOCK_ASSERT();
- newdeps = malloc((file->ndeps + 1) * sizeof(linker_file_t *),
+ sx_assert(&kld_sx, SA_XLOCKED);
+ file->deps = realloc(file->deps, (file->ndeps + 1) * sizeof(*newdeps),
M_LINKER, M_WAITOK | M_ZERO);
- if (newdeps == NULL)
- return (ENOMEM);
-
- if (file->deps) {
- bcopy(file->deps, newdeps,
- file->ndeps * sizeof(linker_file_t *));
- free(file->deps, M_LINKER);
- }
- file->deps = newdeps;
file->deps[file->ndeps] = dep;
file->ndeps++;
KLD_DPF(FILE, ("linker_file_add_dependency:"
@@ -745,15 +779,9 @@
linker_file_lookup_set(linker_file_t file, const char *name,
void *firstp, void *lastp, int *countp)
{
- int error, locked;
- locked = KLD_LOCKED();
- if (!locked)
- KLD_LOCK();
- error = LINKER_LOOKUP_SET(file, name, firstp, lastp, countp);
- if (!locked)
- KLD_UNLOCK();
- return (error);
+ sx_assert(&kld_sx, SA_LOCKED);
+ return (LINKER_LOOKUP_SET(file, name, firstp, lastp, countp));
}
/*
@@ -772,12 +800,12 @@
caddr_t sym;
int locked;
- locked = KLD_LOCKED();
+ locked = sx_xlocked(&kld_sx);
if (!locked)
- KLD_LOCK();
+ sx_xlock(&kld_sx);
sym = linker_file_lookup_symbol_internal(file, name, deps);
if (!locked)
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (sym);
}
@@ -791,7 +819,7 @@
size_t common_size = 0;
int i;
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
KLD_DPF(SYM, ("linker_file_lookup_symbol: file=%p, name=%s, deps=%d\n",
file, name, deps));
@@ -948,7 +976,7 @@
*
* Note that we do not obey list locking protocols here. We really don't need
* DDB to hang because somebody's got the lock held. We'll take the chance
- * that the files list is inconsistant instead.
+ * that the files list is inconsistent instead.
*/
#ifdef DDB
int
@@ -991,9 +1019,9 @@
{
int error;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
error = linker_debug_search_symbol_name(value, buf, buflen, offset);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -1003,9 +1031,6 @@
int
kern_kldload(struct thread *td, const char *file, int *fileid)
{
-#ifdef HWPMC_HOOKS
- struct pmckern_map_in pkm;
-#endif
const char *kldname, *modname;
linker_file_t lf;
int error;
@@ -1027,7 +1052,7 @@
* (kldname.ko, or kldname.ver.ko) treat it as an interface
* name.
*/
- if (index(file, '/') || index(file, '.')) {
+ if (strchr(file, '/') || strchr(file, '.')) {
kldname = file;
modname = NULL;
} else {
@@ -1035,24 +1060,16 @@
modname = file;
}
- KLD_LOCK();
+ sx_xlock(&kld_sx);
error = linker_load_module(kldname, modname, NULL, NULL, &lf);
if (error) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
goto done;
}
lf->userrefs++;
if (fileid != NULL)
*fileid = lf->id;
-#ifdef HWPMC_HOOKS
- KLD_DOWNGRADE();
- pkm.pm_file = lf->filename;
- pkm.pm_address = (uintptr_t) lf->address;
- PMC_CALL_HOOK(td, PMC_FN_KLD_LOAD, (void *) &pkm);
- KLD_UNLOCK_READ();
-#else
- KLD_UNLOCK();
-#endif
+ sx_xunlock(&kld_sx);
done:
CURVNET_RESTORE();
@@ -1081,9 +1098,6 @@
int
kern_kldunload(struct thread *td, int fileid, int flags)
{
-#ifdef HWPMC_HOOKS
- struct pmckern_map_out pkm;
-#endif
linker_file_t lf;
int error = 0;
@@ -1094,17 +1108,12 @@
return (error);
CURVNET_SET(TD_TO_VNET(td));
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_id(fileid);
if (lf) {
KLD_DPF(FILE, ("kldunload: lf->userrefs=%d\n", lf->userrefs));
- /* Check if there are DTrace probes enabled on this file. */
- if (lf->nenabled > 0) {
- printf("kldunload: attempt to unload file that has"
- " DTrace probes enabled\n");
- error = EBUSY;
- } else if (lf->userrefs == 0) {
+ if (lf->userrefs == 0) {
/*
* XXX: maybe LINKER_UNLOAD_FORCE should override ?
*/
@@ -1112,11 +1121,6 @@
" loaded by the kernel\n");
error = EBUSY;
} else {
-#ifdef HWPMC_HOOKS
- /* Save data needed by hwpmc(4) before unloading. */
- pkm.pm_address = (uintptr_t) lf->address;
- pkm.pm_size = lf->size;
-#endif
lf->userrefs--;
error = linker_file_unload(lf, flags);
if (error)
@@ -1124,17 +1128,8 @@
}
} else
error = ENOENT;
+ sx_xunlock(&kld_sx);
-#ifdef HWPMC_HOOKS
- if (error == 0) {
- KLD_DOWNGRADE();
- PMC_CALL_HOOK(td, PMC_FN_KLD_UNLOAD, (void *) &pkm);
- KLD_UNLOCK_READ();
- } else
- KLD_UNLOCK();
-#else
- KLD_UNLOCK();
-#endif
CURVNET_RESTORE();
return (error);
}
@@ -1177,13 +1172,13 @@
goto out;
filename = linker_basename(pathname);
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_name(filename);
if (lf)
td->td_retval[0] = lf->id;
else
error = ENOENT;
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
out:
free(pathname, M_TEMP);
return (error);
@@ -1201,7 +1196,7 @@
return (error);
#endif
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if (uap->fileid == 0)
lf = TAILQ_FIRST(&linker_files);
else {
@@ -1222,7 +1217,7 @@
else
td->td_retval[0] = 0;
out:
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -1229,7 +1224,7 @@
int
sys_kldstat(struct thread *td, struct kldstat_args *uap)
{
- struct kld_file_stat stat;
+ struct kld_file_stat *stat;
int error, version;
/*
@@ -1242,10 +1237,12 @@
version != sizeof(struct kld_file_stat))
return (EINVAL);
- error = kern_kldstat(td, uap->fileid, &stat);
- if (error != 0)
- return (error);
- return (copyout(&stat, uap->stat, version));
+ stat = malloc(sizeof(*stat), M_TEMP, M_WAITOK | M_ZERO);
+ error = kern_kldstat(td, uap->fileid, stat);
+ if (error == 0)
+ error = copyout(stat, uap->stat, version);
+ free(stat, M_TEMP);
+ return (error);
}
int
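
The sys_kldstat() change above moves the kernel-side struct kld_file_stat off the stack and onto malloc(9) memory, but the userland contract is unchanged: callers still preset the version field to sizeof(struct kld_file_stat) or the syscall returns EINVAL. A minimal userland sketch of that contract, using the standard kldnext(2)/kldstat(2) interfaces (illustrative code, not part of this commit):

#include <sys/param.h>
#include <sys/linker.h>
#include <stdio.h>

int
main(void)
{
	struct kld_file_stat st;
	int fileid;

	for (fileid = kldnext(0); fileid > 0; fileid = kldnext(fileid)) {
		/* sys_kldstat() rejects any other version value. */
		st.version = sizeof(st);
		if (kldstat(fileid, &st) == -1) {
			perror("kldstat");
			continue;
		}
		printf("%2d %s\n", st.id, st.name);
	}
	return (0);
}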
@@ -1261,10 +1258,10 @@
return (error);
#endif
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_id(fileid);
if (lf == NULL) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (ENOENT);
}
@@ -1282,7 +1279,7 @@
if (namelen > MAXPATHLEN)
namelen = MAXPATHLEN;
bcopy(lf->pathname, &stat->pathname[0], namelen);
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
td->td_retval[0] = 0;
return (0);
@@ -1301,7 +1298,7 @@
return (error);
#endif
- KLD_LOCK();
+ sx_xlock(&kld_sx);
lf = linker_find_file_by_id(uap->fileid);
if (lf) {
MOD_SLOCK;
@@ -1313,7 +1310,7 @@
MOD_SUNLOCK;
} else
error = ENOENT;
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
@@ -1341,7 +1338,7 @@
symstr = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
if ((error = copyinstr(lookup.symname, symstr, MAXPATHLEN, NULL)) != 0)
goto out;
- KLD_LOCK();
+ sx_xlock(&kld_sx);
if (uap->fileid != 0) {
lf = linker_find_file_by_id(uap->fileid);
if (lf == NULL)
@@ -1367,7 +1364,7 @@
if (lf == NULL)
error = ENOENT;
}
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
out:
free(symstr, M_TEMP);
return (error);
@@ -1476,6 +1473,7 @@
error = 0;
modptr = NULL;
+ sx_xlock(&kld_sx);
while ((modptr = preload_search_next_name(modptr)) != NULL) {
modname = (char *)preload_search_info(modptr, MODINFO_NAME);
modtype = (char *)preload_search_info(modptr, MODINFO_TYPE);
@@ -1650,7 +1648,7 @@
if (linker_file_lookup_set(lf, "sysinit_set", &si_start,
&si_stop, NULL) == 0)
sysinit_add(si_start, si_stop);
- linker_file_register_sysctls(lf);
+ linker_file_register_sysctls(lf, true);
lf->flags |= LINKER_FILE_LINKED;
continue;
fail:
@@ -1657,6 +1655,7 @@
TAILQ_REMOVE(&depended_files, lf, loaded);
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
}
+ sx_xunlock(&kld_sx);
/* woohoo! we made it! */
}
@@ -1703,7 +1702,7 @@
struct nameidata nd;
struct thread *td = curthread; /* XXX */
char *result, **cpp, *sep;
- int error, len, extlen, reclen, flags, vfslocked;
+ int error, len, extlen, reclen, flags;
enum vtype type;
extlen = 0;
@@ -1724,11 +1723,10 @@
* Attempt to open the file, and return the path if
* we succeed and it's a regular file.
*/
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, result, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, result, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error == 0) {
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
type = nd.ni_vp->v_type;
if (vap)
@@ -1735,7 +1733,6 @@
VOP_GETATTR(nd.ni_vp, vap, td->td_ucred);
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
if (type == VREG)
return (result);
}
@@ -1764,7 +1761,6 @@
u_char *cp, *recptr, *bufend, *result, *best, *pathbuf, *sep;
int error, ival, bestver, *intp, found, flags, clen, blen;
ssize_t reclen;
- int vfslocked = 0;
result = NULL;
bestver = found = 0;
@@ -1776,12 +1772,11 @@
snprintf(pathbuf, reclen, "%.*s%s%s", pathlen, path, sep,
linker_hintfile);
- NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, pathbuf, td);
+ NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error)
goto bad;
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_vp->v_type != VREG)
goto bad;
@@ -1797,8 +1792,6 @@
goto bad;
}
hints = malloc(vattr.va_size, M_TEMP, M_WAITOK);
- if (hints == NULL)
- goto bad;
error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)hints, vattr.va_size, 0,
UIO_SYSSPACE, IO_NODELOCKED, cred, NOCRED, &reclen, td);
if (error)
@@ -1805,7 +1798,6 @@
goto bad;
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, cred, td);
- VFS_UNLOCK_GIANT(vfslocked);
nd.ni_vp = NULL;
if (reclen != 0) {
printf("can't read %zd\n", reclen);
@@ -1874,7 +1866,6 @@
if (nd.ni_vp != NULL) {
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, cred, td);
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
* If nothing found or hints is absent - fallback to the old
@@ -1921,8 +1912,8 @@
int len;
/* qualified at all? */
- if (index(name, '/'))
- return (linker_strdup(name));
+ if (strchr(name, '/'))
+ return (strdup(name, M_LINKER));
/* traverse the linker path */
len = strlen(name);
@@ -1942,7 +1933,7 @@
{
const char *filename;
- filename = rindex(path, '/');
+ filename = strrchr(path, '/');
if (filename == NULL)
return path;
if (filename[1])
@@ -1962,7 +1953,7 @@
int i, nmappings;
nmappings = 0;
- KLD_LOCK_READ();
+ sx_slock(&kld_sx);
TAILQ_FOREACH(lf, &linker_files, link)
nmappings++;
@@ -1977,7 +1968,7 @@
kobase[i].pm_address = (uintptr_t)lf->address;
i++;
}
- KLD_UNLOCK_READ();
+ sx_sunlock(&kld_sx);
KASSERT(i > 0, ("linker_hpwmc_list_objects: no kernel objects?"));
@@ -2003,7 +1994,7 @@
char *pathname;
int error;
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
if (modname == NULL) {
/*
* We have to load KLD
@@ -2015,7 +2006,7 @@
if (modlist_lookup2(modname, verinfo) != NULL)
return (EEXIST);
if (kldname != NULL)
- pathname = linker_strdup(kldname);
+ pathname = strdup(kldname, M_LINKER);
else if (rootvnode == NULL)
pathname = NULL;
else
@@ -2075,9 +2066,9 @@
int ver, error = 0, count;
/*
- * All files are dependant on /kernel.
+ * All files are dependent on /kernel.
*/
- KLD_LOCK_ASSERT();
+ sx_assert(&kld_sx, SA_XLOCKED);
if (linker_kernel_file) {
linker_kernel_file->refs++;
error = linker_file_add_dependency(lf, linker_kernel_file);
@@ -2169,16 +2160,16 @@
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
- KLD_LOCK();
+ sx_xlock(&kld_sx);
TAILQ_FOREACH(lf, &linker_files, link) {
error = LINKER_EACH_FUNCTION_NAME(lf,
sysctl_kern_function_list_iterate, req);
if (error) {
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (error);
}
}
- KLD_UNLOCK();
+ sx_xunlock(&kld_sx);
return (SYSCTL_OUT(req, "", 1));
}
Modified: trunk/sys/kern/kern_lock.c
===================================================================
--- trunk/sys/kern/kern_lock.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_lock.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008 Attilio Rao <attilio at FreeBSD.org>
* All rights reserved.
@@ -32,9 +33,10 @@
#include "opt_kdtrace.h"
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_lock.c 310979 2016-12-31 16:37:47Z mjg $");
#include <sys/param.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/lock_profile.h>
@@ -67,11 +69,6 @@
#define SQ_EXCLUSIVE_QUEUE 0
#define SQ_SHARED_QUEUE 1
-#ifdef ADAPTIVE_LOCKMGRS
-#define ALK_RETRIES 10
-#define ALK_LOOPS 10000
-#endif
-
#ifndef INVARIANTS
#define _lockmgr_assert(lk, what, file, line)
#define TD_LOCKS_INC(td)
@@ -120,10 +117,11 @@
} \
} while (0)
-#define LK_CAN_SHARE(x) \
- (((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 || \
- ((x) & LK_EXCLUSIVE_SPINNERS) == 0 || \
- curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
+#define LK_CAN_SHARE(x, flags) \
+ (((x) & LK_SHARE) && \
+ (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \
+ (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) || \
+ (curthread->td_pflags & TDP_DEADLKTREAT)))
#define LK_TRYOP(x) \
((x) & LK_NOWAIT)
@@ -142,15 +140,16 @@
#define lockmgr_xlocked(lk) \
(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
-static void assert_lockmgr(struct lock_object *lock, int how);
+static void assert_lockmgr(const struct lock_object *lock, int how);
#ifdef DDB
-static void db_show_lockmgr(struct lock_object *lock);
+static void db_show_lockmgr(const struct lock_object *lock);
#endif
-static void lock_lockmgr(struct lock_object *lock, int how);
+static void lock_lockmgr(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
-static int owner_lockmgr(struct lock_object *lock, struct thread **owner);
+static int owner_lockmgr(const struct lock_object *lock,
+ struct thread **owner);
#endif
-static int unlock_lockmgr(struct lock_object *lock);
+static uintptr_t unlock_lockmgr(struct lock_object *lock);
struct lock_class lock_class_lockmgr = {
.lc_name = "lockmgr",
@@ -166,8 +165,17 @@
#endif
};
+#ifdef ADAPTIVE_LOCKMGRS
+static u_int alk_retries = 10;
+static u_int alk_loops = 10000;
+static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
+ "lockmgr debugging");
+SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
+SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
+#endif
+
static __inline struct thread *
-lockmgr_xholder(struct lock *lk)
+lockmgr_xholder(const struct lock *lk)
{
uintptr_t x;
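
The compile-time ALK_RETRIES/ALK_LOOPS constants become run-time tunables under a new debug.lockmgr sysctl node, so adaptive spinning can be adjusted without rebuilding the kernel. A small userland sketch of reading one of them with sysctlbyname(3); the OIDs only exist on kernels built with options ADAPTIVE_LOCKMGRS (illustrative code, not part of this commit):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int retries;
	size_t len = sizeof(retries);

	if (sysctlbyname("debug.lockmgr.retries", &retries, &len,
	    NULL, 0) == -1) {
		/* Kernel built without ADAPTIVE_LOCKMGRS. */
		perror("debug.lockmgr.retries");
		return (1);
	}
	printf("debug.lockmgr.retries = %u\n", retries);
	return (0);
}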
@@ -232,8 +240,6 @@
u_int realexslp;
int queue, wakeup_swapper;
- TD_LOCKS_DEC(curthread);
- TD_SLOCKS_DEC(curthread);
WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
@@ -281,7 +287,7 @@
* exclusive waiters bit anyway.
* Please note that lk_exslpfail count may be lying about
* the real number of waiters with the LK_SLEEPFAIL flag on
- * because they may be used in conjuction with interruptible
+ * because they may be used in conjunction with interruptible
* sleeps so lk_exslpfail might be considered an 'upper limit'
* bound, including the edge cases.
*/
@@ -333,11 +339,13 @@
}
lock_profile_release_lock(&lk->lock_object);
+ TD_LOCKS_DEC(curthread);
+ TD_SLOCKS_DEC(curthread);
return (wakeup_swapper);
}
static void
-assert_lockmgr(struct lock_object *lock, int what)
+assert_lockmgr(const struct lock_object *lock, int what)
{
panic("lockmgr locks do not support assertions");
@@ -344,13 +352,13 @@
}
static void
-lock_lockmgr(struct lock_object *lock, int how)
+lock_lockmgr(struct lock_object *lock, uintptr_t how)
{
panic("lockmgr locks do not support sleep interlocking");
}
-static int
+static uintptr_t
unlock_lockmgr(struct lock_object *lock)
{
@@ -359,7 +367,7 @@
#ifdef KDTRACE_HOOKS
static int
-owner_lockmgr(struct lock_object *lock, struct thread **owner)
+owner_lockmgr(const struct lock_object *lock, struct thread **owner)
{
panic("lockmgr locks do not support owner inquiring");
@@ -387,14 +395,16 @@
iflags |= LO_WITNESS;
if (flags & LK_QUIET)
iflags |= LO_QUIET;
+ if (flags & LK_IS_VNODE)
+ iflags |= LO_IS_VNODE;
iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
+ lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
lk->lk_lock = LK_UNLOCKED;
lk->lk_recurse = 0;
lk->lk_exslpfail = 0;
lk->lk_timo = timo;
lk->lk_pri = pri;
- lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
STACK_ZERO(lk);
}
@@ -411,6 +421,14 @@
}
void
+lockdisableshare(struct lock *lk)
+{
+
+ lockmgr_assert(lk, KA_XLOCKED);
+ lk->lock_object.lo_flags |= LK_NOSHARE;
+}
+
+void
lockallowrecurse(struct lock *lk)
{
@@ -472,6 +490,9 @@
KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
__func__, file, line));
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
+ lk->lock_object.lo_name, file, line));
class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
if (panicstr != NULL) {
@@ -486,6 +507,7 @@
op = LK_EXCLUSIVE;
break;
case LK_UPGRADE:
+ case LK_TRYUPGRADE:
case LK_DOWNGRADE:
_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
file, line);
@@ -500,7 +522,7 @@
case LK_SHARED:
if (LK_CAN_WITNESS(flags))
WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
- file, line, ilk);
+ file, line, flags & LK_INTERLOCK ? ilk : NULL);
for (;;) {
x = lk->lk_lock;
@@ -511,7 +533,7 @@
* waiters, if we fail to acquire the shared lock
* loop back and retry.
*/
- if (LK_CAN_SHARE(x)) {
+ if (LK_CAN_SHARE(x, flags)) {
if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
x + LK_ONE_SHARER))
break;
@@ -562,6 +584,9 @@
CTR3(KTR_LOCK,
"%s: spinning on %p held by %p",
__func__, lk, owner);
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
/*
* If we are holding also an interlock drop it
@@ -577,11 +602,16 @@
while (LK_HOLDER(lk->lk_lock) ==
(uintptr_t)owner && TD_IS_RUNNING(owner))
cpu_spinwait();
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
continue;
} else if (LK_CAN_ADAPT(lk, flags) &&
(x & LK_SHARE) != 0 && LK_SHARERS(x) &&
- spintries < ALK_RETRIES) {
+ spintries < alk_retries) {
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
if (flags & LK_INTERLOCK) {
class->lc_unlock(ilk);
flags &= ~LK_INTERLOCK;
@@ -588,7 +618,7 @@
}
GIANT_SAVE();
spintries++;
- for (i = 0; i < ALK_LOOPS; i++) {
+ for (i = 0; i < alk_loops; i++) {
if (LOCK_LOG_TEST(&lk->lock_object, 0))
CTR4(KTR_LOCK,
"%s: shared spinning on %p with %u and %u",
@@ -595,12 +625,14 @@
__func__, lk, spintries, i);
x = lk->lk_lock;
if ((x & LK_SHARE) == 0 ||
- LK_CAN_SHARE(x) != 0)
+ LK_CAN_SHARE(x, flags) != 0)
break;
cpu_spinwait();
}
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
- if (i != ALK_LOOPS)
+ if (i != alk_loops)
continue;
}
#endif
@@ -616,7 +648,7 @@
* if the lock can be acquired in shared mode, try
* again.
*/
- if (LK_CAN_SHARE(x)) {
+ if (LK_CAN_SHARE(x, flags)) {
sleepq_release(&lk->lock_object);
continue;
}
@@ -683,6 +715,7 @@
}
break;
case LK_UPGRADE:
+ case LK_TRYUPGRADE:
_lockmgr_assert(lk, KA_SLOCKED, file, line);
v = lk->lk_lock;
x = v & LK_ALL_WAITERS;
@@ -703,6 +736,17 @@
}
/*
+ * In LK_TRYUPGRADE mode, do not drop the lock,
+ * returning EBUSY instead.
+ */
+ if (op == LK_TRYUPGRADE) {
+ LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
+ __func__, lk);
+ error = EBUSY;
+ break;
+ }
+
+ /*
* We have been unable to succeed in upgrading, so just
* give up the shared lock.
*/
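
The new LK_TRYUPGRADE operation lets a caller attempt an upgrade without LK_UPGRADE's fallback of silently dropping the shared hold: on contention it returns EBUSY and the caller still owns the lock shared. A hedged sketch of how a consumer might use it through the lockmgr(9) interface; sc->sc_lock stands for any struct lock and is not from this commit:

	error = lockmgr(&sc->sc_lock, LK_TRYUPGRADE, NULL);
	if (error == EBUSY) {
		/*
		 * Still holding the lock shared.  Take the slow path:
		 * drop it, reacquire exclusively and revalidate anything
		 * that was observed under the shared hold.
		 */
		lockmgr(&sc->sc_lock, LK_RELEASE, NULL);
		lockmgr(&sc->sc_lock, LK_EXCLUSIVE, NULL);
	}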
@@ -712,7 +756,8 @@
case LK_EXCLUSIVE:
if (LK_CAN_WITNESS(flags))
WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
- LOP_EXCLUSIVE, file, line, ilk);
+ LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
+ ilk : NULL);
/*
* If curthread already holds the lock and this one is
@@ -748,8 +793,10 @@
break;
}
- while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
- tid)) {
+ for (;;) {
+ if (lk->lk_lock == LK_UNLOCKED &&
+ atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+ break;
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -781,6 +828,9 @@
CTR3(KTR_LOCK,
"%s: spinning on %p held by %p",
__func__, lk, owner);
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
/*
* If we are holding also an interlock drop it
@@ -796,15 +846,20 @@
while (LK_HOLDER(lk->lk_lock) ==
(uintptr_t)owner && TD_IS_RUNNING(owner))
cpu_spinwait();
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
continue;
} else if (LK_CAN_ADAPT(lk, flags) &&
(x & LK_SHARE) != 0 && LK_SHARERS(x) &&
- spintries < ALK_RETRIES) {
+ spintries < alk_retries) {
if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
!atomic_cmpset_ptr(&lk->lk_lock, x,
x | LK_EXCLUSIVE_SPINNERS))
continue;
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname(td), "spinning",
+ "lockname:\"%s\"", lk->lock_object.lo_name);
if (flags & LK_INTERLOCK) {
class->lc_unlock(ilk);
flags &= ~LK_INTERLOCK;
@@ -811,7 +866,7 @@
}
GIANT_SAVE();
spintries++;
- for (i = 0; i < ALK_LOOPS; i++) {
+ for (i = 0; i < alk_loops; i++) {
if (LOCK_LOG_TEST(&lk->lock_object, 0))
CTR4(KTR_LOCK,
"%s: shared spinning on %p with %u and %u",
@@ -821,8 +876,10 @@
break;
cpu_spinwait();
}
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname(td), "running");
GIANT_RESTORE();
- if (i != ALK_LOOPS)
+ if (i != alk_loops)
continue;
}
#endif
@@ -928,9 +985,19 @@
}
break;
case LK_DOWNGRADE:
- _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
+ _lockmgr_assert(lk, KA_XLOCKED, file, line);
LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
+
+ /*
+ * Panic if the lock is recursed.
+ */
+ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
+ if (flags & LK_INTERLOCK)
+ class->lc_unlock(ilk);
+ panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
+ __func__, iwmesg, file, line);
+ }
TD_SLOCKS_INC(curthread);
/*
@@ -999,7 +1066,7 @@
* Please note that lk_exslpfail count may be lying
* about the real number of waiters with the
* LK_SLEEPFAIL flag on because they may be used in
- * conjuction with interruptible sleeps so
+ * conjunction with interruptible sleeps so
* lk_exslpfail might be considered an 'upper limit'
* bound, including the edge cases.
*/
@@ -1051,7 +1118,8 @@
case LK_DRAIN:
if (LK_CAN_WITNESS(flags))
WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
- LOP_EXCLUSIVE, file, line, ilk);
+ LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
+ ilk : NULL);
/*
* Trying to drain a lock we already own will result in a
@@ -1064,7 +1132,11 @@
__func__, iwmesg, file, line);
}
- while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
+ for (;;) {
+ if (lk->lk_lock == LK_UNLOCKED &&
+ atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
+ break;
+
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
@@ -1111,7 +1183,7 @@
* Please note that lk_exslpfail count may be
* lying about the real number of waiters with
* the LK_SLEEPFAIL flag on because they may
- * be used in conjuction with interruptible
+ * be used in conjunction with interruptible
* sleeps so lk_exslpfail might be considered
* an 'upper limit' bound, including the edge
* cases.
@@ -1248,9 +1320,16 @@
return;
tid = (uintptr_t)curthread;
- _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
+ _lockmgr_assert(lk, KA_XLOCKED, file, line);
/*
+ * Panic if the lock is recursed.
+ */
+ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
+ panic("%s: disown a recursed lockmgr @ %s:%d\n",
+ __func__, file, line);
+
+ /*
* If the owner is already LK_KERNPROC just skip the whole operation.
*/
if (LK_HOLDER(lk->lk_lock) != tid)
@@ -1276,7 +1355,7 @@
}
void
-lockmgr_printinfo(struct lock *lk)
+lockmgr_printinfo(const struct lock *lk)
{
struct thread *td;
uintptr_t x;
@@ -1289,8 +1368,14 @@
(uintmax_t)LK_SHARERS(lk->lk_lock));
else {
td = lockmgr_xholder(lk);
- printf("lock type %s: EXCL by thread %p (pid %d)\n",
- lk->lock_object.lo_name, td, td->td_proc->p_pid);
+ if (td == (struct thread *)LK_KERNPROC)
+ printf("lock type %s: EXCL by KERNPROC\n",
+ lk->lock_object.lo_name);
+ else
+ printf("lock type %s: EXCL by thread %p "
+ "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
+ td, td->td_proc->p_pid, td->td_proc->p_comm,
+ td->td_tid);
}
x = lk->lk_lock;
@@ -1305,7 +1390,7 @@
}
int
-lockstatus(struct lock *lk)
+lockstatus(const struct lock *lk)
{
uintptr_t v, x;
int ret;
@@ -1335,7 +1420,7 @@
#endif
void
-_lockmgr_assert(struct lock *lk, int what, const char *file, int line)
+_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
{
int slocked = 0;
@@ -1428,12 +1513,12 @@
}
static void
-db_show_lockmgr(struct lock_object *lock)
+db_show_lockmgr(const struct lock_object *lock)
{
struct thread *td;
- struct lock *lk;
+ const struct lock *lk;
- lk = (struct lock *)lock;
+ lk = (const struct lock *)lock;
db_printf(" state: ");
if (lk->lk_lock == LK_UNLOCKED)
Modified: trunk/sys/kern/kern_lockf.c
===================================================================
--- trunk/sys/kern/kern_lockf.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_lockf.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008 Isilon Inc http://www.isilon.com/
* Authors: Doug Rabson <dfr at rabson.org>
@@ -59,7 +60,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_lockf.c 313729 2017-02-14 13:45:38Z avg $");
#include "opt_debug_lockf.h"
@@ -362,7 +363,7 @@
struct lock_owner *lo = lock->lf_owner;
if (lo) {
KASSERT(LIST_EMPTY(&lock->lf_outedges),
- ("freeing lock with dependancies"));
+ ("freeing lock with dependencies"));
KASSERT(LIST_EMPTY(&lock->lf_inedges),
("freeing lock with dependants"));
sx_xlock(&lf_lock_owners_lock);
@@ -469,6 +470,9 @@
return (EOVERFLOW);
end = start + oadd;
}
+
+retry_setlock:
+
/*
* Avoid the common case of unlocking when inode has no locks.
*/
@@ -684,7 +688,7 @@
break;
}
-#ifdef INVARIANTS
+#ifdef DIAGNOSTIC
/*
* Check for some can't happen stuff. In this case, the active
* lock list becoming disordered or containing mutually
@@ -745,6 +749,11 @@
free(freestate, M_LOCKF);
freestate = NULL;
}
+
+ if (error == EDOOFUS) {
+ KASSERT(ap->a_op == F_SETLK, ("EDOOFUS"));
+ goto retry_setlock;
+ }
return (error);
}
@@ -819,7 +828,7 @@
/*
* We can just free all the active locks since they
- * will have no dependancies (we removed them all
+ * will have no dependencies (we removed them all
* above). We don't need to bother locking since we
* are the last thread using this state structure.
*/
@@ -907,7 +916,7 @@
struct lockf_edge *e;
int error;
-#ifdef INVARIANTS
+#ifdef DIAGNOSTIC
LIST_FOREACH(e, &x->lf_outedges, le_outlink)
KASSERT(e->le_to != y, ("adding lock edge twice"));
#endif
@@ -1104,7 +1113,7 @@
/*
* Wake up a sleeping lock and remove it from the pending list now
- * that all its dependancies have been resolved. The caller should
+ * that all its dependencies have been resolved. The caller should
* arrange for the lock to be added to the active list, adjusting any
* existing locks for the same owner as needed.
*/
@@ -1129,9 +1138,9 @@
}
/*
- * Re-check all dependant locks and remove edges to locks that we no
+ * Re-check all dependent locks and remove edges to locks that we no
* longer block. If 'all' is non-zero, the lock has been removed and
- * we must remove all the dependancies, otherwise it has simply been
+ * we must remove all the dependencies, otherwise it has simply been
* reduced but remains active. Any pending locks which have been been
* unblocked are added to 'granted'
*/
@@ -1157,7 +1166,7 @@
}
/*
- * Set the start of an existing active lock, updating dependancies and
+ * Set the start of an existing active lock, updating dependencies and
* adding any newly woken locks to 'granted'.
*/
static void
@@ -1173,7 +1182,7 @@
}
/*
- * Set the end of an existing active lock, updating dependancies and
+ * Set the end of an existing active lock, updating dependencies and
* adding any newly woken locks to 'granted'.
*/
static void
@@ -1196,7 +1205,7 @@
* pending locks as a result of downgrading/unlocking. We simply
* activate the newly granted locks by looping.
*
- * Since the new lock already has its dependancies set up, we always
+ * Since the new lock already has its dependencies set up, we always
* add it to the list (unless its an unlock request). This may
* fragment the lock list in some pathological cases but its probably
* not a real problem.
@@ -1324,7 +1333,7 @@
* may allow some other pending lock to become
* active. Consider this case:
*
- * Owner Action Result Dependancies
+ * Owner Action Result Dependencies
*
* A: lock [0..0] succeeds
* B: lock [2..2] succeeds
@@ -1460,7 +1469,7 @@
lock->lf_refs++;
error = sx_sleep(lock, &state->ls_lock, priority, lockstr, 0);
if (lf_free_lock(lock)) {
- error = EINTR;
+ error = EDOOFUS;
goto out;
}
@@ -1832,7 +1841,7 @@
/*
* This cannot cause a deadlock since any edges we would add
* to splitlock already exist in lock1. We must be sure to add
- * necessary dependancies to splitlock before we reduce lock1
+ * necessary dependencies to splitlock before we reduce lock1
* otherwise we may accidentally grant a pending lock that
* was blocked by the tail end of lock1.
*/
Modified: trunk/sys/kern/kern_lockstat.c
===================================================================
--- trunk/sys/kern/kern_lockstat.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_lockstat.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 2008-2009 Stacey Son <sson at FreeBSD.org>
*
@@ -22,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_lockstat.c 285759 2015-07-21 17:16:37Z markj $
*/
/*
@@ -36,9 +37,10 @@
#ifdef KDTRACE_HOOKS
-#include <sys/time.h>
#include <sys/types.h>
+#include <sys/lock.h>
#include <sys/lockstat.h>
+#include <sys/time.h>
/*
* The following must match the type definition of dtrace_probe. It is
@@ -47,14 +49,19 @@
uint32_t lockstat_probemap[LS_NPROBES];
void (*lockstat_probe_func)(uint32_t, uintptr_t, uintptr_t,
uintptr_t, uintptr_t, uintptr_t);
+int lockstat_enabled = 0;
-
uint64_t
-lockstat_nsecs(void)
+lockstat_nsecs(struct lock_object *lo)
{
struct bintime bt;
uint64_t ns;
+ if (!lockstat_enabled)
+ return (0);
+ if ((lo->lo_flags & LO_NOPROFILE) != 0)
+ return (0);
+
binuptime(&bt);
ns = bt.sec * (uint64_t)1000000000;
ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32;
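
With the new early returns, lockstat_nsecs() costs only a flag test when lockstat is disabled or the lock is marked LO_NOPROFILE; the bintime-to-nanoseconds conversion itself is unchanged. A standalone userland illustration of that conversion, with 1.5 seconds as an example input (not code from this commit):

#include <stdint.h>
#include <stdio.h>

/* Scale a bintime (seconds + 64-bit binary fraction) to nanoseconds. */
static uint64_t
bintime_to_ns(uint64_t sec, uint64_t frac)
{
	uint64_t ns;

	ns = sec * (uint64_t)1000000000;
	ns += ((uint64_t)1000000000 * (uint32_t)(frac >> 32)) >> 32;
	return (ns);
}

int
main(void)
{
	/* 1.5 s: a fraction of one half is 1 << 63. */
	printf("%ju ns\n", (uintmax_t)bintime_to_ns(1, (uint64_t)1 << 63));
	return (0);
}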
Modified: trunk/sys/kern/kern_loginclass.c
===================================================================
--- trunk/sys/kern/kern_loginclass.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_loginclass.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2011 The FreeBSD Foundation
* All rights reserved.
@@ -26,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/kern_loginclass.c 302229 2016-06-27 21:25:01Z bdrewery $
*/
/*
@@ -43,7 +44,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_loginclass.c 302229 2016-06-27 21:25:01Z bdrewery $");
#include <sys/param.h>
#include <sys/eventhandler.h>
@@ -69,10 +70,8 @@
* Lock protecting loginclasses list.
*/
static struct mtx loginclasses_lock;
+MTX_SYSINIT(loginclasses_init, &loginclasses_lock, "loginclasses lock", MTX_DEF);
-static void lc_init(void);
-SYSINIT(loginclass, SI_SUB_CPU, SI_ORDER_FIRST, lc_init, NULL);
-
void
loginclass_hold(struct loginclass *lc)
{
@@ -207,7 +206,7 @@
PROC_LOCK(p);
oldcred = crcopysafe(p, newcred);
newcred->cr_loginclass = newlc;
- p->p_ucred = newcred;
+ proc_set_cred(p, newcred);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
@@ -229,10 +228,3 @@
(callback)(lc->lc_racct, arg2, arg3);
mtx_unlock(&loginclasses_lock);
}
-
-static void
-lc_init(void)
-{
-
- mtx_init(&loginclasses_lock, "loginclasses lock", NULL, MTX_DEF);
-}
Modified: trunk/sys/kern/kern_malloc.c
===================================================================
--- trunk/sys/kern/kern_malloc.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_malloc.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,7 +1,9 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1987, 1991, 1993
* The Regents of the University of California.
* Copyright (c) 2005-2009 Robert N. M. Watson
+ * Copyright (c) 2008 Otto Moerbeek <otto at drijf.net> (mallocarray)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -43,7 +45,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_malloc.c 328276 2018-01-23 04:37:31Z kp $");
#include "opt_ddb.h"
#include "opt_kdtrace.h"
@@ -55,7 +57,6 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
@@ -62,9 +63,11 @@
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/time.h>
+#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -113,14 +116,7 @@
MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
-static void kmeminit(void *);
-SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL);
-
-static MALLOC_DEFINE(M_FREE, "free", "should be on free list");
-
static struct malloc_type *kmemstatistics;
-static vm_offset_t kmembase;
-static vm_offset_t kmemlimit;
static int kmemcount;
#define KMEM_ZSHIFT 4
@@ -127,7 +123,7 @@
#define KMEM_ZBASE 16
#define KMEM_ZMASK (KMEM_ZBASE - 1)
-#define KMEM_ZMAX PAGE_SIZE
+#define KMEM_ZMAX 65536
#define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT)
static uint8_t kmemsize[KMEM_ZSIZE + 1];
@@ -158,21 +154,10 @@
{1024, "1024", },
{2048, "2048", },
{4096, "4096", },
-#if PAGE_SIZE > 4096
{8192, "8192", },
-#if PAGE_SIZE > 8192
{16384, "16384", },
-#if PAGE_SIZE > 16384
{32768, "32768", },
-#if PAGE_SIZE > 32768
{65536, "65536", },
-#if PAGE_SIZE > 65536
-#error "Unsupported PAGE_SIZE"
-#endif /* 65536 */
-#endif /* 32768 */
-#endif /* 16384 */
-#endif /* 8192 */
-#endif /* 4096 */
{0, NULL},
};
@@ -190,6 +175,10 @@
SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0,
"Size of kernel memory");
+static u_long kmem_zmax = KMEM_ZMAX;
+SYSCTL_ULONG(_vm, OID_AUTO, kmem_zmax, CTLFLAG_RDTUN, &kmem_zmax, 0,
+ "Maximum allocation size that malloc(9) would use UMA as backend");
+
static u_long vm_kmem_size_min;
SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, &vm_kmem_size_min, 0,
"Minimum size of kernel memory");
@@ -205,12 +194,12 @@
static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size,
CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kmem_map_size, "LU", "Current kmem_map allocation size");
+ sysctl_kmem_map_size, "LU", "Current kmem allocation size");
static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free,
CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kmem_map_free, "LU", "Largest contiguous free range in kmem_map");
+ sysctl_kmem_map_free, "LU", "Free space in kmem");
/*
* The malloc_mtx protects the kmemstatistics linked list.
@@ -255,7 +244,7 @@
{
u_long size;
- size = kmem_map->size;
+ size = vmem_size(kmem_arena, VMEM_ALLOC);
return (sysctl_handle_long(oidp, &size, 0, req));
}
@@ -264,10 +253,7 @@
{
u_long size;
- vm_map_lock_read(kmem_map);
- size = kmem_map->root != NULL ? kmem_map->root->max_free :
- kmem_map->max_offset - kmem_map->min_offset;
- vm_map_unlock_read(kmem_map);
+ size = vmem_size(kmem_arena, VMEM_FREE);
return (sysctl_handle_long(oidp, &size, 0, req));
}
@@ -408,6 +394,43 @@
}
/*
+ * contigmalloc:
+ *
+ * Allocate a block of physically contiguous memory.
+ *
+ * If M_NOWAIT is set, this routine will not block and return NULL if
+ * the allocation fails.
+ */
+void *
+contigmalloc(unsigned long size, struct malloc_type *type, int flags,
+ vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
+ vm_paddr_t boundary)
+{
+ void *ret;
+
+ ret = (void *)kmem_alloc_contig(kernel_arena, size, flags, low, high,
+ alignment, boundary, VM_MEMATTR_DEFAULT);
+ if (ret != NULL)
+ malloc_type_allocated(type, round_page(size));
+ return (ret);
+}
+
+/*
+ * contigfree:
+ *
+ * Free a block of memory allocated by contigmalloc.
+ *
+ * This routine may not block.
+ */
+void
+contigfree(void *addr, unsigned long size, struct malloc_type *type)
+{
+
+ kmem_free(kernel_arena, (vm_offset_t)addr, size);
+ malloc_type_freed(type, round_page(size));
+}
+
+/*
* malloc:
*
* Allocate a block of memory.
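
contigmalloc()/contigfree() move into kern_malloc.c as thin wrappers around kmem_alloc_contig()/kmem_free() on kernel_arena, with the allocation charged to the caller's malloc type. A hedged sketch of typical driver-style usage; M_DEVBUF, the 64 KB size and the physical-address constraints are example values, not from this commit:

	void *buf;

	/* 64KB, physically contiguous, below 4GB, page aligned. */
	buf = contigmalloc(65536, M_DEVBUF, M_NOWAIT | M_ZERO,
	    0, 0xffffffffUL, PAGE_SIZE, 0);
	if (buf == NULL)
		return (ENOMEM);
	/* ... hand vtophys(buf) to the hardware ... */
	contigfree(buf, 65536, M_DEVBUF);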
@@ -458,7 +481,7 @@
("malloc(M_WAITOK) in interrupt context"));
#ifdef DEBUG_MEMGUARD
- if (memguard_cmp(mtp, size)) {
+ if (memguard_cmp_mtp(mtp, size)) {
va = memguard_alloc(size, flags);
if (va != NULL)
return (va);
@@ -470,7 +493,7 @@
size = redzone_size_ntor(size);
#endif
- if (size <= KMEM_ZMAX) {
+ if (size <= kmem_zmax) {
mtip = mtp->ks_handle;
if (size & KMEM_ZMASK)
size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
@@ -508,6 +531,16 @@
return ((void *) va);
}
+void *
+mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags)
+{
+
+ if (WOULD_OVERFLOW(nmemb, size))
+ panic("mallocarray: %zu * %zu overflowed", nmemb, size);
+
+ return (malloc(size * nmemb, type, flags));
+}
+
/*
* free:
*
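
mallocarray() guards multiplicative allocations: if nmemb * size would overflow it panics instead of quietly allocating a short buffer. The kernel's WOULD_OVERFLOW() test lives in sys/malloc.h; the userland sketch below uses an equivalent OpenBSD-style check purely to illustrate the overflow condition, not the kernel code itself:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Overflow is only possible when both operands reach sqrt(SIZE_MAX). */
#define MUL_NO_OVERFLOW	((size_t)1 << (sizeof(size_t) * 8 / 2))

static void *
xmallocarray(size_t nmemb, size_t size)
{
	if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
	    nmemb > 0 && SIZE_MAX / nmemb < size) {
		fprintf(stderr, "xmallocarray: %zu * %zu overflows\n",
		    nmemb, size);
		abort();	/* the kernel panics here instead */
	}
	return (malloc(nmemb * size));
}

int
main(void)
{
	free(xmallocarray(16, 1024));	/* fine */
	xmallocarray(SIZE_MAX / 2, 4);	/* aborts */
	return (0);
}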
@@ -545,7 +578,6 @@
panic("free: address %p(%p) has not been allocated.\n",
addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
-
if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
#ifdef INVARIANTS
struct malloc_type **mtpp = addr;
@@ -647,45 +679,59 @@
}
/*
- * Initialize the kernel memory allocator
+ * Wake the uma reclamation pagedaemon thread when we exhaust KVA. It
+ * will call the lowmem handler and uma_reclaim() callbacks in a
+ * context that is safe.
*/
-/* ARGSUSED*/
static void
-kmeminit(void *dummy)
+kmem_reclaim(vmem_t *vm, int flags)
{
- uint8_t indx;
+
+ uma_reclaim_wakeup();
+ pagedaemon_wakeup();
+}
+
+CTASSERT(VM_KMEM_SIZE_SCALE >= 1);
+
+/*
+ * Initialize the kernel memory (kmem) arena.
+ */
+void
+kmeminit(void)
+{
u_long mem_size, tmp;
- int i;
-
- mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
/*
- * Try to auto-tune the kernel memory size, so that it is
- * more applicable for a wider range of machine sizes. The
- * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
- * available.
+ * Calculate the amount of kernel virtual address (KVA) space that is
+ * preallocated to the kmem arena. In order to support a wide range
+ * of machines, it is a function of the physical memory size,
+ * specifically,
*
- * Note that the kmem_map is also used by the zone allocator,
- * so make sure that there is enough space.
+ * min(max(physical memory size / VM_KMEM_SIZE_SCALE,
+ * VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX)
+ *
+ * Every architecture must define an integral value for
+ * VM_KMEM_SIZE_SCALE. However, the definitions of VM_KMEM_SIZE_MIN
+ * and VM_KMEM_SIZE_MAX, which represent respectively the floor and
+ * ceiling on this preallocation, are optional. Typically,
+ * VM_KMEM_SIZE_MAX is itself a function of the available KVA space on
+ * a given architecture.
*/
- vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE;
mem_size = cnt.v_page_count;
-#if defined(VM_KMEM_SIZE_SCALE)
vm_kmem_size_scale = VM_KMEM_SIZE_SCALE;
-#endif
TUNABLE_INT_FETCH("vm.kmem_size_scale", &vm_kmem_size_scale);
- if (vm_kmem_size_scale > 0 &&
- (mem_size / vm_kmem_size_scale) > (vm_kmem_size / PAGE_SIZE))
- vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE;
+ if (vm_kmem_size_scale < 1)
+ vm_kmem_size_scale = VM_KMEM_SIZE_SCALE;
+ vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE;
+
#if defined(VM_KMEM_SIZE_MIN)
vm_kmem_size_min = VM_KMEM_SIZE_MIN;
#endif
TUNABLE_ULONG_FETCH("vm.kmem_size_min", &vm_kmem_size_min);
- if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min) {
+ if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min)
vm_kmem_size = vm_kmem_size_min;
- }
#if defined(VM_KMEM_SIZE_MAX)
vm_kmem_size_max = VM_KMEM_SIZE_MAX;
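
The rewritten kmeminit() comment spells the sizing rule out: the kmem arena gets min(max(physical memory / VM_KMEM_SIZE_SCALE, VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX) of KVA, later limited to twice physical memory. A standalone userland illustration of that clamp; the scale, floor, ceiling and 8 GB figures are example values, not the real per-architecture VM_KMEM_SIZE_* definitions:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t page_size = 4096;
	uint64_t scale = 3;				/* VM_KMEM_SIZE_SCALE */
	uint64_t kmem_min = UINT64_C(12) << 20;		/* VM_KMEM_SIZE_MIN */
	uint64_t kmem_max = UINT64_C(1) << 32;		/* VM_KMEM_SIZE_MAX */
	uint64_t pages = (UINT64_C(8) << 30) / page_size;	/* 8GB of RAM */
	uint64_t kmem;

	kmem = (pages / scale) * page_size;
	if (kmem < kmem_min)
		kmem = kmem_min;
	if (kmem > kmem_max)
		kmem = kmem_max;
	printf("kmem arena: %ju MB\n", (uintmax_t)(kmem >> 20));
	return (0);
}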
@@ -694,26 +740,29 @@
if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max)
vm_kmem_size = vm_kmem_size_max;
- /* Allow final override from the kernel environment */
- TUNABLE_ULONG_FETCH("vm.kmem_size", &vm_kmem_size);
-
/*
- * Limit kmem virtual size to twice the physical memory.
- * This allows for kmem map sparseness, but limits the size
- * to something sane. Be careful to not overflow the 32bit
- * ints while doing the check or the adjustment.
+ * Alternatively, the amount of KVA space that is preallocated to the
+ * kmem arena can be set statically at compile-time or manually
+ * through the kernel environment. However, it is still limited to
+ * twice the physical memory size, which has been sufficient to handle
+ * the most severe cases of external fragmentation in the kmem arena.
*/
+#if defined(VM_KMEM_SIZE)
+ vm_kmem_size = VM_KMEM_SIZE;
+#endif
+ TUNABLE_ULONG_FETCH("vm.kmem_size", &vm_kmem_size);
if (vm_kmem_size / 2 / PAGE_SIZE > mem_size)
vm_kmem_size = 2 * mem_size * PAGE_SIZE;
+ vm_kmem_size = round_page(vm_kmem_size);
#ifdef DEBUG_MEMGUARD
tmp = memguard_fudge(vm_kmem_size, kernel_map);
#else
tmp = vm_kmem_size;
#endif
- kmem_map = kmem_suballoc(kernel_map, &kmembase, &kmemlimit,
- tmp, TRUE);
- kmem_map->system_map = 1;
+ vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE,
+ 0, 0);
+ vmem_set_reclaim(kmem_arena, kmem_reclaim);
#ifdef DEBUG_MEMGUARD
/*
@@ -721,11 +770,29 @@
* replacement allocator used for detecting tamper-after-free
* scenarios as they occur. It is only used for debugging.
*/
- memguard_init(kmem_map);
+ memguard_init(kmem_arena);
#endif
+}
+/*
+ * Initialize the kernel memory allocator
+ */
+/* ARGSUSED*/
+static void
+mallocinit(void *dummy)
+{
+ int i;
+ uint8_t indx;
+
+ mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
+
+ kmeminit();
+
uma_startup2();
+ if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX)
+ kmem_zmax = KMEM_ZMAX;
+
mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
#ifdef INVARIANTS
mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
@@ -750,9 +817,10 @@
}
for (;i <= size; i+= KMEM_ZBASE)
kmemsize[i >> KMEM_ZSHIFT] = indx;
-
+
}
}
+SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, mallocinit, NULL);
void
malloc_init(void *data)
Modified: trunk/sys/kern/kern_mbuf.c
===================================================================
--- trunk/sys/kern/kern_mbuf.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mbuf.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,6 +1,7 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004, 2005,
- * Bosko Milekic <bmilekic at FreeBSD.org>. All rights reserved.
+ * Bosko Milekic <bmilekic at FreeBSD.org>. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mbuf.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include "opt_param.h"
@@ -47,6 +48,7 @@
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
+#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>
@@ -76,7 +78,7 @@
* [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ]
* | \________ |
* [ Cluster Keg ] \ /
- * | [ Mbuf Keg ]
+ * | [ Mbuf Keg ]
* [ Cluster Slabs ] |
* | [ Mbuf Slabs ]
* \____________(VM)_________________/
@@ -91,44 +93,67 @@
*
* Whenever an object is allocated from the underlying global
* memory pool it gets pre-initialized with the _zinit_ functions.
- * When the Keg's are overfull objects get decomissioned with
+ * When the Keg's are overfull objects get decommissioned with
* _zfini_ functions and free'd back to the global memory pool.
*
*/
+int nmbufs; /* limits number of mbufs */
int nmbclusters; /* limits number of mbuf clusters */
int nmbjumbop; /* limits number of page size jumbo clusters */
int nmbjumbo9; /* limits number of 9k jumbo clusters */
int nmbjumbo16; /* limits number of 16k jumbo clusters */
-struct mbstat mbstat;
+static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
+
+SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0,
+ "Maximum real memory allocatable to various mbuf types");
+
/*
- * tunable_mbinit() has to be run before init_maxsockets() thus
- * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
- * runs at SI_ORDER_ANY.
+ * tunable_mbinit() has to be run before any mbuf allocations are done.
*/
static void
tunable_mbinit(void *dummy)
{
+ quad_t realmem;
- /* This has to be done before VM init. */
+ /*
+ * The default limit for all mbuf related memory is 1/2 of all
+ * available kernel memory (physical or kmem).
+ * At most it can be 3/4 of available kernel memory.
+ */
+ realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
+ maxmbufmem = realmem / 2;
+ TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
+ if (maxmbufmem > realmem / 4 * 3)
+ maxmbufmem = realmem / 4 * 3;
+
TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
if (nmbclusters == 0)
- nmbclusters = 1024 + maxusers * 64;
+ nmbclusters = maxmbufmem / MCLBYTES / 4;
TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
if (nmbjumbop == 0)
- nmbjumbop = nmbclusters / 2;
+ nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;
TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
if (nmbjumbo9 == 0)
- nmbjumbo9 = nmbclusters / 4;
+ nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;
TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
if (nmbjumbo16 == 0)
- nmbjumbo16 = nmbclusters / 8;
+ nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;
+
+ /*
+ * We need at least as many mbufs as we have clusters of
+ * the various types added together.
+ */
+ TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
+ if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
+ nmbufs = lmax(maxmbufmem / MSIZE / 5,
+ nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
-SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
+SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
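
tunable_mbinit() now derives every mbuf limit from a single memory budget: maxmbufmem defaults to half of min(physical memory, vm_kmem_size), can be raised to at most three quarters of it, and the per-zone cluster counts are carved out of that budget. A standalone userland sketch of the arithmetic; the 8 GB figure and the 4 KB page-size cluster constants are example values, not from this commit:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int64_t realmem = INT64_C(8) << 30;	/* min(physmem, vm_kmem_size) */
	int64_t maxmbufmem = realmem / 2;	/* default budget */

	printf("maxmbufmem  %jd MB\n", (intmax_t)(maxmbufmem >> 20));
	printf("nmbclusters %jd\n", (intmax_t)(maxmbufmem / 2048 / 4));
	printf("nmbjumbop   %jd\n", (intmax_t)(maxmbufmem / 4096 / 4));
	printf("nmbjumbo9   %jd\n", (intmax_t)(maxmbufmem / (9 * 1024) / 6));
	printf("nmbjumbo16  %jd\n", (intmax_t)(maxmbufmem / (16 * 1024) / 6));
	return (0);
}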
@@ -136,11 +161,12 @@
int error, newnmbclusters;
newnmbclusters = nmbclusters;
- error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbclusters > nmbclusters) {
+ error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
+ if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
+ if (newnmbclusters > nmbclusters &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbclusters = newnmbclusters;
- uma_zone_set_max(zone_clust, nmbclusters);
+ nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
EVENTHANDLER_INVOKE(nmbclusters_change);
} else
error = EINVAL;
@@ -157,11 +183,12 @@
int error, newnmbjumbop;
newnmbjumbop = nmbjumbop;
- error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbjumbop> nmbjumbop) {
+ error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
+ if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
+ if (newnmbjumbop > nmbjumbop &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbop = newnmbjumbop;
- uma_zone_set_max(zone_jumbop, nmbjumbop);
+ nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
} else
error = EINVAL;
}
@@ -169,9 +196,8 @@
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbop, 0, sysctl_nmbjumbop, "IU",
- "Maximum number of mbuf page size jumbo clusters allowed");
+ "Maximum number of mbuf page size jumbo clusters allowed");
-
static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
@@ -178,11 +204,12 @@
int error, newnmbjumbo9;
newnmbjumbo9 = nmbjumbo9;
- error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbjumbo9> nmbjumbo9) {
+ error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
+ if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
+ if (newnmbjumbo9 > nmbjumbo9 &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbo9 = newnmbjumbo9;
- uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+ nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
} else
error = EINVAL;
}
@@ -190,7 +217,7 @@
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
- "Maximum number of mbuf 9k jumbo clusters allowed");
+ "Maximum number of mbuf 9k jumbo clusters allowed");
static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
@@ -198,11 +225,12 @@
int error, newnmbjumbo16;
newnmbjumbo16 = nmbjumbo16;
- error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
- if (error == 0 && req->newptr) {
- if (newnmbjumbo16> nmbjumbo16) {
+ error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
+ if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
+ if (newnmbjumbo16 > nmbjumbo16 &&
+ nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
nmbjumbo16 = newnmbjumbo16;
- uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+ nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
} else
error = EINVAL;
}
@@ -212,11 +240,27 @@
&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
"Maximum number of mbuf 16k jumbo clusters allowed");
+static int
+sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
+{
+ int error, newnmbufs;
+ newnmbufs = nmbufs;
+ error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
+ if (error == 0 && req->newptr && newnmbufs != nmbufs) {
+ if (newnmbufs > nmbufs) {
+ nmbufs = newnmbufs;
+ nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
+ EVENTHANDLER_INVOKE(nmbufs_change);
+ } else
+ error = EINVAL;
+ }
+ return (error);
+}
+SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
+&nmbufs, 0, sysctl_nmbufs, "IU",
+ "Maximum number of mbufs allowed");
-SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
- "Mbuf general information and statistics");
-
/*
* Zones from which we allocate.
*/
@@ -241,16 +285,14 @@
static void mb_zfini_pack(void *, int);
static void mb_reclaim(void *);
-static void mbuf_init(void *);
-static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
+static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
-/* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
+/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
/*
* Initialize FreeBSD Network buffer allocation.
*/
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
static void
mbuf_init(void *dummy)
{
@@ -266,6 +308,9 @@
NULL, NULL,
#endif
MSIZE - 1, UMA_ZONE_MAXBUCKET);
+ if (nmbufs > 0)
+ nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
+ uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -276,7 +321,8 @@
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
if (nmbclusters > 0)
- uma_zone_set_max(zone_clust, nmbclusters);
+ nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
+ uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
@@ -291,7 +337,8 @@
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
if (nmbjumbop > 0)
- uma_zone_set_max(zone_jumbop, nmbjumbop);
+ nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
+ uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -301,9 +348,10 @@
NULL, NULL,
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
if (nmbjumbo9 > 0)
- uma_zone_set_max(zone_jumbo9, nmbjumbo9);
- uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
+ nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
+ uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
mb_ctor_clust, mb_dtor_clust,
@@ -313,9 +361,10 @@
NULL, NULL,
#endif
UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+ uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
if (nmbjumbo16 > 0)
- uma_zone_set_max(zone_jumbo16, nmbjumbo16);
- uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
+ nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
+ uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
NULL, NULL,
@@ -331,26 +380,8 @@
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
-
- /*
- * [Re]set counters and local statistics knobs.
- * XXX Some of these should go and be replaced, but UMA stat
- * gathering needs to be revised.
- */
- mbstat.m_mbufs = 0;
- mbstat.m_mclusts = 0;
- mbstat.m_drain = 0;
- mbstat.m_msize = MSIZE;
- mbstat.m_mclbytes = MCLBYTES;
- mbstat.m_minclsize = MINCLSIZE;
- mbstat.m_mlen = MLEN;
- mbstat.m_mhlen = MHLEN;
- mbstat.m_numtypes = MT_NTYPES;
-
- mbstat.m_mcfail = mbstat.m_mpfail = 0;
- mbstat.sf_iocnt = 0;
- mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
}
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
/*
* UMA backend page allocator for the jumbo frame zones.
@@ -359,12 +390,12 @@
* pages.
*/
static void *
-mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
+mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
/* Inform UMA that this allocator uses kernel_map/object. */
*flags = UMA_SLAB_KERNEL;
- return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
+ return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
(vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}
@@ -380,9 +411,7 @@
{
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
int error;
-#endif
int flags;
short type;
@@ -389,9 +418,7 @@
#ifdef INVARIANTS
trash_ctor(mem, size, arg, how);
#endif
- m = (struct mbuf *)mem;
args = (struct mb_args *)arg;
- flags = args->flags;
type = args->type;
/*
@@ -401,31 +428,13 @@
if (type == MT_NOINIT)
return (0);
- m->m_next = NULL;
- m->m_nextpkt = NULL;
- m->m_len = 0;
- m->m_flags = flags;
- m->m_type = type;
- if (flags & M_PKTHDR) {
- m->m_data = m->m_pktdat;
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.header = NULL;
- m->m_pkthdr.len = 0;
- m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
- m->m_pkthdr.flowid = 0;
- SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
- /* If the label init fails, fail the alloc */
- error = mac_mbuf_init(m, how);
- if (error)
- return (error);
-#endif
- } else
- m->m_data = m->m_dat;
- return (0);
+ m = (struct mbuf *)mem;
+ flags = args->flags;
+ MPASS((flags & M_NOFREE) == 0);
+
+ error = m_init(m, NULL, size, how, type, flags);
+
+ return (error);
}
/*
@@ -435,12 +444,12 @@
mb_dtor_mbuf(void *mem, int size, void *arg)
{
struct mbuf *m;
- unsigned long flags;
+ unsigned long flags;
m = (struct mbuf *)mem;
flags = (unsigned long)arg;
- if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
+ if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
m_tag_delete_chain(m, NULL);
KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
@@ -540,6 +549,7 @@
m->m_ext.ext_arg2 = NULL;
m->m_ext.ext_size = size;
m->m_ext.ext_type = type;
+ m->m_ext.ext_flags = 0;
m->m_ext.ref_cnt = refcnt;
}
@@ -611,10 +621,7 @@
{
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
- int error;
-#endif
- int flags;
+ int error, flags;
short type;
m = (struct mbuf *)mem;
@@ -621,37 +628,19 @@
args = (struct mb_args *)arg;
flags = args->flags;
type = args->type;
+ MPASS((flags & M_NOFREE) == 0);
#ifdef INVARIANTS
trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif
- m->m_next = NULL;
- m->m_nextpkt = NULL;
- m->m_data = m->m_ext.ext_buf;
- m->m_len = 0;
- m->m_flags = (flags | M_EXT);
- m->m_type = type;
- if (flags & M_PKTHDR) {
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.len = 0;
- m->m_pkthdr.header = NULL;
- m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
- m->m_pkthdr.flowid = 0;
- SLIST_INIT(&m->m_pkthdr.tags);
-#ifdef MAC
- /* If the label init fails, fail the alloc */
- error = mac_mbuf_init(m, how);
- if (error)
- return (error);
-#endif
- }
+ error = m_init(m, NULL, size, how, type, flags);
+
/* m_ext is already initialized. */
+ m->m_data = m->m_ext.ext_buf;
+ m->m_flags = (flags | M_EXT);
- return (0);
+ return (error);
}
int
@@ -661,15 +650,20 @@
int error;
#endif
m->m_data = m->m_pktdat;
+ m->m_pkthdr.rcvif = NULL;
SLIST_INIT(&m->m_pkthdr.tags);
- m->m_pkthdr.rcvif = NULL;
- m->m_pkthdr.header = NULL;
m->m_pkthdr.len = 0;
m->m_pkthdr.flowid = 0;
m->m_pkthdr.csum_flags = 0;
- m->m_pkthdr.csum_data = 0;
- m->m_pkthdr.tso_segsz = 0;
- m->m_pkthdr.ether_vtag = 0;
+ m->m_pkthdr.fibnum = 0;
+ m->m_pkthdr.cosqos = 0;
+ m->m_pkthdr.rsstype = 0;
+ m->m_pkthdr.l2hlen = 0;
+ m->m_pkthdr.l3hlen = 0;
+ m->m_pkthdr.l4hlen = 0;
+ m->m_pkthdr.l5hlen = 0;
+ m->m_pkthdr.PH_per.sixtyfour[0] = 0;
+ m->m_pkthdr.PH_loc.sixtyfour[0] = 0;
#ifdef MAC
/* If the label init fails, fail the alloc */
error = mac_mbuf_init(m, how);
Modified: trunk/sys/kern/kern_mib.c
===================================================================
--- trunk/sys/kern/kern_mib.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mib.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -36,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mib.c 294283 2016-01-18 18:27:21Z jhb $");
#include "opt_compat.h"
#include "opt_posix.h"
@@ -55,35 +56,35 @@
#include <sys/sx.h>
#include <sys/unistd.h>
-SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW, 0,
"Sysctl internal magic");
-SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0,
+SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0,
"High kernel, proc, limits &c");
-SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_VM, vm, CTLFLAG_RW, 0,
"Virtual memory");
-SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_VFS, vfs, CTLFLAG_RW, 0,
"File system");
-SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_NET, net, CTLFLAG_RW, 0,
"Network, (see socket.h)");
-SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_DEBUG, debug, CTLFLAG_RW, 0,
"Debugging");
SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0,
"Sizeof various things");
-SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_HW, hw, CTLFLAG_RW, 0,
"hardware");
-SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_MACHDEP, machdep, CTLFLAG_RW, 0,
"machine dependent");
-SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_USER, user, CTLFLAG_RW, 0,
"user-level");
-SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0,
"p1003_1b, (see p1003_1b.h)");
-SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, compat, CTLFLAG_RW, 0,
"Compatibility code");
-SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, security, CTLFLAG_RW, 0,
"Security");
#ifdef REGRESSION
-SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0,
+SYSCTL_ROOT_NODE(OID_AUTO, regression, CTLFLAG_RW, 0,
"Regression test MIB");
#endif
@@ -90,11 +91,8 @@
SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE,
kern_ident, 0, "Kernel identifier");
-SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE|
- CTLFLAG_CAPRD, osrelease, 0, "Operating system release");
-
SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, BSD, "Operating system revision");
+ SYSCTL_NULL_INT_PTR, BSD, "Operating system revision");
SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE,
version, 0, "Kernel version");
@@ -105,13 +103,6 @@
SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE|
CTLFLAG_CAPRD, ostype, 0, "Operating system type");
-/*
- * NOTICE: The *userland* release date is available in
- * /usr/include/osreldate.h
- */
-SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD|CTLFLAG_CAPRD,
- &osreldate, 0, "Kernel release date");
-
SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN,
&maxproc, 0, "Maximum number of processes");
@@ -122,10 +113,10 @@
&maxusers, 0, "Hint for kernel tuning");
SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, ARG_MAX, "Maximum bytes of argument to execve(2)");
+ SYSCTL_NULL_INT_PTR, ARG_MAX, "Maximum bytes of argument to execve(2)");
SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, _POSIX_VERSION, "Version of POSIX attempting to comply to");
+ SYSCTL_NULL_INT_PTR, _POSIX_VERSION, "Version of POSIX attempting to comply to");
SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
&ngroups_max, 0,
@@ -132,14 +123,14 @@
"Maximum number of supplemental groups a user can belong to");
SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, 1, "Whether job control is available");
+ SYSCTL_NULL_INT_PTR, 1, "Whether job control is available");
#ifdef _POSIX_SAVED_IDS
SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, 1, "Whether saved set-group/user ID is available");
+ SYSCTL_NULL_INT_PTR, 1, "Whether saved set-group/user ID is available");
#else
SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, 0, "Whether saved set-group/user ID is available");
+ SYSCTL_NULL_INT_PTR, 0, "Whether saved set-group/user ID is available");
#endif
char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */
@@ -151,10 +142,10 @@
&mp_ncpus, 0, "Number of active CPUs");
SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, BYTE_ORDER, "System byte order");
+ SYSCTL_NULL_INT_PTR, BYTE_ORDER, "System byte order");
SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD|CTLFLAG_CAPRD,
- 0, PAGE_SIZE, "System memory page size");
+ SYSCTL_NULL_INT_PTR, PAGE_SIZE, "System memory page size");
static int
sysctl_kern_arnd(SYSCTL_HANDLER_ARGS)
@@ -261,6 +252,13 @@
SYSCTL_PROC(_hw, HW_MACHINE_ARCH, machine_arch, CTLTYPE_STRING | CTLFLAG_RD,
NULL, 0, sysctl_hw_machine_arch, "A", "System architecture");
+SYSCTL_STRING(_kern, OID_AUTO, supported_archs, CTLFLAG_RD | CTLFLAG_MPSAFE,
+#ifdef COMPAT_FREEBSD32
+ MACHINE_ARCH " " MACHINE_ARCH32, 0, "Supported architectures for binaries");
+#else
+ MACHINE_ARCH, 0, "Supported architectures for binaries");
+#endif
+
static int
sysctl_hostname(SYSCTL_HANDLER_ARGS)
{
@@ -380,15 +378,8 @@
/* Actual kernel configuration options. */
extern char kernconfstring[];
-static int
-sysctl_kern_config(SYSCTL_HANDLER_ARGS)
-{
- return (sysctl_handle_string(oidp, kernconfstring,
- strlen(kernconfstring), req));
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW,
- 0, 0, sysctl_kern_config, "", "Kernel configuration file");
+SYSCTL_STRING(_kern, OID_AUTO, conftxt, CTLFLAG_RD, kernconfstring, 0,
+ "Kernel configuration file");
#endif
static int
@@ -429,6 +420,48 @@
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_hostid, "LU", "Host ID");
+/*
+ * The osrelease string is copied from the global (osrelease in vers.c) into
+ * prison0 by a sysinit and is inherited by child jails if not changed at jail
+ * creation, so we always return the copy from the current prison data.
+ */
+static int
+sysctl_osrelease(SYSCTL_HANDLER_ARGS)
+{
+ struct prison *pr;
+
+ pr = req->td->td_ucred->cr_prison;
+ return (SYSCTL_OUT(req, pr->pr_osrelease, strlen(pr->pr_osrelease) + 1));
+
+}
+
+SYSCTL_PROC(_kern, KERN_OSRELEASE, osrelease,
+ CTLTYPE_STRING | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_osrelease, "A", "Operating system release");
+
+/*
+ * The osreldate number is copied from the global (osreldate in vers.c) into
+ * prison0 by a sysinit and is inherited by child jails if not changed at jail
+ * creation, so we always return the value from the current prison data.
+ */
+static int
+sysctl_osreldate(SYSCTL_HANDLER_ARGS)
+{
+ struct prison *pr;
+
+ pr = req->td->td_ucred->cr_prison;
+ return (SYSCTL_OUT(req, &pr->pr_osreldate, sizeof(pr->pr_osreldate)));
+
+}
+
+/*
+ * NOTICE: The *userland* release date is available in
+ * /usr/include/osreldate.h
+ */
+SYSCTL_PROC(_kern, KERN_OSRELDATE, osreldate,
+ CTLTYPE_INT | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_osreldate, "I", "Kernel release date");
+
SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features");
#ifdef COMPAT_FREEBSD4
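
These two handlers move kern.osrelease and kern.osreldate off the static SYSCTL_STRING/SYSCTL_INT entries backed by the globals in vers.c and onto per-prison copies, so a jail can advertise its own release values. A minimal userland sketch (illustrative only, not part of this commit) showing how either value is read through sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	char osrelease[32];
	int osreldate;
	size_t len;

	/* Returns the copy from the calling process's prison. */
	len = sizeof(osrelease);
	if (sysctlbyname("kern.osrelease", osrelease, &len, NULL, 0) == 0)
		printf("kern.osrelease: %s\n", osrelease);

	len = sizeof(osreldate);
	if (sysctlbyname("kern.osreldate", &osreldate, &len, NULL, 0) == 0)
		printf("kern.osreldate: %d\n", osreldate);
	return (0);
}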
@@ -457,50 +490,51 @@
SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD,
"", 0, "PATH that finds all the standard utilities");
SYSCTL_INT(_user, USER_BC_BASE_MAX, bc_base_max, CTLFLAG_RD,
- 0, 0, "Max ibase/obase values in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max ibase/obase values in bc(1)");
SYSCTL_INT(_user, USER_BC_DIM_MAX, bc_dim_max, CTLFLAG_RD,
- 0, 0, "Max array size in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max array size in bc(1)");
SYSCTL_INT(_user, USER_BC_SCALE_MAX, bc_scale_max, CTLFLAG_RD,
- 0, 0, "Max scale value in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max scale value in bc(1)");
SYSCTL_INT(_user, USER_BC_STRING_MAX, bc_string_max, CTLFLAG_RD,
- 0, 0, "Max string length in bc(1)");
+ SYSCTL_NULL_INT_PTR, 0, "Max string length in bc(1)");
SYSCTL_INT(_user, USER_COLL_WEIGHTS_MAX, coll_weights_max, CTLFLAG_RD,
- 0, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry");
-SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD, 0, 0, "");
+ SYSCTL_NULL_INT_PTR, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry");
+SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD,
+ SYSCTL_NULL_INT_PTR, 0, "");
SYSCTL_INT(_user, USER_LINE_MAX, line_max, CTLFLAG_RD,
- 0, 0, "Max length (bytes) of a text-processing utility's input line");
+ SYSCTL_NULL_INT_PTR, 0, "Max length (bytes) of a text-processing utility's input line");
SYSCTL_INT(_user, USER_RE_DUP_MAX, re_dup_max, CTLFLAG_RD,
- 0, 0, "Maximum number of repeats of a regexp permitted");
+ SYSCTL_NULL_INT_PTR, 0, "Maximum number of repeats of a regexp permitted");
SYSCTL_INT(_user, USER_POSIX2_VERSION, posix2_version, CTLFLAG_RD,
- 0, 0,
+ SYSCTL_NULL_INT_PTR, 0,
"The version of POSIX 1003.2 with which the system attempts to comply");
SYSCTL_INT(_user, USER_POSIX2_C_BIND, posix2_c_bind, CTLFLAG_RD,
- 0, 0, "Whether C development supports the C bindings option");
+ SYSCTL_NULL_INT_PTR, 0, "Whether C development supports the C bindings option");
SYSCTL_INT(_user, USER_POSIX2_C_DEV, posix2_c_dev, CTLFLAG_RD,
- 0, 0, "Whether system supports the C development utilities option");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports the C development utilities option");
SYSCTL_INT(_user, USER_POSIX2_CHAR_TERM, posix2_char_term, CTLFLAG_RD,
- 0, 0, "");
+ SYSCTL_NULL_INT_PTR, 0, "");
SYSCTL_INT(_user, USER_POSIX2_FORT_DEV, posix2_fort_dev, CTLFLAG_RD,
- 0, 0, "Whether system supports FORTRAN development utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports FORTRAN development utilities");
SYSCTL_INT(_user, USER_POSIX2_FORT_RUN, posix2_fort_run, CTLFLAG_RD,
- 0, 0, "Whether system supports FORTRAN runtime utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports FORTRAN runtime utilities");
SYSCTL_INT(_user, USER_POSIX2_LOCALEDEF, posix2_localedef, CTLFLAG_RD,
- 0, 0, "Whether system supports creation of locales");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports creation of locales");
SYSCTL_INT(_user, USER_POSIX2_SW_DEV, posix2_sw_dev, CTLFLAG_RD,
- 0, 0, "Whether system supports software development utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports software development utilities");
SYSCTL_INT(_user, USER_POSIX2_UPE, posix2_upe, CTLFLAG_RD,
- 0, 0, "Whether system supports the user portability utilities");
+ SYSCTL_NULL_INT_PTR, 0, "Whether system supports the user portability utilities");
SYSCTL_INT(_user, USER_STREAM_MAX, stream_max, CTLFLAG_RD,
- 0, 0, "Min Maximum number of streams a process may have open at one time");
+ SYSCTL_NULL_INT_PTR, 0, "Min Maximum number of streams a process may have open at one time");
SYSCTL_INT(_user, USER_TZNAME_MAX, tzname_max, CTLFLAG_RD,
- 0, 0, "Min Maximum number of types supported for timezone names");
+ SYSCTL_NULL_INT_PTR, 0, "Min Maximum number of types supported for timezone names");
#include <sys/vnode.h>
SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD,
- 0, sizeof(struct vnode), "sizeof(struct vnode)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct vnode), "sizeof(struct vnode)");
SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD,
- 0, sizeof(struct proc), "sizeof(struct proc)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct proc), "sizeof(struct proc)");
static int
sysctl_kern_pid_max(SYSCTL_HANDLER_ARGS)
@@ -533,14 +567,19 @@
#include <sys/bio.h>
#include <sys/buf.h>
SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD,
- 0, sizeof(struct bio), "sizeof(struct bio)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct bio), "sizeof(struct bio)");
SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD,
- 0, sizeof(struct buf), "sizeof(struct buf)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct buf), "sizeof(struct buf)");
#include <sys/user.h>
SYSCTL_INT(_debug_sizeof, OID_AUTO, kinfo_proc, CTLFLAG_RD,
- 0, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)");
+/* Used by kernel debuggers. */
+const int pcb_size = sizeof(struct pcb);
+SYSCTL_INT(_debug_sizeof, OID_AUTO, pcb, CTLFLAG_RD,
+ SYSCTL_NULL_INT_PTR, sizeof(struct pcb), "sizeof(struct pcb)");
+
/* XXX compatibility, remove for 6.0 */
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
Modified: trunk/sys/kern/kern_module.c
===================================================================
--- trunk/sys/kern/kern_module.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_module.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997 Doug Rabson
* All rights reserved.
@@ -27,7 +28,7 @@
#include "opt_compat.h"
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_module.c 293688 2016-01-11 19:59:56Z trasz $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -133,7 +134,7 @@
MOD_XLOCK;
if (mod->file) {
/*
- * Once a module is succesfully loaded, move
+ * Once a module is successfully loaded, move
* it to the head of the module list for this
* linker file. This resorts the list so that
* when the kernel linker iterates over the
@@ -158,16 +159,12 @@
newmod = module_lookupbyname(data->name);
if (newmod != NULL) {
MOD_XUNLOCK;
- printf("module_register: module %s already exists!\n",
- data->name);
+ printf("%s: cannot register %s from %s; already loaded from %s\n",
+ __func__, data->name, container->filename, newmod->file->filename);
return (EEXIST);
}
namelen = strlen(data->name) + 1;
newmod = malloc(sizeof(struct module) + namelen, M_MODULE, M_WAITOK);
- if (newmod == NULL) {
- MOD_XUNLOCK;
- return (ENOMEM);
- }
newmod->refs = 1;
newmod->id = nextid++;
newmod->name = (char *)(newmod + 1);
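
The dropped NULL check in module_register() was dead code: malloc(9) called with M_WAITOK sleeps until the allocation succeeds and never returns NULL, so only M_NOWAIT callers need a failure path. A short sketch of the two idioms (illustrative only; M_TEMP and the size argument are placeholders):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>

static int
alloc_example(void **out, size_t size, int cansleep)
{
	void *p;

	if (cansleep) {
		/* M_WAITOK: may sleep, never returns NULL, no check needed. */
		p = malloc(size, M_TEMP, M_WAITOK);
	} else {
		/* M_NOWAIT: may fail, so the result must be checked. */
		p = malloc(size, M_TEMP, M_NOWAIT);
		if (p == NULL)
			return (ENOMEM);
	}
	*out = p;
	return (0);
}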
Modified: trunk/sys/kern/kern_mtxpool.c
===================================================================
--- trunk/sys/kern/kern_mtxpool.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mtxpool.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2001 Matthew Dillon. All Rights Reserved.
*
@@ -39,12 +40,12 @@
*
* Disadvantages:
* - should generally only be used as leaf mutexes.
- * - pool/pool dependancy ordering cannot be depended on.
+ * - pool/pool dependency ordering cannot be depended on.
 * - possible L1 cache mastership contention between cpus.
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mtxpool.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include <sys/param.h>
#include <sys/proc.h>
Modified: trunk/sys/kern/kern_mutex.c
===================================================================
--- trunk/sys/kern/kern_mutex.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_mutex.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
*
@@ -25,8 +26,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * from BSDI $Id: kern_mutex.c,v 1.4 2012-10-09 04:08:16 laffer1 Exp $
- * and BSDI $Id: kern_mutex.c,v 1.4 2012-10-09 04:08:16 laffer1 Exp $
+ * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
+ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
*/
/*
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_mutex.c 323870 2017-09-21 19:24:11Z marius $");
#include "opt_adaptive_mutexes.h"
#include "opt_ddb.h"
@@ -57,6 +58,7 @@
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
@@ -83,6 +85,12 @@
#endif
/*
+ * Return the mutex address when the lock cookie address is provided.
+ * This functionality assumes that struct mtx* have a member named mtx_lock.
+ */
+#define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock))
+
+/*
* Internal utility macros.
*/
#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED)
@@ -91,17 +99,18 @@
#define mtx_owner(m) ((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK))
-static void assert_mtx(struct lock_object *lock, int what);
+static void assert_mtx(const struct lock_object *lock, int what);
#ifdef DDB
-static void db_show_mtx(struct lock_object *lock);
+static void db_show_mtx(const struct lock_object *lock);
#endif
-static void lock_mtx(struct lock_object *lock, int how);
-static void lock_spin(struct lock_object *lock, int how);
+static void lock_mtx(struct lock_object *lock, uintptr_t how);
+static void lock_spin(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
-static int owner_mtx(struct lock_object *lock, struct thread **owner);
+static int owner_mtx(const struct lock_object *lock,
+ struct thread **owner);
#endif
-static int unlock_mtx(struct lock_object *lock);
-static int unlock_spin(struct lock_object *lock);
+static uintptr_t unlock_mtx(struct lock_object *lock);
+static uintptr_t unlock_spin(struct lock_object *lock);
/*
* Lock classes for sleep and spin mutexes.
@@ -133,6 +142,37 @@
#endif
};
+#ifdef ADAPTIVE_MUTEXES
+static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");
+
+static struct lock_delay_config mtx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
+ 0, "");
+
+static void
+mtx_delay_sysinit(void *dummy)
+{
+
+ mtx_delay.initial = mp_ncpus * 25;
+ mtx_delay.step = (mp_ncpus * 25) / 2;
+ mtx_delay.min = mp_ncpus * 5;
+ mtx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(mtx_delay_sysinit);
+#endif
+
/*
* System-wide mutexes
*/
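
The ADAPTIVE_MUTEXES block above replaces the bare cpu_spinwait() loop used while the owner is running with lock_delay(), tuned through the new debug.mtx.delay_* sysctls and rescaled from mp_ncpus at boot. The general shape is a bounded back-off: spin for some number of iterations, grow that number by 'step' on every unsuccessful pass, and clamp it between 'min' and 'max'. A userland-style sketch of that shape only (not the kernel's lock_delay() implementation):

#include <stdint.h>

struct delay_config {
	unsigned int initial;	/* first spin count */
	unsigned int step;	/* growth per unsuccessful pass */
	unsigned int min;	/* lower clamp */
	unsigned int max;	/* upper clamp */
};

static inline void
cpu_relax(void)
{
	/* Stand-in for cpu_spinwait(); a compiler barrier is enough here. */
	__asm __volatile("" ::: "memory");
}

/* Spin until *lock reads as 'unowned', backing off between polls. */
static void
backoff_spin(volatile uintptr_t *lock, uintptr_t unowned,
    const struct delay_config *dc)
{
	unsigned int delay, i;

	delay = dc->initial < dc->min ? dc->min : dc->initial;
	while (*lock != unowned) {
		for (i = 0; i < delay; i++)
			cpu_relax();
		if (delay + dc->step <= dc->max)
			delay += dc->step;
	}
}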
@@ -140,14 +180,14 @@
struct mtx Giant;
void
-assert_mtx(struct lock_object *lock, int what)
+assert_mtx(const struct lock_object *lock, int what)
{
- mtx_assert((struct mtx *)lock, what);
+ mtx_assert((const struct mtx *)lock, what);
}
void
-lock_mtx(struct lock_object *lock, int how)
+lock_mtx(struct lock_object *lock, uintptr_t how)
{
mtx_lock((struct mtx *)lock);
@@ -154,13 +194,13 @@
}
void
-lock_spin(struct lock_object *lock, int how)
+lock_spin(struct lock_object *lock, uintptr_t how)
{
panic("spin locks can only use msleep_spin");
}
-int
+uintptr_t
unlock_mtx(struct lock_object *lock)
{
struct mtx *m;
@@ -171,7 +211,7 @@
return (0);
}
-int
+uintptr_t
unlock_spin(struct lock_object *lock)
{
@@ -180,9 +220,9 @@
#ifdef KDTRACE_HOOKS
int
-owner_mtx(struct lock_object *lock, struct thread **owner)
+owner_mtx(const struct lock_object *lock, struct thread **owner)
{
- struct mtx *m = (struct mtx *)lock;
+ const struct mtx *m = (const struct mtx *)lock;
*owner = mtx_owner(m);
return (mtx_unowned(m) == 0);
@@ -194,38 +234,49 @@
* modules and can also be called from assembly language if needed.
*/
void
-_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
+ curthread, m->lock_object.lo_name, file, line));
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_lock() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
file, line));
- WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
- file, line, NULL);
+ WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
+ LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
__mtx_lock(m, curthread, opts, file, line);
LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
line);
- WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
+ WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
+ file, line);
curthread->td_locks++;
}
void
-_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
file, line));
- curthread->td_locks--;
WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
line);
@@ -234,14 +285,20 @@
if (m->mtx_recurse == 0)
LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_UNLOCK_RELEASE, m);
__mtx_unlock(m, curthread, opts, file, line);
+ curthread->td_locks--;
}
void
-_mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
@@ -248,9 +305,11 @@
("mtx_lock_spin() of sleep mutex %s @ %s:%d",
m->lock_object.lo_name, file, line));
if (mtx_owned(m))
- KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
+ KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+ (opts & MTX_RECURSE) != 0,
("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
m->lock_object.lo_name, file, line));
+ opts &= ~MTX_RECURSE;
WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
file, line, NULL);
__mtx_lock_spin(m, curthread, opts, file, line);
@@ -259,12 +318,45 @@
WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
}
+int
+__mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line)
+{
+ struct mtx *m;
+
+ if (SCHEDULER_STOPPED())
+ return (1);
+
+ m = mtxlock2mtx(c);
+
+ KASSERT(m->mtx_lock != MTX_DESTROYED,
+ ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line));
+ KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
+ ("mtx_trylock_spin() of sleep mutex %s @ %s:%d",
+ m->lock_object.lo_name, file, line));
+ KASSERT((opts & MTX_RECURSE) == 0,
+ ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n",
+ m->lock_object.lo_name, file, line));
+ if (__mtx_trylock_spin(m, curthread, opts, file, line)) {
+ LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line);
+ WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
+ return (1);
+ }
+ LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line);
+ return (0);
+}
+
void
-_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
+__mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
+ int line)
{
+ struct mtx *m;
if (SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
@@ -284,8 +376,9 @@
* is already owned, it will recursively acquire the lock.
*/
int
-_mtx_trylock(struct mtx *m, int opts, const char *file, int line)
+_mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
#ifdef LOCK_PROFILING
uint64_t waittime = 0;
int contested = 0;
@@ -295,6 +388,11 @@
if (SCHEDULER_STOPPED())
return (1);
+ m = mtxlock2mtx(c);
+
+ KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+ ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
+ curthread, m->lock_object.lo_name, file, line));
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
@@ -301,12 +399,14 @@
("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
file, line));
- if (mtx_owned(m) && (m->lock_object.lo_flags & LO_RECURSABLE) != 0) {
+ if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+ (opts & MTX_RECURSE) != 0)) {
m->mtx_recurse++;
atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
rval = 1;
} else
rval = _mtx_obtain_lock(m, (uintptr_t)curthread);
+ opts &= ~MTX_RECURSE;
LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
if (rval) {
@@ -323,15 +423,16 @@
}
/*
- * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
+ * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
*
* We call this if the lock is either contested (i.e. we need to go to
* sleep waiting for it), or if we need to recurse on it.
*/
void
-_mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
- int line)
+__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
+ const char *file, int line)
{
+ struct mtx *m;
struct turnstile *ts;
uintptr_t v;
#ifdef ADAPTIVE_MUTEXES
@@ -344,19 +445,31 @@
int contested = 0;
uint64_t waittime = 0;
#endif
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
- uint64_t spin_cnt = 0;
- uint64_t sleep_cnt = 0;
+ u_int sleep_cnt = 0;
int64_t sleep_time = 0;
+ int64_t all_time = 0;
#endif
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_MUTEXES)
+ lock_delay_arg_init(&lda, &mtx_delay);
+#elif defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, NULL);
+#endif
+ m = mtxlock2mtx(c);
+
if (mtx_owned(m)) {
- KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
+ KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+ (opts & MTX_RECURSE) != 0,
("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
m->lock_object.lo_name, file, line));
+ opts &= ~MTX_RECURSE;
m->mtx_recurse++;
atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
if (LOCK_LOG_TEST(&m->lock_object, opts))
@@ -363,6 +476,7 @@
CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
return;
}
+ opts &= ~MTX_RECURSE;
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -373,10 +487,15 @@
CTR4(KTR_LOCK,
"_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
+#ifdef KDTRACE_HOOKS
+ all_time -= lockstat_nsecs(&m->lock_object);
+#endif
- while (!_mtx_obtain_lock(m, tid)) {
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_MUTEXES
/*
@@ -391,13 +510,16 @@
CTR3(KTR_LOCK,
"%s: spinning on %p held by %p",
__func__, m, owner);
+ KTR_STATE1(KTR_SCHED, "thread",
+ sched_tdname((struct thread *)tid),
+ "spinning", "lockname:\"%s\"",
+ m->lock_object.lo_name);
while (mtx_owner(m) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
+ KTR_STATE0(KTR_SCHED, "thread",
+ sched_tdname((struct thread *)tid),
+ "running");
continue;
}
}
@@ -461,14 +583,17 @@
* Block on the turnstile.
*/
#ifdef KDTRACE_HOOKS
- sleep_time -= lockstat_nsecs();
+ sleep_time -= lockstat_nsecs(&m->lock_object);
#endif
turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
- sleep_time += lockstat_nsecs();
+ sleep_time += lockstat_nsecs(&m->lock_object);
sleep_cnt++;
#endif
}
+#ifdef KDTRACE_HOOKS
+ all_time += lockstat_nsecs(&m->lock_object);
+#endif
#ifdef KTR
if (cont_logged) {
CTR4(KTR_CONTENTION,
@@ -485,8 +610,8 @@
/*
* Only record the loops spinning and not sleeping.
*/
- if (spin_cnt > sleep_cnt)
- LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (spin_cnt - sleep_cnt));
+ if (lda.spin_cnt > sleep_cnt)
+ LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (all_time - sleep_time));
#endif
}
@@ -511,33 +636,45 @@
#ifdef SMP
/*
- * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
+ * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock.
*
* This is only called if we need to actually spin for the lock. Recursion
* is handled inline.
*/
void
-_mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file,
- int line)
+_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
+ const char *file, int line)
{
+ struct mtx *m;
int i = 0;
#ifdef LOCK_PROFILING
int contested = 0;
uint64_t waittime = 0;
#endif
+#ifdef KDTRACE_HOOKS
+ int64_t spin_time = 0;
+#endif
if (SCHEDULER_STOPPED())
return;
+ m = mtxlock2mtx(c);
+
if (LOCK_LOG_TEST(&m->lock_object, opts))
CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
+ "spinning", "lockname:\"%s\"", m->lock_object.lo_name);
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
#endif
lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
- while (!_mtx_obtain_lock(m, tid)) {
-
+#ifdef KDTRACE_HOOKS
+ spin_time -= lockstat_nsecs(&m->lock_object);
+#endif
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
/* Give interrupts a chance while we spin. */
spinlock_exit();
while (m->mtx_lock != MTX_UNOWNED) {
@@ -553,18 +690,26 @@
}
spinlock_enter();
}
+#ifdef KDTRACE_HOOKS
+ spin_time += lockstat_nsecs(&m->lock_object);
+#endif
if (LOCK_LOG_TEST(&m->lock_object, opts))
CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
+ KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
+ "running");
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m,
contested, waittime, (file), (line));
- LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, i);
+#ifdef KDTRACE_HOOKS
+ if (spin_time != 0)
+ LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, spin_time);
+#endif
}
#endif /* SMP */
void
-_thread_lock_flags(struct thread *td, int opts, const char *file, int line)
+thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
{
struct mtx *m;
uintptr_t tid;
@@ -574,15 +719,25 @@
uint64_t waittime = 0;
#endif
#ifdef KDTRACE_HOOKS
- uint64_t spin_cnt = 0;
+ int64_t spin_time = 0;
#endif
i = 0;
tid = (uintptr_t)curthread;
- if (SCHEDULER_STOPPED())
+ if (SCHEDULER_STOPPED()) {
+ /*
+ * Ensure that spinlock sections are balanced even when the
+ * scheduler is stopped, since we may otherwise inadvertently
+ * re-enable interrupts while dumping core.
+ */
+ spinlock_enter();
return;
+ }
+#ifdef KDTRACE_HOOKS
+ spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
+#endif
for (;;) {
retry:
spinlock_enter();
@@ -598,10 +753,9 @@
m->lock_object.lo_name, file, line));
WITNESS_CHECKORDER(&m->lock_object,
opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
- while (!_mtx_obtain_lock(m, tid)) {
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
+ for (;;) {
+ if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
+ break;
if (m->mtx_lock == tid) {
m->mtx_recurse++;
break;
@@ -630,10 +784,10 @@
if (m == td->td_lock)
break;
__mtx_unlock_spin(m); /* does spinlock_exit() */
+ }
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ spin_time += lockstat_nsecs(&m->lock_object);
#endif
- }
if (m->mtx_recurse == 0)
LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE,
m, contested, waittime, (file), (line));
@@ -640,7 +794,7 @@
LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
line);
WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
- LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_cnt);
+ LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_time);
}
struct mtx *
@@ -677,19 +831,22 @@
}
/*
- * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
+ * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
*
* We are only called here if the lock is recursed or contested (i.e. we
* need to wake up a blocked thread).
*/
void
-_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
+__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
{
+ struct mtx *m;
struct turnstile *ts;
if (SCHEDULER_STOPPED())
return;
+ m = mtxlock2mtx(c);
+
if (mtx_recursed(m)) {
if (--(m->mtx_recurse) == 0)
atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
@@ -728,11 +885,15 @@
*/
#ifdef INVARIANT_SUPPORT
void
-_mtx_assert(struct mtx *m, int what, const char *file, int line)
+__mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
+ const struct mtx *m;
- if (panicstr != NULL || dumping)
+ if (panicstr != NULL || dumping || SCHEDULER_STOPPED())
return;
+
+ m = mtxlock2mtx(c);
+
switch (what) {
case MA_OWNED:
case MA_OWNED | MA_RECURSED:
@@ -799,7 +960,8 @@
{
struct mtx_args *margs = arg;
- mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
+ mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
+ margs->ma_opts);
}
/*
@@ -809,13 +971,16 @@
* witness.
*/
void
-mtx_init(struct mtx *m, const char *name, const char *type, int opts)
+_mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts)
{
+ struct mtx *m;
struct lock_class *class;
int flags;
+ m = mtxlock2mtx(c);
+
MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
- MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE)) == 0);
+ MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0);
ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
("%s: mtx_lock not aligned for %s: %p", __func__, name,
&m->mtx_lock));
@@ -841,12 +1006,14 @@
flags |= LO_DUPOK;
if (opts & MTX_NOPROFILE)
flags |= LO_NOPROFILE;
+ if (opts & MTX_NEW)
+ flags |= LO_NEW;
/* Initialize mutex. */
+ lock_init(&m->lock_object, class, name, type, flags);
+
m->mtx_lock = MTX_UNOWNED;
m->mtx_recurse = 0;
-
- lock_init(&m->lock_object, class, name, type, flags);
}
/*
@@ -856,9 +1023,12 @@
* flags.
*/
void
-mtx_destroy(struct mtx *m)
+_mtx_destroy(volatile uintptr_t *c)
{
+ struct mtx *m;
+ m = mtxlock2mtx(c);
+
if (!mtx_owned(m))
MPASS(mtx_unowned(m));
else {
@@ -906,12 +1076,12 @@
#ifdef DDB
void
-db_show_mtx(struct lock_object *lock)
+db_show_mtx(const struct lock_object *lock)
{
struct thread *td;
- struct mtx *m;
+ const struct mtx *m;
- m = (struct mtx *)lock;
+ m = (const struct mtx *)lock;
db_printf(" flags: {");
if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
Modified: trunk/sys/kern/kern_ntptime.c
===================================================================
--- trunk/sys/kern/kern_ntptime.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_ntptime.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
***********************************************************************
* *
@@ -31,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_ntptime.c 285611 2015-07-15 19:11:43Z delphij $");
#include "opt_ntp.h"
@@ -148,14 +149,14 @@
#define SHIFT_FLL 2 /* FLL loop gain (shift) */
static int time_state = TIME_OK; /* clock state */
-static int time_status = STA_UNSYNC; /* clock status bits */
+int time_status = STA_UNSYNC; /* clock status bits */
static long time_tai; /* TAI offset (s) */
static long time_monitor; /* last time offset scaled (ns) */
static long time_constant; /* poll interval (shift) (s) */
static long time_precision = 1; /* clock precision (ns) */
static long time_maxerror = MAXPHASE / 1000; /* maximum error (us) */
-static long time_esterror = MAXPHASE / 1000; /* estimated error (us) */
-static long time_reftime; /* time at last adjustment (s) */
+long time_esterror = MAXPHASE / 1000; /* estimated error (us) */
+static long time_reftime; /* uptime at last adjustment (s) */
static l_fp time_offset; /* time offset (ns) */
static l_fp time_freq; /* frequency offset (ns/s) */
static l_fp time_adj; /* tick adjust (ns/s) */
@@ -301,13 +302,17 @@
0, sizeof(struct ntptimeval) , ntp_sysctl, "S,ntptimeval", "");
#ifdef PPS_SYNC
-SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shiftmax, CTLFLAG_RW, &pps_shiftmax, 0, "");
-SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shift, CTLFLAG_RW, &pps_shift, 0, "");
+SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shiftmax, CTLFLAG_RW,
+ &pps_shiftmax, 0, "Max interval duration (sec) (shift)");
+SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shift, CTLFLAG_RW,
+ &pps_shift, 0, "Interval duration (sec) (shift)");
SYSCTL_LONG(_kern_ntp_pll, OID_AUTO, time_monitor, CTLFLAG_RD,
- &time_monitor, 0, "");
+ &time_monitor, 0, "Last time offset scaled (ns)");
-SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD, &pps_freq, sizeof(pps_freq), "I", "");
-SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD, &time_freq, sizeof(time_freq), "I", "");
+SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD,
+ &pps_freq, sizeof(pps_freq), "I", "Scaled frequency offset (ns/sec)");
+SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD,
+ &time_freq, sizeof(time_freq), "I", "Frequency offset (ns/sec)");
#endif
/*
@@ -692,12 +697,12 @@
* otherwise, the argument offset is used to compute it.
*/
if (time_status & STA_PPSFREQ && time_status & STA_PPSSIGNAL) {
- time_reftime = time_second;
+ time_reftime = time_uptime;
return;
}
if (time_status & STA_FREQHOLD || time_reftime == 0)
- time_reftime = time_second;
- mtemp = time_second - time_reftime;
+ time_reftime = time_uptime;
+ mtemp = time_uptime - time_reftime;
L_LINT(ftemp, time_monitor);
L_RSHIFT(ftemp, (SHIFT_PLL + 2 + time_constant) << 1);
L_MPY(ftemp, mtemp);
@@ -710,7 +715,7 @@
L_ADD(time_freq, ftemp);
time_status |= STA_MODE;
}
- time_reftime = time_second;
+ time_reftime = time_uptime;
if (L_GINT(time_freq) > MAXFREQ)
L_LINT(time_freq, MAXFREQ);
else if (L_GINT(time_freq) < -MAXFREQ)
@@ -828,8 +833,15 @@
* discarded. otherwise, if so enabled, the time offset is
* updated. We can tolerate a modest loss of data here without
* much degrading time accuracy.
- */
- if (u_nsec > (pps_jitter << PPS_POPCORN)) {
+ *
+ * The measurements being checked here were made with the system
+ * timecounter, so the popcorn threshold is not allowed to fall below
+ * the number of nanoseconds in two ticks of the timecounter. For a
+ * timecounter running faster than 1 GHz the lower bound is 2ns, just
+ * to avoid a nonsensical threshold of zero.
+ */
+ if (u_nsec > lmax(pps_jitter << PPS_POPCORN,
+ 2 * (NANOSECOND / (long)qmin(NANOSECOND, tc_getfrequency())))) {
time_status |= STA_PPSJITTER;
pps_jitcnt++;
} else if (time_status & STA_PPSTIME) {
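
The rewritten popcorn check no longer compares the pulse against pps_jitter << PPS_POPCORN alone; the threshold is clamped to at least two ticks of the system timecounter, and to at least 2 ns for counters at or above 1 GHz, so a coarse timecounter cannot drive the threshold to zero and reject every sample. A standalone sketch of just that floor computation, with illustrative frequencies:

#include <stdio.h>

#define	NANOSECOND	1000000000L

/* Lower bound on the popcorn threshold: two timecounter ticks, >= 2 ns. */
static long
popcorn_floor(long tc_freq)
{
	long hz = tc_freq < NANOSECOND ? tc_freq : NANOSECOND;

	return (2 * (NANOSECOND / hz));
}

int
main(void)
{
	printf("i8254, 14318182 Hz: %ld ns\n", popcorn_floor(14318182));   /* 138 ns */
	printf("1 GHz or faster:    %ld ns\n", popcorn_floor(NANOSECOND)); /* 2 ns */
	return (0);
}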
@@ -1040,5 +1052,5 @@
periodic_resettodr, NULL);
}
-SYSINIT(periodic_resettodr, SI_SUB_RUN_SCHEDULER, SI_ORDER_MIDDLE,
+SYSINIT(periodic_resettodr, SI_SUB_LAST, SI_ORDER_MIDDLE,
start_periodic_resettodr, NULL);
Modified: trunk/sys/kern/kern_osd.c
===================================================================
--- trunk/sys/kern/kern_osd.c 2018-05-25 20:46:51 UTC (rev 9944)
+++ trunk/sys/kern/kern_osd.c 2018-05-25 20:53:39 UTC (rev 9945)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd at FreeBSD.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_osd.c 298834 2016-04-30 04:01:22Z jamie $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -44,6 +45,23 @@
/* OSD (Object Specific Data) */
+/*
+ * Lock key:
+ * (m) osd_module_lock
+ * (o) osd_object_lock
+ * (l) osd_list_lock
+ */
+struct osd_master {
+ struct sx osd_module_lock;
+ struct rmlock osd_object_lock;
+ struct mtx osd_list_lock;
+ LIST_HEAD(, osd) osd_list; /* (l) */
+ osd_destructor_t *osd_destructors; /* (o) */
+ osd_method_t *osd_methods; /* (m) */
+ u_int osd_ntslots; /* (m) */
+ const u_int osd_nmethods;
+};
+
static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data");
static int osd_debug = 0;
@@ -62,25 +80,12 @@
int list_locked);
/*
- * Lists of objects with OSD.
- *
- * Lock key:
- * (m) osd_module_lock
- * (o) osd_object_lock
- * (l) osd_list_lock
+ * List of objects with OSD.
*/
-static LIST_HEAD(, osd) osd_list[OSD_LAST + 1]; /* (m) */
-static osd_method_t *osd_methods[OSD_LAST + 1]; /* (m) */
-static u_int osd_nslots[OSD_LAST + 1]; /* (m) */
-static osd_destructor_t *osd_destructors[OSD_LAST + 1]; /* (o) */
-static const u_int osd_nmethods[OSD_LAST + 1] = {
- [OSD_JAIL] = PR_MAXMETHOD,
+struct osd_master osdm[OSD_LAST + 1] = {
+ [OSD_JAIL] = { .osd_nmethods = PR_MAXMETHOD },
};
-static struct sx osd_module_lock[OSD_LAST + 1];
-static struct rmlock osd_object_lock[OSD_LAST + 1];
-static struct mtx osd_list_lock[OSD_LAST + 1];
-
static void
osd_default_destructor(void *value __unused)
{
@@ -102,12 +107,12 @@
if (destructor == NULL)
destructor = osd_default_destructor;
- sx_xlock(&osd_module_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
/*
* First, we try to find unused slot.
*/
- for (i = 0; i < osd_nslots[type]; i++) {
- if (osd_destructors[type][i] == NULL) {
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ if (osdm[type].osd_destructors[i] == NULL) {
OSD_DEBUG("Unused slot found (type=%u, slot=%u).",
type, i);
break;
@@ -116,31 +121,31 @@
/*
* If no unused slot was found, allocate one.
*/
- if (i == osd_nslots[type]) {
- osd_nslots[type]++;
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_WAITOK);
- newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type],
- M_OSD, M_WAITOK);
- rm_wlock(&osd_object_lock[type]);
- bcopy(osd_destructors[type], newptr,
+ if (i == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots++;
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_WAITOK);
+ newptr = malloc(sizeof(osd_destructor_t) *
+ osdm[type].osd_ntslots, M_OSD, M_WAITOK);
+ rm_wlock(&osdm[type].osd_object_lock);
+ bcopy(osdm[type].osd_destructors, newptr,
sizeof(osd_destructor_t) * i);
- free(osd_destructors[type], M_OSD);
- osd_destructors[type] = newptr;
- rm_wunlock(&osd_object_lock[type]);
+ free(osdm[type].osd_destructors, M_OSD);
+ osdm[type].osd_destructors = newptr;
+ rm_wunlock(&osdm[type].osd_object_lock);
OSD_DEBUG("New slot allocated (type=%u, slot=%u).",
type, i + 1);
}
- osd_destructors[type][i] = destructor;
- if (osd_nmethods[type] != 0) {
- for (m = 0; m < osd_nmethods[type]; m++)
- osd_methods[type][i * osd_nmethods[type] + m] =
- methods != NULL ? methods[m] : NULL;
+ osdm[type].osd_destructors[i] = destructor;
+ if (osdm[type].osd_nmethods != 0) {
+ for (m = 0; m < osdm[type].osd_nmethods; m++)
+ osdm[type].osd_methods[i * osdm[type].osd_nmethods + m]
+ = methods != NULL ? methods[m] : NULL;
}
- sx_xunlock(&osd_module_lock[type]);
+ sx_xunlock(&osdm[type].osd_module_lock);
return (i + 1);
}
@@ -151,37 +156,37 @@
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- sx_xlock(&osd_module_lock[type]);
- rm_wlock(&osd_object_lock[type]);
+ sx_xlock(&osdm[type].osd_module_lock);
+ rm_wlock(&osdm[type].osd_object_lock);
/*
* Free all OSD for the given slot.
*/
- mtx_lock(&osd_list_lock[type]);
- LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd)
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_FOREACH_SAFE(osd, &osdm[type].osd_list, osd_next, tosd)
do_osd_del(type, osd, slot, 1);
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
/*
* Set destructor to NULL to free the slot.
*/
- osd_destructors[type][slot - 1] = NULL;
- if (slot == osd_nslots[type]) {
- osd_nslots[type]--;
- osd_destructors[type] = realloc(osd_destructors[type],
- sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+ osdm[type].osd_destructors[slot - 1] = NULL;
+ if (slot == osdm[type].osd_ntslots) {
+ osdm[type].osd_ntslots--;
+ osdm[type].osd_destructors = realloc(osdm[type].osd_destructors,
+ sizeof(osd_destructor_t) * osdm[type].osd_ntslots, M_OSD,
M_NOWAIT | M_ZERO);
- if (osd_nmethods[type] != 0)
- osd_methods[type] = realloc(osd_methods[type],
- sizeof(osd_method_t) * osd_nslots[type] *
- osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO);
+ if (osdm[type].osd_nmethods != 0)
+ osdm[type].osd_methods = realloc(osdm[type].osd_methods,
+ sizeof(osd_method_t) * osdm[type].osd_ntslots *
+ osdm[type].osd_nmethods, M_OSD, M_NOWAIT | M_ZERO);
/*
* We always reallocate to smaller size, so we assume it will
* always succeed.
*/
- KASSERT(osd_destructors[type] != NULL &&
- (osd_nmethods[type] == 0 || osd_methods[type] != NULL),
- ("realloc() failed"));
+ KASSERT(osdm[type].osd_destructors != NULL &&
+ (osdm[type].osd_nmethods == 0 ||
+ osdm[type].osd_methods != NULL), ("realloc() failed"));
OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).",
type, slot);
} else {
@@ -188,68 +193,105 @@
OSD_DEBUG("Slot deregistration (type=%u, slot=%u).",
type, slot);
}
- rm_wunlock(&osd_object_lock[type]);
- sx_xunlock(&osd_module_lock[type]);
+ rm_wunlock(&osdm[type].osd_object_lock);
+ sx_xunlock(&osdm[type].osd_module_lock);
}
int
osd_set(u_int type, struct osd *osd, u_int slot, void *value)
{
+
+ return (osd_set_reserved(type, osd, slot, NULL, value));
+}
+
+void *
+osd_reserve(u_int slot)
+{
+
+ KASSERT(slot > 0, ("Invalid slot."));
+
+ OSD_DEBUG("Reserving slot array (slot=%u).", slot);
+ return (malloc(sizeof(void *) * slot, M_OSD, M_WAITOK | M_ZERO));
+}
+
+int
+osd_set_reserved(u_int type, struct osd *osd, u_int slot, void *rsv,
+ void *value)
+{
struct rm_priotracker tracker;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
+ void *newptr;
+
if (value == NULL) {
OSD_DEBUG(
"Not allocating null slot (type=%u, slot=%u).",
type, slot);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
+ if (rsv)
+ osd_free_reserved(rsv);
return (0);
- } else if (osd->osd_nslots == 0) {
+ }
+
+ /*
+ * Too few slots allocated here, so we need to extend or create
+ * the array.
+ */
+ if (rsv) {
/*
- * First OSD for this object, so we need to allocate
- * space and put it onto the list.
+ * Use the reserve passed in (assumed to be
+ * the right size).
*/
- osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD,
- M_NOWAIT | M_ZERO);
- if (osd->osd_slots == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
- return (ENOMEM);
+ newptr = rsv;
+ if (osd->osd_nslots != 0) {
+ memcpy(newptr, osd->osd_slots,
+ sizeof(void *) * osd->osd_nslots);
+ free(osd->osd_slots, M_OSD);
}
- osd->osd_nslots = slot;
- mtx_lock(&osd_list_lock[type]);
- LIST_INSERT_HEAD(&osd_list[type], osd, osd_next);
- mtx_unlock(&osd_list_lock[type]);
- OSD_DEBUG("Setting first slot (type=%u).", type);
} else {
- void *newptr;
-
- /*
- * Too few slots allocated here, needs to extend
- * the array.
- */
newptr = realloc(osd->osd_slots, sizeof(void *) * slot,
M_OSD, M_NOWAIT | M_ZERO);
if (newptr == NULL) {
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock,
+ &tracker);
return (ENOMEM);
}
- osd->osd_slots = newptr;
- osd->osd_nslots = slot;
+ }
+ if (osd->osd_nslots == 0) {
+ /*
+ * First OSD for this object, so we need to put it
+ * onto the list.
+ */
+ mtx_lock(&osdm[type].osd_list_lock);
+ LIST_INSERT_HEAD(&osdm[type].osd_list, osd, osd_next);
+ mtx_unlock(&osdm[type].osd_list_lock);
+ OSD_DEBUG("Setting first slot (type=%u).", type);
+ } else
OSD_DEBUG("Growing slots array (type=%u).", type);
- }
- }
+ osd->osd_slots = newptr;
+ osd->osd_nslots = slot;
+ } else if (rsv)
+ osd_free_reserved(rsv);
OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type,
slot, value);
osd->osd_slots[slot - 1] = value;
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (0);
}
+void
+osd_free_reserved(void *rsv)
+{
+
+ OSD_DEBUG("Discarding reserved slot array.");
+ free(rsv, M_OSD);
+}
+
void *
osd_get(u_int type, struct osd *osd, u_int slot)
{
@@ -258,9 +300,9 @@
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
if (slot > osd->osd_nslots) {
value = NULL;
OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
@@ -269,7 +311,7 @@
OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).",
type, slot, value);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
return (value);
}
@@ -278,9 +320,9 @@
{
struct rm_priotracker tracker;
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
do_osd_del(type, osd, slot, 0);
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
}
static void
@@ -290,7 +332,7 @@
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
KASSERT(slot > 0, ("Invalid slot."));
- KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+ KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot."));
OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot);
@@ -299,7 +341,7 @@
return;
}
if (osd->osd_slots[slot - 1] != NULL) {
- osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]);
+ osdm[type].osd_destructors[slot - 1](osd->osd_slots[slot - 1]);
osd->osd_slots[slot - 1] = NULL;
}
for (i = osd->osd_nslots - 1; i >= 0; i--) {
@@ -313,10 +355,10 @@
/* No values left for this object. */
OSD_DEBUG("No more slots left (type=%u).", type);
if (!list_locked)
- mtx_lock(&osd_list_lock[type]);
+ mtx_lock(&osdm[type].osd_list_lock);
LIST_REMOVE(osd, osd_next);
if (!list_locked)
- mtx_unlock(&osd_list_lock[type]);
+ mtx_unlock(&osdm[type].osd_list_lock);
free(osd->osd_slots, M_OSD);
osd->osd_slots = NULL;
osd->osd_nslots = 0;
@@ -342,7 +384,7 @@
int error, i;
KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
- KASSERT(method < osd_nmethods[type], ("Invalid method."));
+ KASSERT(method < osdm[type].osd_nmethods, ("Invalid method."));
/*
* Call this method for every slot that defines it, stopping if an
@@ -349,14 +391,14 @@
* error is encountered.
*/
error = 0;
- sx_slock(&osd_module_lock[type]);
- for (i = 0; i < osd_nslots[type]; i++) {
- methodfun =
- osd_methods[type][i * osd_nmethods[type] + method];
+ sx_slock(&osdm[type].osd_module_lock);
+ for (i = 0; i < osdm[type].osd_ntslots; i++) {
+ methodfun = osdm[type].osd_methods[i * osdm[type].osd_nmethods +
+ method];
if (methodfun != NULL && (error = methodfun(obj, data)) != 0)
break;
}
- sx_sunlock(&osd_module_lock[type]);
+ sx_sunlock(&osdm[type].osd_module_lock);
return (error);
}
@@ -374,14 +416,14 @@
return;
}
- rm_rlock(&osd_object_lock[type], &tracker);
+ rm_rlock(&osdm[type].osd_object_lock, &tracker);
for (i = 1; i <= osd->osd_nslots; i++) {
- if (osd_destructors[type][i - 1] != NULL)
+ if (osdm[type].osd_destructors[i - 1] != NULL)
do_osd_del(type, osd, i, 0);
else
OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i);
}
- rm_runlock(&osd_object_lock[type], &tracker);
+ rm_runlock(&osdm[type].osd_object_lock, &tracker);
OSD_DEBUG("Object exit (type=%u).", type);
}
@@ -391,13 +433,13 @@
u_int i;
for (i = OSD_FIRST; i <= OSD_LAST; i++) {
- osd_nslots[i] = 0;
- LIST_INIT(&osd_list[i]);
- sx_init(&osd_module_lock[i], "osd_module");
- rm_init(&osd_object_lock[i], "osd_object");
- mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF);
- osd_destructors[i] = NULL;
- osd_methods[i] = NULL;
+ sx_init(&osdm[i].osd_module_lock, "osd_module");
+ rm_init(&osdm[i].osd_object_lock, "osd_object");
+ mtx_init(&osdm[i].osd_list_lock, "osd_list", NULL, MTX_DEF);
+ LIST_INIT(&osdm[i].osd_list);
+ osdm[i].osd_destructors = NULL;
+ osdm[i].osd_ntslots = 0;
+ osdm[i].osd_methods = NULL;
}
}
SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL);
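
Besides folding the per-type arrays into struct osd_master, this revision adds osd_reserve(), osd_set_reserved() and osd_free_reserved(), so a caller can allocate the slot array with M_WAITOK while sleeping is still safe and install the value later without hitting the M_NOWAIT failure path of plain osd_set(). A hedged sketch of the intended pattern (obj, slot, value and the surrounding condition are placeholders, and locking is omitted):

void *rsv;

rsv = osd_reserve(slot);	/* allocates the slot array; may sleep */

/* ... later, possibly in a context that must not sleep ... */
if (still_wanted) {
	/*
	 * osd_set_reserved() consumes the reservation: it either becomes
	 * the object's slot array or is freed internally.
	 */
	(void)osd_set_reserved(OSD_JAIL, &obj->osd, slot, rsv, value);
} else {
	/* Never handed to osd_set_reserved(), so return it here. */
	osd_free_reserved(rsv);
}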