[Midnightbsd-cvs] src [9946] trunk/sys/kern: sync with freebsd 10-stable

laffer1 at midnightbsd.org
Fri May 25 16:55:11 EDT 2018


Revision: 9946
          http://svnweb.midnightbsd.org/src/?rev=9946
Author:   laffer1
Date:     2018-05-25 16:55:11 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd 10-stable

Modified Paths:
--------------
    trunk/sys/kern/kern_physio.c
    trunk/sys/kern/kern_pmc.c
    trunk/sys/kern/kern_poll.c
    trunk/sys/kern/kern_priv.c
    trunk/sys/kern/kern_proc.c

Added Paths:
-----------
    trunk/sys/kern/kern_procctl.c

Modified: trunk/sys/kern/kern_physio.c
===================================================================
--- trunk/sys/kern/kern_physio.c	2018-05-25 20:53:39 UTC (rev 9945)
+++ trunk/sys/kern/kern_physio.c	2018-05-25 20:55:11 UTC (rev 9946)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
@@ -18,7 +19,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_physio.c 290705 2015-11-12 08:47:10Z hselasky $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -25,28 +26,33 @@
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
+#include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/uio.h>
+#include <geom/geom.h>
 
 #include <vm/vm.h>
+#include <vm/vm_page.h>
 #include <vm/vm_extern.h>
+#include <vm/vm_map.h>
 
 int
 physio(struct cdev *dev, struct uio *uio, int ioflag)
 {
-	struct buf *bp;
 	struct cdevsw *csw;
+	struct buf *pbuf;
+	struct bio *bp;
+	struct vm_page **pages;
 	caddr_t sa;
-	u_int iolen;
-	int error, i;
+	u_int iolen, poff;
+	int error, i, npages, maxpages;
+	vm_prot_t prot;
 
-	/* Keep the process UPAGES from being swapped. XXX: why ? */
-	PHOLD(curproc);
+	csw = dev->si_devsw;
+	/* check if character device is being destroyed */
+	if (csw == NULL)
+		return (ENXIO);
 
-	bp = getpbuf(NULL);
-	sa = bp->b_data;
-	error = 0;
-
 	/* XXX: sanity check */
 	if(dev->si_iosize_max < PAGE_SIZE) {
 		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
@@ -54,59 +60,132 @@
 		dev->si_iosize_max = DFLTPHYS;
 	}
 
+	/*
+	 * If the driver does not want I/O to be split, that means that we
+	 * need to reject any requests that will not fit into one buffer.
+	 */
+	if (dev->si_flags & SI_NOSPLIT &&
+	    (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
+	    uio->uio_iovcnt > 1)) {
+		/*
+		 * Tell the user why his I/O was rejected.
+		 */
+		if (uio->uio_resid > dev->si_iosize_max)
+			uprintf("%s: request size=%zd > si_iosize_max=%d; "
+			    "cannot split request\n", devtoname(dev),
+			    uio->uio_resid, dev->si_iosize_max);
+		if (uio->uio_resid > MAXPHYS)
+			uprintf("%s: request size=%zd > MAXPHYS=%d; "
+			    "cannot split request\n", devtoname(dev),
+			    uio->uio_resid, MAXPHYS);
+		if (uio->uio_iovcnt > 1)
+			uprintf("%s: request vectors=%d > 1; "
+			    "cannot split request\n", devtoname(dev),
+			    uio->uio_iovcnt);
+		return (EFBIG);
+	}
+
+	/*
+	 * Keep the process UPAGES from being swapped.  A process swapped
+	 * out while holding pbufs (which the swapper also uses) may deadlock.
+	 */
+	PHOLD(curproc);
+
+	bp = g_alloc_bio();
+	if (uio->uio_segflg != UIO_USERSPACE) {
+		pbuf = NULL;
+		pages = NULL;
+	} else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
+		pbuf = NULL;
+		maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
+		pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
+	} else {
+		pbuf = getpbuf(NULL);
+		sa = pbuf->b_data;
+		maxpages = btoc(MAXPHYS);
+		pages = pbuf->b_pages;
+	}
+	prot = VM_PROT_READ;
+	if (uio->uio_rw == UIO_READ)
+		prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
+	error = 0;
 	for (i = 0; i < uio->uio_iovcnt; i++) {
 		while (uio->uio_iov[i].iov_len) {
-			bp->b_flags = 0;
+			bzero(bp, sizeof(*bp));
 			if (uio->uio_rw == UIO_READ) {
-				bp->b_iocmd = BIO_READ;
+				bp->bio_cmd = BIO_READ;
 				curthread->td_ru.ru_inblock++;
 			} else {
-				bp->b_iocmd = BIO_WRITE;
+				bp->bio_cmd = BIO_WRITE;
 				curthread->td_ru.ru_oublock++;
 			}
-			bp->b_iodone = bdone;
-			bp->b_data = uio->uio_iov[i].iov_base;
-			bp->b_bcount = uio->uio_iov[i].iov_len;
-			bp->b_offset = uio->uio_offset;
-			bp->b_iooffset = uio->uio_offset;
-			bp->b_saveaddr = sa;
+			bp->bio_offset = uio->uio_offset;
+			bp->bio_data = uio->uio_iov[i].iov_base;
+			bp->bio_length = uio->uio_iov[i].iov_len;
+			if (bp->bio_length > dev->si_iosize_max)
+				bp->bio_length = dev->si_iosize_max;
+			if (bp->bio_length > MAXPHYS)
+				bp->bio_length = MAXPHYS;
 
-			/* Don't exceed drivers iosize limit */
-			if (bp->b_bcount > dev->si_iosize_max)
-				bp->b_bcount = dev->si_iosize_max;
-
-			/* 
-			 * Make sure the pbuf can map the request
-			 * XXX: The pbuf has kvasize = MAXPHYS so a request
-			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
-			 * XXX: page aligned or it will be fragmented.
+			/*
+			 * Make sure the pbuf can map the request.
+			 * The pbuf has kvasize = MAXPHYS, so a request
+			 * larger than MAXPHYS - PAGE_SIZE must be
+			 * page aligned or it will be fragmented.
 			 */
-			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
-			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
-				bp->b_bcount = bp->b_kvasize;
-				if (iolen != 0)
-					bp->b_bcount -= PAGE_SIZE;
+			poff = (vm_offset_t)bp->bio_data & PAGE_MASK;
+			if (pbuf && bp->bio_length + poff > pbuf->b_kvasize) {
+				if (dev->si_flags & SI_NOSPLIT) {
+					uprintf("%s: request ptr %p is not "
+					    "on a page boundary; cannot split "
+					    "request\n", devtoname(dev),
+					    bp->bio_data);
+					error = EFBIG;
+					goto doerror;
+				}
+				bp->bio_length = pbuf->b_kvasize;
+				if (poff != 0)
+					bp->bio_length -= PAGE_SIZE;
 			}
-			bp->b_bufsize = bp->b_bcount;
 
-			bp->b_blkno = btodb(bp->b_offset);
-			csw = dev->si_devsw;
-			if (uio->uio_segflg == UIO_USERSPACE) {
-				if (vmapbuf(bp) < 0) {
+			bp->bio_bcount = bp->bio_length;
+			bp->bio_dev = dev;
+
+			if (pages) {
+				if ((npages = vm_fault_quick_hold_pages(
+				    &curproc->p_vmspace->vm_map,
+				    (vm_offset_t)bp->bio_data, bp->bio_length,
+				    prot, pages, maxpages)) < 0) {
 					error = EFAULT;
 					goto doerror;
 				}
+				if (pbuf) {
+					pmap_qenter((vm_offset_t)sa,
+					    pages, npages);
+					bp->bio_data = sa + poff;
+				} else {
+					bp->bio_ma = pages;
+					bp->bio_ma_n = npages;
+					bp->bio_ma_offset = poff;
+					bp->bio_data = unmapped_buf;
+					bp->bio_flags |= BIO_UNMAPPED;
+				}
 			}
-			dev_strategy_csw(dev, csw, bp);
+
+			csw->d_strategy(bp);
 			if (uio->uio_rw == UIO_READ)
-				bwait(bp, PRIBIO, "physrd");
+				biowait(bp, "physrd");
 			else
-				bwait(bp, PRIBIO, "physwr");
+				biowait(bp, "physwr");
 
-			if (uio->uio_segflg == UIO_USERSPACE)
-				vunmapbuf(bp);
-			iolen = bp->b_bcount - bp->b_resid;
-			if (iolen == 0 && !(bp->b_ioflags & BIO_ERROR))
+			if (pages) {
+				if (pbuf)
+					pmap_qremove((vm_offset_t)sa, npages);
+				vm_page_unhold_pages(pages, npages);
+			}
+
+			iolen = bp->bio_length - bp->bio_resid;
+			if (iolen == 0 && !(bp->bio_flags & BIO_ERROR))
 				goto doerror;	/* EOF */
 			uio->uio_iov[i].iov_len -= iolen;
 			uio->uio_iov[i].iov_base =
@@ -113,14 +192,18 @@
 			    (char *)uio->uio_iov[i].iov_base + iolen;
 			uio->uio_resid -= iolen;
 			uio->uio_offset += iolen;
-			if( bp->b_ioflags & BIO_ERROR) {
-				error = bp->b_error;
+			if (bp->bio_flags & BIO_ERROR) {
+				error = bp->bio_error;
 				goto doerror;
 			}
 		}
 	}
 doerror:
-	relpbuf(bp, NULL);
+	if (pbuf)
+		relpbuf(pbuf, NULL);
+	else if (pages)
+		free(pages, M_DEVBUF);
+	g_destroy_bio(bp);
 	PRELE(curproc);
 	return (error);
 }
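
The physio() rewrite above moves the function from struct buf onto GEOM's
struct bio and gives it three buffering strategies: kernel-space requests
need no page holding at all, SI_UNMAPPED-capable devices get the held page
array passed via bio_ma[] with BIO_UNMAPPED set, and everything else falls
back to mapping the held pages into a borrowed pbuf.  A minimal userspace
sketch of that selection logic, using stand-in types and constants rather
than the kernel's own:

/*
 * Hypothetical sketch only: struct req and the constants below stand in
 * for the cdev/uio state the new physio() actually inspects.
 */
#include <stdio.h>

#define SI_UNMAPPED	0x01
#define UIO_USERSPACE	1

struct req {
	int segflg;		/* where the I/O buffer lives */
	int si_flags;		/* device capabilities */
	int unmapped_ok;	/* analogue of unmapped_buf_allowed */
};

static const char *
pick_strategy(const struct req *r)
{
	if (r->segflg != UIO_USERSPACE)
		return ("kernel buffer: no pbuf, no page holding");
	if ((r->si_flags & SI_UNMAPPED) && r->unmapped_ok)
		return ("unmapped: hold pages, pass bio_ma[], set BIO_UNMAPPED");
	return ("mapped: borrow a pbuf and pmap_qenter() the held pages");
}

int
main(void)
{
	struct req r = { UIO_USERSPACE, SI_UNMAPPED, 1 };

	printf("%s\n", pick_strategy(&r));
	return (0);
}

Either way, user pages are now pinned with vm_fault_quick_hold_pages()
instead of vmapbuf(), so a kernel mapping is created only when the driver
cannot handle unmapped bios.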

Modified: trunk/sys/kern/kern_pmc.c
===================================================================
--- trunk/sys/kern/kern_pmc.c	2018-05-25 20:53:39 UTC (rev 9945)
+++ trunk/sys/kern/kern_pmc.c	2018-05-25 20:55:11 UTC (rev 9946)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2003-2008 Joseph Koshy
  * Copyright (c) 2007 The FreeBSD Foundation
@@ -29,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_pmc.c 233628 2012-03-28 20:58:30Z fabient $");
 
 #include "opt_hwpmc_hooks.h"
 

Modified: trunk/sys/kern/kern_poll.c
===================================================================
--- trunk/sys/kern/kern_poll.c	2018-05-25 20:53:39 UTC (rev 9945)
+++ trunk/sys/kern/kern_poll.c	2018-05-25 20:55:11 UTC (rev 9946)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2001-2002 Luigi Rizzo
  *
@@ -26,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_poll.c 261276 2014-01-29 21:57:00Z brooks $");
 
 #include "opt_device_polling.h"
 
@@ -87,12 +88,11 @@
  * The following constraints hold
  *
  *	1 <= poll_each_burst <= poll_burst <= poll_burst_max
- *	0 <= poll_each_burst
  *	MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
  */
 
 #define MIN_POLL_BURST_MAX	10
-#define MAX_POLL_BURST_MAX	1000
+#define MAX_POLL_BURST_MAX	20000
 
 static uint32_t poll_burst = 5;
 static uint32_t poll_burst_max = 150;	/* good for 100Mbit net and HZ=1000 */
@@ -170,7 +170,7 @@
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
-	if (val < 0 || val > 99)
+	if (val > 99)
 		return (EINVAL);
 
 	mtx_lock(&poll_mtx);
@@ -268,7 +268,7 @@
 	EVENTHANDLER_REGISTER(shutdown_post_sync, poll_shutdown, NULL,
 	    SHUTDOWN_PRI_LAST);
 }
-SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL);
+SYSINIT(device_poll, SI_SUB_SOFTINTR, SI_ORDER_MIDDLE, init_device_poll, NULL);
 
 
 /*

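The kern_poll.c changes raise the burst ceiling (MAX_POLL_BURST_MAX goes
from 1000 to 20000), register the subsystem at SI_SUB_SOFTINTR rather than
SI_SUB_CLOCKS, and drop the "val < 0" arm from the percentage range check.
Assuming, as the surrounding uint32_t counters suggest, that the handler's
value is unsigned, the lower-bound test was dead code and the single
upper-bound compare also rejects wrapped "negative" input.  A small
standalone illustration:

/* For an unsigned value the "< 0" test is always false, so one
 * upper-bound check suffices. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t val = (uint32_t)-5;	/* wraps to 4294967291 */

	printf("val < 0  -> %d\n", val < 0);	/* always 0 */
	printf("val > 99 -> %d\n", val > 99);	/* rejects the wrap too */
	return (0);
}
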
Modified: trunk/sys/kern/kern_priv.c
===================================================================
--- trunk/sys/kern/kern_priv.c	2018-05-25 20:53:39 UTC (rev 9945)
+++ trunk/sys/kern/kern_priv.c	2018-05-25 20:55:11 UTC (rev 9946)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2006 nCircle Network Security, Inc.
  * Copyright (c) 2009 Robert N. M. Watson
@@ -31,7 +32,7 @@
 #include "opt_kdtrace.h"
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_priv.c 260817 2014-01-17 10:58:59Z avg $");
 
 #include <sys/param.h>
 #include <sys/jail.h>
@@ -52,7 +53,7 @@
  * uid 0 is offered no special privilege in the kernel security policy.
  * Setting it to zero may seriously impact the functionality of many existing
  * userland programs, and should not be done without careful consideration of
- * the consequences. 
+ * the consequences.
  */
 static int	suser_enabled = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, suser_enabled, CTLFLAG_RW,
@@ -59,14 +60,14 @@
     &suser_enabled, 0, "processes with uid 0 have privilege");
 TUNABLE_INT("security.bsd.suser_enabled", &suser_enabled);
 
-static int	unprivileged_mlock = 0;
+static int	unprivileged_mlock = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_mlock, CTLFLAG_RW|CTLFLAG_TUN,
     &unprivileged_mlock, 0, "Allow non-root users to call mlock(2)");
 TUNABLE_INT("security.bsd.unprivileged_mlock", &unprivileged_mlock);
 
 SDT_PROVIDER_DEFINE(priv);
-SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_ok, priv-ok, "int");
-SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_err, priv-err, "int");
+SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv__ok, "int");
+SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv__err, "int");
 
 /*
  * Check a credential for privilege.  Lots of good reasons to deny privilege;
@@ -104,10 +105,10 @@
 		 * mlockall(2)/munlockall(2).
 		 */
 		switch (priv) {
-			case PRIV_VM_MLOCK:
-			case PRIV_VM_MUNLOCK:
-				error = 0;
-				goto out;
+		case PRIV_VM_MLOCK:
+		case PRIV_VM_MUNLOCK:
+			error = 0;
+			goto out;
 		}
 	}
 
@@ -132,7 +133,6 @@
 				goto out;
 			}
 			break;
-
 		default:
 			if (cred->cr_uid == 0) {
 				error = 0;
@@ -143,6 +143,16 @@
 	}
 
 	/*
+	 * Writes to kernel/physical memory are a typical root-only operation,
+	 * but non-root users are expected to be able to read it (provided they
+	 * have permission to access /dev/[k]mem).
+	 */
+	if (priv == PRIV_KMEM_READ) {
+		error = 0;
+		goto out;
+	}
+
+	/*
 	 * Now check with MAC, if enabled, to see if a policy module grants
 	 * privilege.
 	 */
@@ -159,13 +169,10 @@
 	 */
 	error = EPERM;
 out:
-	if (error) {
-		SDT_PROBE(priv, kernel, priv_check, priv_err, priv, 0, 0, 0,
-		    0);
-	} else {
-		SDT_PROBE(priv, kernel, priv_check, priv_ok, priv, 0, 0, 0,
-		    0);
-	}
+	if (error)
+		SDT_PROBE1(priv, kernel, priv_check, priv__err, priv);
+	else
+		SDT_PROBE1(priv, kernel, priv_check, priv__ok, priv);
 	return (error);
 }
 

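Three user-visible changes land in kern_priv.c: the SDT probes move to the
modern SDT_PROBE_DEFINE1/SDT_PROBE1 macros (the double underscore in
priv__ok conventionally renders as priv-ok in DTrace), PRIV_KMEM_READ is
now granted to any credential that can open /dev/[k]mem, and the default
for security.bsd.unprivileged_mlock flips from 0 to 1.  The mlock change
is easy to observe from an unprivileged shell; a minimal sketch using only
standard POSIX calls (still bounded by RLIMIT_MEMLOCK):

#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	void *p;

	if (posix_memalign(&p, (size_t)pagesz, (size_t)pagesz) != 0)
		return (1);
	if (mlock(p, (size_t)pagesz) == 0) {
		printf("mlock succeeded as uid %d\n", (int)getuid());
		(void)munlock(p, (size_t)pagesz);
	} else
		perror("mlock");
	free(p);
	return (0);
}
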
Modified: trunk/sys/kern/kern_proc.c
===================================================================
--- trunk/sys/kern/kern_proc.c	2018-05-25 20:53:39 UTC (rev 9945)
+++ trunk/sys/kern/kern_proc.c	2018-05-25 20:55:11 UTC (rev 9946)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -30,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_proc.c 328571 2018-01-29 23:43:04Z jhb $");
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
@@ -55,6 +56,7 @@
 #include <sys/ptrace.h>
 #include <sys/refcount.h>
 #include <sys/resourcevar.h>
+#include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysent.h>
 #include <sys/sched.h>
@@ -91,33 +93,15 @@
 #endif
 
 SDT_PROVIDER_DEFINE(proc);
-SDT_PROBE_DEFINE(proc, kernel, ctor, entry, entry);
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 0, "struct proc *");
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 1, "int");
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 2, "void *");
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, entry, 3, "int");
-SDT_PROBE_DEFINE(proc, kernel, ctor, return, return);
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 0, "struct proc *");
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 1, "int");
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 2, "void *");
-SDT_PROBE_ARGTYPE(proc, kernel, ctor, return, 3, "int");
-SDT_PROBE_DEFINE(proc, kernel, dtor, entry, entry);
-SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 0, "struct proc *");
-SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 1, "int");
-SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 2, "void *");
-SDT_PROBE_ARGTYPE(proc, kernel, dtor, entry, 3, "struct thread *");
-SDT_PROBE_DEFINE(proc, kernel, dtor, return, return);
-SDT_PROBE_ARGTYPE(proc, kernel, dtor, return, 0, "struct proc *");
-SDT_PROBE_ARGTYPE(proc, kernel, dtor, return, 1, "int");
-SDT_PROBE_ARGTYPE(proc, kernel, dtor, return, 2, "void *");
-SDT_PROBE_DEFINE(proc, kernel, init, entry, entry);
-SDT_PROBE_ARGTYPE(proc, kernel, init, entry, 0, "struct proc *");
-SDT_PROBE_ARGTYPE(proc, kernel, init, entry, 1, "int");
-SDT_PROBE_ARGTYPE(proc, kernel, init, entry, 2, "int");
-SDT_PROBE_DEFINE(proc, kernel, init, return, return);
-SDT_PROBE_ARGTYPE(proc, kernel, init, return, 0, "struct proc *");
-SDT_PROBE_ARGTYPE(proc, kernel, init, return, 1, "int");
-SDT_PROBE_ARGTYPE(proc, kernel, init, return, 2, "int");
+SDT_PROBE_DEFINE4(proc, , ctor, entry, "struct proc *", "int", "void *",
+    "int");
+SDT_PROBE_DEFINE4(proc, , ctor, return, "struct proc *", "int", "void *",
+    "int");
+SDT_PROBE_DEFINE4(proc, , dtor, entry, "struct proc *", "int", "void *",
+    "struct thread *");
+SDT_PROBE_DEFINE3(proc, , dtor, return, "struct proc *", "int", "void *");
+SDT_PROBE_DEFINE3(proc, , init, entry, "struct proc *", "int", "int");
+SDT_PROBE_DEFINE3(proc, , init, return, "struct proc *", "int", "int");
 
 MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
 MALLOC_DEFINE(M_SESSION, "session", "session header");
@@ -153,9 +137,28 @@
 struct mtx ppeers_lock;
 uma_zone_t proc_zone;
 
+/*
+ * The offset of various fields in struct proc and struct thread.
+ * These are used by kernel debuggers to enumerate kernel threads and
+ * processes.
+ */
+const int proc_off_p_pid = offsetof(struct proc, p_pid);
+const int proc_off_p_comm = offsetof(struct proc, p_comm);
+const int proc_off_p_list = offsetof(struct proc, p_list);
+const int proc_off_p_threads = offsetof(struct proc, p_threads);
+const int thread_off_td_tid = offsetof(struct thread, td_tid);
+const int thread_off_td_name = offsetof(struct thread, td_name);
+const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu);
+const int thread_off_td_pcb = offsetof(struct thread, td_pcb);
+const int thread_off_td_plist = offsetof(struct thread, td_plist);
+
 int kstack_pages = KSTACK_PAGES;
 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
     "Kernel stack size in pages");
+static int vmmap_skip_res_cnt = 0;
+SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
+    &vmmap_skip_res_cnt, 0,
+    "Skip calculation of the pages resident count in kern.proc.vmmap");
 
 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
 #ifdef COMPAT_FREEBSD32
@@ -189,11 +192,17 @@
 proc_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct proc *p;
+	struct thread *td;
 
 	p = (struct proc *)mem;
-	SDT_PROBE(proc, kernel, ctor , entry, p, size, arg, flags, 0);
+	SDT_PROBE4(proc, , ctor , entry, p, size, arg, flags);
 	EVENTHANDLER_INVOKE(process_ctor, p);
-	SDT_PROBE(proc, kernel, ctor , return, p, size, arg, flags, 0);
+	SDT_PROBE4(proc, , ctor , return, p, size, arg, flags);
+	td = FIRST_THREAD_IN_PROC(p);
+	if (td != NULL) {
+		/* Make sure all thread constructors are executed */
+		EVENTHANDLER_INVOKE(thread_ctor, td);
+	}
 	return (0);
 }
 
@@ -209,7 +218,7 @@
 	/* INVARIANTS checks go here */
 	p = (struct proc *)mem;
 	td = FIRST_THREAD_IN_PROC(p);
-	SDT_PROBE(proc, kernel, dtor, entry, p, size, arg, td, 0);
+	SDT_PROBE4(proc, , dtor, entry, p, size, arg, td);
 	if (td != NULL) {
 #ifdef INVARIANTS
 		KASSERT((p->p_numthreads == 1),
@@ -218,11 +227,14 @@
 #endif
 		/* Free all OSD associated to this thread. */
 		osd_thread_exit(td);
+
+		/* Make sure all thread destructors are executed */
+		EVENTHANDLER_INVOKE(thread_dtor, td);
 	}
 	EVENTHANDLER_INVOKE(process_dtor, p);
 	if (p->p_ksi != NULL)
 		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
-	SDT_PROBE(proc, kernel, dtor, return, p, size, arg, 0, 0);
+	SDT_PROBE3(proc, , dtor, return, p, size, arg);
 }
 
 /*
@@ -234,7 +246,7 @@
 	struct proc *p;
 
 	p = (struct proc *)mem;
-	SDT_PROBE(proc, kernel, init, entry, p, size, flags, 0, 0);
+	SDT_PROBE3(proc, , init, entry, p, size, flags);
 	p->p_sched = (struct p_sched *)&p[1];
 	bzero(&p->p_mtx, sizeof(struct mtx));
 	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
@@ -244,7 +256,8 @@
 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
 	EVENTHANDLER_INVOKE(process_init, p);
 	p->p_stats = pstats_alloc();
-	SDT_PROBE(proc, kernel, init, return, p, size, flags, 0, 0);
+	p->p_pgrp = NULL;
+	SDT_PROBE3(proc, , init, return, p, size, flags);
 	return (0);
 }
 
@@ -274,14 +287,15 @@
  * Is p an inferior of the current process?
  */
 int
-inferior(p)
-	register struct proc *p;
+inferior(struct proc *p)
 {
 
 	sx_assert(&proctree_lock, SX_LOCKED);
-	for (; p != curproc; p = p->p_pptr)
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	for (; p != curproc; p = proc_realparent(p)) {
 		if (p->p_pid == 0)
 			return (0);
+	}
 	return (1);
 }
 
@@ -701,7 +715,7 @@
 
 	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 		PROC_LOCK(p);
-		if (P_SHOULDSTOP(p)) {
+		if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
 			PROC_UNLOCK(p);
 			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 				PROC_LOCK(p);
@@ -736,9 +750,7 @@
 	}
 }
 
-#include "opt_ddb.h"
 #ifdef DDB
-#include <ddb/ddb.h>
 
 DB_SHOW_COMMAND(pgrpdump, pgrpdump)
 {
@@ -818,6 +830,7 @@
 	kp->ki_fd = p->p_fd;
 	kp->ki_vmspace = p->p_vmspace;
 	kp->ki_flag = p->p_flag;
+	kp->ki_flag2 = p->p_flag2;
 	cred = p->p_ucred;
 	if (cred) {
 		kp->ki_uid = cred->cr_uid;
@@ -878,13 +891,14 @@
 	kp->ki_swtime = (ticks - p->p_swtick) / hz;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
+	kp->ki_fibnum = p->p_fibnum;
 	kp->ki_start = p->p_stats->p_start;
 	timevaladd(&kp->ki_start, &boottime);
-	PROC_SLOCK(p);
+	PROC_STATLOCK(p);
 	rufetch(p, &kp->ki_rusage);
 	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
 	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
-	PROC_SUNLOCK(p);
+	PROC_STATUNLOCK(p);
 	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
 	/* Some callers want child times in a single value. */
 	kp->ki_childtime = kp->ki_childstime;
@@ -949,13 +963,20 @@
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (preferthread)
-		PROC_SLOCK(p);
+		PROC_STATLOCK(p);
 	thread_lock(td);
 	if (td->td_wmesg != NULL)
 		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
 	else
 		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
-	strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname));
+	if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >=
+	    sizeof(kp->ki_tdname)) {
+		strlcpy(kp->ki_moretdname,
+		    td->td_name + sizeof(kp->ki_tdname) - 1,
+		    sizeof(kp->ki_moretdname));
+	} else {
+		bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname));
+	}
 	if (TD_ON_LOCK(td)) {
 		kp->ki_kiflag |= KI_LOCKBLOCK;
 		strlcpy(kp->ki_lockname, td->td_lockname,
@@ -1016,7 +1037,7 @@
 	kp->ki_sigmask = td->td_sigmask;
 	thread_unlock(td);
 	if (preferthread)
-		PROC_SUNLOCK(p);
+		PROC_STATUNLOCK(p);
 }
 
 /*
@@ -1176,6 +1197,9 @@
 	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
 	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
 	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
+	bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1);
+	CP(*ki, *ki32, ki_flag2);
+	CP(*ki, *ki32, ki_fibnum);
 	CP(*ki, *ki32, ki_cr_flags);
 	CP(*ki, *ki32, ki_jid);
 	CP(*ki, *ki32, ki_numthreads);
@@ -1186,6 +1210,7 @@
 	PTRTRIM_CP(*ki, *ki32, ki_pcb);
 	PTRTRIM_CP(*ki, *ki32, ki_kstack);
 	PTRTRIM_CP(*ki, *ki32, ki_udata);
+	PTRTRIM_CP(*ki, *ki32, ki_tdaddr);
 	CP(*ki, *ki32, ki_sflag);
 	CP(*ki, *ki32, ki_tdflags);
 }
@@ -1210,10 +1235,12 @@
 #ifdef COMPAT_FREEBSD32
 		if ((flags & KERN_PROC_MASK32) != 0) {
 			freebsd32_kinfo_proc_out(&ki, &ki32);
-			error = sbuf_bcat(sb, &ki32, sizeof(ki32));
+			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
+				error = ENOMEM;
 		} else
 #endif
-			error = sbuf_bcat(sb, &ki, sizeof(ki));
+			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
+				error = ENOMEM;
 	} else {
 		FOREACH_THREAD_IN_PROC(p, td) {
 			fill_kinfo_thread(td, &ki, 1);
@@ -1220,11 +1247,13 @@
 #ifdef COMPAT_FREEBSD32
 			if ((flags & KERN_PROC_MASK32) != 0) {
 				freebsd32_kinfo_proc_out(&ki, &ki32);
-				error = sbuf_bcat(sb, &ki32, sizeof(ki32));
+				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
+					error = ENOMEM;
 			} else
 #endif
-				error = sbuf_bcat(sb, &ki, sizeof(ki));
-			if (error)
+				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
+					error = ENOMEM;
+			if (error != 0)
 				break;
 		}
 	}
@@ -1775,7 +1804,8 @@
 		else
 #endif
 			size = vsize * sizeof(Elf_Auxinfo);
-		error = sbuf_bcat(sb, auxv, size);
+		if (sbuf_bcat(sb, auxv, size) != 0)
+			error = ENOMEM;
 		free(auxv, M_TEMP);
 	}
 	return (error);
@@ -1829,7 +1859,7 @@
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
-	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit)
+	if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs))
 		return (ENOMEM);
 	newpa = pargs_alloc(req->newlen);
 	error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
@@ -1919,7 +1949,7 @@
 	struct proc *p;
 	struct vnode *vp;
 	char *retbuf, *freebuf;
-	int error, vfslocked;
+	int error;
 
 	if (arglen != 1)
 		return (EINVAL);
@@ -1941,9 +1971,7 @@
 	if (*pidp != -1)
 		PROC_UNLOCK(p);
 	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
-	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 	vrele(vp);
-	VFS_UNLOCK_GIANT(vfslocked);
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
@@ -2004,13 +2032,12 @@
 	}
 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
 
-	map = &p->p_vmspace->vm_map;	/* XXXRW: More locking required? */
+	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
 		vm_object_t obj, tobj, lobj;
 		vm_offset_t addr;
-		int vfslocked;
 
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
@@ -2021,7 +2048,7 @@
 		kve->kve_private_resident = 0;
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
-			VM_OBJECT_LOCK(obj);
+			VM_OBJECT_RLOCK(obj);
 			if (obj->shadow_count == 1)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
@@ -2035,16 +2062,18 @@
 		}
 
 		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
-			if (tobj != obj)
-				VM_OBJECT_LOCK(tobj);
+			if (tobj != obj) {
+				VM_OBJECT_RLOCK(tobj);
+				kve->kve_offset += tobj->backing_object_offset;
+			}
 			if (lobj != obj)
-				VM_OBJECT_UNLOCK(lobj);
+				VM_OBJECT_RUNLOCK(lobj);
 			lobj = tobj;
 		}
 
 		kve->kve_start = (void*)entry->start;
 		kve->kve_end = (void*)entry->end;
-		kve->kve_offset = (off_t)entry->offset;
+		kve->kve_offset += (off_t)entry->offset;
 
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
@@ -2059,10 +2088,6 @@
 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
-		if (entry->eflags & MAP_ENTRY_GROWS_UP)
-			kve->kve_flags |= KVME_FLAG_GROWS_UP;
-		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
-			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
 
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
@@ -2083,7 +2108,15 @@
 				vref(vp);
 				break;
 			case OBJT_SWAP:
-				kve->kve_type = KVME_TYPE_SWAP;
+				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
+					kve->kve_type = KVME_TYPE_VNODE;
+					if ((lobj->flags & OBJ_TMPFS) != 0) {
+						vp = lobj->un_pager.swp.swp_tmpfs;
+						vref(vp);
+					}
+				} else {
+					kve->kve_type = KVME_TYPE_SWAP;
+				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
@@ -2102,16 +2135,15 @@
 				break;
 			}
 			if (lobj != obj)
-				VM_OBJECT_UNLOCK(lobj);
+				VM_OBJECT_RUNLOCK(lobj);
 
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
-			VM_OBJECT_UNLOCK(obj);
+			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				vn_fullpath(curthread, vp, &fullpath,
 				    &freepath);
 				cred = curthread->td_ucred;
-				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_fileid = va.va_fileid;
@@ -2118,7 +2150,6 @@
 					kve->kve_fsid = va.va_fsid;
 				}
 				vput(vp);
-				VFS_UNLOCK_GIANT(vfslocked);
 			}
 		} else {
 			kve->kve_type = KVME_TYPE_NONE;
@@ -2151,22 +2182,84 @@
 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
 #endif
 
+static void
+kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
+    struct kinfo_vmentry *kve)
+{
+	vm_object_t obj, tobj;
+	vm_page_t m, m_adv;
+	vm_offset_t addr;
+	vm_paddr_t locked_pa;
+	vm_pindex_t pi, pi_adv, pindex;
+
+	locked_pa = 0;
+	obj = entry->object.vm_object;
+	addr = entry->start;
+	m_adv = NULL;
+	pi = OFF_TO_IDX(entry->offset);
+	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
+		if (m_adv != NULL) {
+			m = m_adv;
+		} else {
+			pi_adv = OFF_TO_IDX(entry->end - addr);
+			pindex = pi;
+			for (tobj = obj;; tobj = tobj->backing_object) {
+				m = vm_page_find_least(tobj, pindex);
+				if (m != NULL) {
+					if (m->pindex == pindex)
+						break;
+					if (pi_adv > m->pindex - pindex) {
+						pi_adv = m->pindex - pindex;
+						m_adv = m;
+					}
+				}
+				if (tobj->backing_object == NULL)
+					goto next;
+				pindex += OFF_TO_IDX(tobj->
+				    backing_object_offset);
+			}
+		}
+		m_adv = NULL;
+		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
+		    (addr & (pagesizes[1] - 1)) == 0 &&
+		    (pmap_mincore(map->pmap, addr, &locked_pa) &
+		    MINCORE_SUPER) != 0) {
+			kve->kve_flags |= KVME_FLAG_SUPER;
+			pi_adv = OFF_TO_IDX(pagesizes[1]);
+		} else {
+			/*
+			 * We do not test the found page on validity.
+			 * Either the page is busy and being paged in,
+			 * or it was invalidated.  The first case
+			 * should be counted as resident, the second
+			 * is not so clear; we account for both.
+			 */
+			pi_adv = 1;
+		}
+		kve->kve_resident += pi_adv;
+next:;
+	}
+	PA_UNLOCK_COND(locked_pa);
+}
+
 /*
  * Must be called with the process locked and will return unlocked.
  */
 int
-kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
+kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
 {
 	vm_map_entry_t entry, tmp_entry;
-	unsigned int last_timestamp;
+	struct vattr va;
+	vm_map_t map;
+	vm_object_t obj, tobj, lobj;
 	char *fullpath, *freepath;
 	struct kinfo_vmentry *kve;
-	struct vattr va;
 	struct ucred *cred;
-	int error;
 	struct vnode *vp;
 	struct vmspace *vm;
-	vm_map_t map;
+	vm_offset_t addr;
+	unsigned int last_timestamp;
+	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
@@ -2177,56 +2270,43 @@
 		PRELE(p);
 		return (ESRCH);
 	}
-	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
+	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
 
 	error = 0;
-	map = &vm->vm_map;	/* XXXRW: More locking required? */
+	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
-		vm_object_t obj, tobj, lobj;
-		vm_offset_t addr;
-		vm_paddr_t locked_pa;
-		int vfslocked, mincoreinfo;
-
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
+		addr = entry->end;
 		bzero(kve, sizeof(*kve));
-
-		kve->kve_private_resident = 0;
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
-			VM_OBJECT_LOCK(obj);
-			if (obj->shadow_count == 1)
+			for (tobj = obj; tobj != NULL;
+			    tobj = tobj->backing_object) {
+				VM_OBJECT_RLOCK(tobj);
+				kve->kve_offset += tobj->backing_object_offset;
+				lobj = tobj;
+			}
+			if (obj->backing_object == NULL)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
+			if (!vmmap_skip_res_cnt)
+				kern_proc_vmmap_resident(map, entry, kve);
+			for (tobj = obj; tobj != NULL;
+			    tobj = tobj->backing_object) {
+				if (tobj != obj && tobj != lobj)
+					VM_OBJECT_RUNLOCK(tobj);
+			}
+		} else {
+			lobj = NULL;
 		}
-		kve->kve_resident = 0;
-		addr = entry->start;
-		while (addr < entry->end) {
-			locked_pa = 0;
-			mincoreinfo = pmap_mincore(map->pmap, addr, &locked_pa);
-			if (locked_pa != 0)
-				vm_page_unlock(PHYS_TO_VM_PAGE(locked_pa));
-			if (mincoreinfo & MINCORE_INCORE)
-				kve->kve_resident++;
-			if (mincoreinfo & MINCORE_SUPER)
-				kve->kve_flags |= KVME_FLAG_SUPER;
-			addr += PAGE_SIZE;
-		}
 
-		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
-			if (tobj != obj)
-				VM_OBJECT_LOCK(tobj);
-			if (lobj != obj)
-				VM_OBJECT_UNLOCK(lobj);
-			lobj = tobj;
-		}
-
 		kve->kve_start = entry->start;
 		kve->kve_end = entry->end;
-		kve->kve_offset = entry->offset;
+		kve->kve_offset += entry->offset;
 
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
@@ -2251,7 +2331,7 @@
 
 		freepath = NULL;
 		fullpath = "";
-		if (lobj) {
+		if (lobj != NULL) {
 			vp = NULL;
 			switch (lobj->type) {
 			case OBJT_DEFAULT:
@@ -2263,7 +2343,15 @@
 				vref(vp);
 				break;
 			case OBJT_SWAP:
-				kve->kve_type = KVME_TYPE_SWAP;
+				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
+					kve->kve_type = KVME_TYPE_VNODE;
+					if ((lobj->flags & OBJ_TMPFS) != 0) {
+						vp = lobj->un_pager.swp.swp_tmpfs;
+						vref(vp);
+					}
+				} else {
+					kve->kve_type = KVME_TYPE_SWAP;
+				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
@@ -2277,22 +2365,24 @@
 			case OBJT_SG:
 				kve->kve_type = KVME_TYPE_SG;
 				break;
+			case OBJT_MGTDEVICE:
+				kve->kve_type = KVME_TYPE_MGTDEVICE;
+				break;
 			default:
 				kve->kve_type = KVME_TYPE_UNKNOWN;
 				break;
 			}
 			if (lobj != obj)
-				VM_OBJECT_UNLOCK(lobj);
+				VM_OBJECT_RUNLOCK(lobj);
 
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
-			VM_OBJECT_UNLOCK(obj);
+			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				vn_fullpath(curthread, vp, &fullpath,
 				    &freepath);
 				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
 				cred = curthread->td_ucred;
-				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_vn_fileid = va.va_fileid;
@@ -2304,7 +2394,6 @@
 					kve->kve_status = KF_ATTR_VALID;
 				}
 				vput(vp);
-				VFS_UNLOCK_GIANT(vfslocked);
 			}
 		} else {
 			kve->kve_type = KVME_TYPE_NONE;
@@ -2317,13 +2406,27 @@
 			free(freepath, M_TEMP);
 
 		/* Pack record size down */
-		kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
-		    strlen(kve->kve_path) + 1;
+		if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
+			kve->kve_structsize =
+			    offsetof(struct kinfo_vmentry, kve_path) +
+			    strlen(kve->kve_path) + 1;
+		else
+			kve->kve_structsize = sizeof(*kve);
 		kve->kve_structsize = roundup(kve->kve_structsize,
 		    sizeof(uint64_t));
-		error = sbuf_bcat(sb, kve, kve->kve_structsize);
+
+		/* Halt filling and truncate rather than exceeding maxlen */
+		if (maxlen != -1 && maxlen < kve->kve_structsize) {
+			error = 0;
+			vm_map_lock_read(map);
+			break;
+		} else if (maxlen != -1)
+			maxlen -= kve->kve_structsize;
+
+		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
+			error = ENOMEM;
 		vm_map_lock_read(map);
-		if (error)
+		if (error != 0)
 			break;
 		if (last_timestamp != map->timestamp) {
 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
@@ -2351,7 +2454,7 @@
 		sbuf_delete(&sb);
 		return (error);
 	}
-	error = kern_proc_vmmap_out(p, &sb);
+	error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
 	error2 = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	return (error != 0 ? error : error2);
@@ -2378,10 +2481,8 @@
 	st = stack_create();
 
 	lwpidarray = NULL;
-	numthreads = 0;
 	PROC_LOCK(p);
-repeat:
-	if (numthreads < p->p_numthreads) {
+	do {
 		if (lwpidarray != NULL) {
 			free(lwpidarray, M_TEMP);
 			lwpidarray = NULL;
@@ -2391,9 +2492,7 @@
 		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
 		    M_WAITOK | M_ZERO);
 		PROC_LOCK(p);
-		goto repeat;
-	}
-	i = 0;
+	} while (numthreads < p->p_numthreads);
 
 	/*
 	 * XXXRW: During the below loop, execve(2) and countless other sorts
@@ -2404,6 +2503,7 @@
 	 * have changed, in which case the right to extract debug info might
 	 * no longer be assured.
 	 */
+	i = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		KASSERT(i < numthreads,
 		    ("sysctl_kern_proc_kstack: numthreads"));
@@ -2655,6 +2755,60 @@
 	return (error);
 }
 
+static int
+sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
+{
+	int *name = (int *)arg1;
+	u_int namelen = arg2;
+	struct proc *p;
+	struct kinfo_sigtramp kst;
+	const struct sysentvec *sv;
+	int error;
+#ifdef COMPAT_FREEBSD32
+	struct kinfo_sigtramp32 kst32;
+#endif
+
+	if (namelen != 1)
+		return (EINVAL);
+
+	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
+	if (error != 0)
+		return (error);
+	sv = p->p_sysent;
+#ifdef COMPAT_FREEBSD32
+	if ((req->flags & SCTL_MASK32) != 0) {
+		bzero(&kst32, sizeof(kst32));
+		if (SV_PROC_FLAG(p, SV_ILP32)) {
+			if (sv->sv_sigcode_base != 0) {
+				kst32.ksigtramp_start = sv->sv_sigcode_base;
+				kst32.ksigtramp_end = sv->sv_sigcode_base +
+				    *sv->sv_szsigcode;
+			} else {
+				kst32.ksigtramp_start = sv->sv_psstrings -
+				    *sv->sv_szsigcode;
+				kst32.ksigtramp_end = sv->sv_psstrings;
+			}
+		}
+		PROC_UNLOCK(p);
+		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
+		return (error);
+	}
+#endif
+	bzero(&kst, sizeof(kst));
+	if (sv->sv_sigcode_base != 0) {
+		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
+		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
+		    *sv->sv_szsigcode;
+	} else {
+		kst.ksigtramp_start = (char *)sv->sv_psstrings -
+		    *sv->sv_szsigcode;
+		kst.ksigtramp_end = (char *)sv->sv_psstrings;
+	}
+	PROC_UNLOCK(p);
+	error = SYSCTL_OUT(req, &kst, sizeof(kst));
+	return (error);
+}
+
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
 
 SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
@@ -2689,7 +2843,7 @@
 	sysctl_kern_proc, "Return process table, no threads");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
-	CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
+	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
 	sysctl_kern_proc_args, "Process argument list");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
@@ -2763,3 +2917,141 @@
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
 	"Process binary osreldate");
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
+	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
+	"Process signal trampoline location");
+
+int allproc_gen;
+
+/*
+ * The purpose of stop_all_proc() is to stop all processes which have a
+ * usermode part, except the current process, for obvious reasons.  This
+ * makes it somewhat unreliable when invoked from a multithreaded process.
+ * The service must not be user-callable anyway.
+ */
+void
+stop_all_proc(void)
+{
+	struct proc *cp, *p;
+	int r, gen;
+	bool restart, seen_stopped, seen_exiting, stopped_some;
+
+	cp = curproc;
+allproc_loop:
+	sx_xlock(&allproc_lock);
+	gen = allproc_gen;
+	seen_exiting = seen_stopped = stopped_some = restart = false;
+	LIST_REMOVE(cp, p_list);
+	LIST_INSERT_HEAD(&allproc, cp, p_list);
+	for (;;) {
+		p = LIST_NEXT(cp, p_list);
+		if (p == NULL)
+			break;
+		LIST_REMOVE(cp, p_list);
+		LIST_INSERT_AFTER(p, cp, p_list);
+		PROC_LOCK(p);
+		if ((p->p_flag & (P_KTHREAD | P_SYSTEM |
+		    P_TOTAL_STOP)) != 0) {
+			PROC_UNLOCK(p);
+			continue;
+		}
+		if ((p->p_flag & P_WEXIT) != 0) {
+			seen_exiting = true;
+			PROC_UNLOCK(p);
+			continue;
+		}
+		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
+			/*
+			 * Stopped processes are tolerated when there
+			 * are no other processes which might continue
+			 * them.  A P_STOPPED_SINGLE process that is
+			 * not P_TOTAL_STOP still has at least one
+			 * thread running.
+			 */
+			seen_stopped = true;
+			PROC_UNLOCK(p);
+			continue;
+		}
+		_PHOLD(p);
+		sx_xunlock(&allproc_lock);
+		r = thread_single(p, SINGLE_ALLPROC);
+		if (r != 0)
+			restart = true;
+		else
+			stopped_some = true;
+		_PRELE(p);
+		PROC_UNLOCK(p);
+		sx_xlock(&allproc_lock);
+	}
+	/* Catch forked children we did not see in iteration. */
+	if (gen != allproc_gen)
+		restart = true;
+	sx_xunlock(&allproc_lock);
+	if (restart || stopped_some || seen_exiting || seen_stopped) {
+		kern_yield(PRI_USER);
+		goto allproc_loop;
+	}
+}
+
+void
+resume_all_proc(void)
+{
+	struct proc *cp, *p;
+
+	cp = curproc;
+	sx_xlock(&allproc_lock);
+	LIST_REMOVE(cp, p_list);
+	LIST_INSERT_HEAD(&allproc, cp, p_list);
+	for (;;) {
+		p = LIST_NEXT(cp, p_list);
+		if (p == NULL)
+			break;
+		LIST_REMOVE(cp, p_list);
+		LIST_INSERT_AFTER(p, cp, p_list);
+		PROC_LOCK(p);
+		if ((p->p_flag & P_TOTAL_STOP) != 0) {
+			sx_xunlock(&allproc_lock);
+			_PHOLD(p);
+			thread_single_end(p, SINGLE_ALLPROC);
+			_PRELE(p);
+			PROC_UNLOCK(p);
+			sx_xlock(&allproc_lock);
+		} else {
+			PROC_UNLOCK(p);
+		}
+	}
+	sx_xunlock(&allproc_lock);
+}
+
+/* #define	TOTAL_STOP_DEBUG	1 */
+#ifdef TOTAL_STOP_DEBUG
+volatile static int ap_resume;
+#include <sys/mount.h>
+
+static int
+sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
+{
+	int error, val;
+
+	val = 0;
+	ap_resume = 0;
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (val != 0) {
+		stop_all_proc();
+		syncer_suspend();
+		while (ap_resume == 0)
+			;
+		syncer_resume();
+		resume_all_proc();
+	}
+	return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
+    CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
+    sysctl_debug_stop_all_proc, "I",
+    "");
+#endif

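The big interface change in kern_proc.c is that kern_proc_vmmap_out() now
takes maxlen and flags arguments, so callers can cap output and choose
between packed records (KERN_VMMAP_PACK_KINFO) and fixed-size ones, while
resident-page counting moves into kern_proc_vmmap_resident() and can be
skipped via the new kern.proc_vmmap_skip_resident_count sysctl.  Userland
consumers of kern.proc.vmmap are unaffected; for instance, libutil's
kinfo_getvmmap(3) still returns packed kinfo_vmentry records.  A short
sketch of such a consumer (assumes the stock libutil interface; build with
-lutil):

#include <sys/types.h>
#include <sys/user.h>
#include <libutil.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	struct kinfo_vmentry *kv;
	int i, cnt;

	kv = kinfo_getvmmap(getpid(), &cnt);
	if (kv == NULL)
		return (1);
	for (i = 0; i < cnt; i++)
		printf("%#jx-%#jx resident=%d %s\n",
		    (uintmax_t)kv[i].kve_start, (uintmax_t)kv[i].kve_end,
		    kv[i].kve_resident, kv[i].kve_path);
	free(kv);
	return (0);
}
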
Added: trunk/sys/kern/kern_procctl.c
===================================================================
--- trunk/sys/kern/kern_procctl.c	                        (rev 0)
+++ trunk/sys/kern/kern_procctl.c	2018-05-25 20:55:11 UTC (rev 9946)
@@ -0,0 +1,588 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2014 John Baldwin
+ * Copyright (c) 2014 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_procctl.c 326397 2017-11-30 14:38:07Z kib $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/capability.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/procctl.h>
+#include <sys/sx.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysproto.h>
+#include <sys/wait.h>
+
+static int
+protect_setchild(struct thread *td, struct proc *p, int flags)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
+		return (0);
+	if (flags & PPROT_SET) {
+		p->p_flag |= P_PROTECTED;
+		if (flags & PPROT_INHERIT)
+			p->p_flag2 |= P2_INHERIT_PROTECTED;
+	} else {
+		p->p_flag &= ~P_PROTECTED;
+		p->p_flag2 &= ~P2_INHERIT_PROTECTED;
+	}
+	return (1);
+}
+
+static int
+protect_setchildren(struct thread *td, struct proc *top, int flags)
+{
+	struct proc *p;
+	int ret;
+
+	p = top;
+	ret = 0;
+	sx_assert(&proctree_lock, SX_LOCKED);
+	for (;;) {
+		ret |= protect_setchild(td, p, flags);
+		PROC_UNLOCK(p);
+		/*
+		 * If this process has children, descend to them next,
+		 * otherwise do any siblings, and if done with this level,
+		 * follow back up the tree (but not past top).
+		 */
+		if (!LIST_EMPTY(&p->p_children))
+			p = LIST_FIRST(&p->p_children);
+		else for (;;) {
+			if (p == top) {
+				PROC_LOCK(p);
+				return (ret);
+			}
+			if (LIST_NEXT(p, p_sibling)) {
+				p = LIST_NEXT(p, p_sibling);
+				break;
+			}
+			p = p->p_pptr;
+		}
+		PROC_LOCK(p);
+	}
+}
+
+static int
+protect_set(struct thread *td, struct proc *p, int flags)
+{
+	int error, ret;
+
+	switch (PPROT_OP(flags)) {
+	case PPROT_SET:
+	case PPROT_CLEAR:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
+		return (EINVAL);
+
+	error = priv_check(td, PRIV_VM_MADV_PROTECT);
+	if (error)
+		return (error);
+
+	if (flags & PPROT_DESCEND)
+		ret = protect_setchildren(td, p, flags);
+	else
+		ret = protect_setchild(td, p, flags);
+	if (ret == 0)
+		return (EPERM);
+	return (0);
+}
+
+static int
+reap_acquire(struct thread *td, struct proc *p)
+{
+
+	sx_assert(&proctree_lock, SX_XLOCKED);
+	if (p != curproc)
+		return (EPERM);
+	if ((p->p_treeflag & P_TREE_REAPER) != 0)
+		return (EBUSY);
+	p->p_treeflag |= P_TREE_REAPER;
+	/*
+	 * We do not reattach existing children and the whole tree
+	 * under them to us, since p->p_reaper has already seen them.
+	 */
+	return (0);
+}
+
+static int
+reap_release(struct thread *td, struct proc *p)
+{
+
+	sx_assert(&proctree_lock, SX_XLOCKED);
+	if (p != curproc)
+		return (EPERM);
+	if (p == initproc)
+		return (EINVAL);
+	if ((p->p_treeflag & P_TREE_REAPER) == 0)
+		return (EINVAL);
+	reaper_abandon_children(p, false);
+	return (0);
+}
+
+static int
+reap_status(struct thread *td, struct proc *p,
+    struct procctl_reaper_status *rs)
+{
+	struct proc *reap, *p2, *first_p;
+
+	sx_assert(&proctree_lock, SX_LOCKED);
+	bzero(rs, sizeof(*rs));
+	if ((p->p_treeflag & P_TREE_REAPER) == 0) {
+		reap = p->p_reaper;
+	} else {
+		reap = p;
+		rs->rs_flags |= REAPER_STATUS_OWNED;
+	}
+	if (reap == initproc)
+		rs->rs_flags |= REAPER_STATUS_REALINIT;
+	rs->rs_reaper = reap->p_pid;
+	rs->rs_descendants = 0;
+	rs->rs_children = 0;
+	if (!LIST_EMPTY(&reap->p_reaplist)) {
+		first_p = LIST_FIRST(&reap->p_children);
+		if (first_p == NULL)
+			first_p = LIST_FIRST(&reap->p_reaplist);
+		rs->rs_pid = first_p->p_pid;
+		LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
+			if (proc_realparent(p2) == reap)
+				rs->rs_children++;
+			rs->rs_descendants++;
+		}
+	} else {
+		rs->rs_pid = -1;
+	}
+	return (0);
+}
+
+static int
+reap_getpids(struct thread *td, struct proc *p, struct procctl_reaper_pids *rp)
+{
+	struct proc *reap, *p2;
+	struct procctl_reaper_pidinfo *pi, *pip;
+	u_int i, n;
+	int error;
+
+	sx_assert(&proctree_lock, SX_LOCKED);
+	PROC_UNLOCK(p);
+	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
+	n = i = 0;
+	error = 0;
+	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
+		n++;
+	sx_unlock(&proctree_lock);
+	if (rp->rp_count < n)
+		n = rp->rp_count;
+	pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
+	sx_slock(&proctree_lock);
+	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
+		if (i == n)
+			break;
+		pip = &pi[i];
+		bzero(pip, sizeof(*pip));
+		pip->pi_pid = p2->p_pid;
+		pip->pi_subtree = p2->p_reapsubtree;
+		pip->pi_flags = REAPER_PIDINFO_VALID;
+		if (proc_realparent(p2) == reap)
+			pip->pi_flags |= REAPER_PIDINFO_CHILD;
+		if ((p2->p_treeflag & P_TREE_REAPER) != 0)
+			pip->pi_flags |= REAPER_PIDINFO_REAPER;
+		i++;
+	}
+	sx_sunlock(&proctree_lock);
+	error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
+	free(pi, M_TEMP);
+	sx_slock(&proctree_lock);
+	PROC_LOCK(p);
+	return (error);
+}
+
+static void
+reap_kill_proc(struct thread *td, struct proc *p2, ksiginfo_t *ksi,
+    struct procctl_reaper_kill *rk, int *error)
+{
+	int error1;
+
+	PROC_LOCK(p2);
+	error1 = p_cansignal(td, p2, rk->rk_sig);
+	if (error1 == 0) {
+		pksignal(p2, rk->rk_sig, ksi);
+		rk->rk_killed++;
+		*error = error1;
+	} else if (*error == ESRCH) {
+		rk->rk_fpid = p2->p_pid;
+		*error = error1;
+	}
+	PROC_UNLOCK(p2);
+}
+
+struct reap_kill_tracker {
+	struct proc *parent;
+	TAILQ_ENTRY(reap_kill_tracker) link;
+};
+
+TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker);
+
+static void
+reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2)
+{
+	struct reap_kill_tracker *t;
+
+	t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK);
+	t->parent = p2;
+	TAILQ_INSERT_TAIL(tracker, t, link);
+}
+
+static int
+reap_kill(struct thread *td, struct proc *p, struct procctl_reaper_kill *rk)
+{
+	struct proc *reap, *p2;
+	ksiginfo_t ksi;
+	struct reap_kill_tracker_head tracker;
+	struct reap_kill_tracker *t;
+	int error;
+
+	sx_assert(&proctree_lock, SX_LOCKED);
+	if (IN_CAPABILITY_MODE(td))
+		return (ECAPMODE);
+	if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
+	    (rk->rk_flags & ~(REAPER_KILL_CHILDREN |
+	    REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags &
+	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) ==
+	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE))
+		return (EINVAL);
+	PROC_UNLOCK(p);
+	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
+	ksiginfo_init(&ksi);
+	ksi.ksi_signo = rk->rk_sig;
+	ksi.ksi_code = SI_USER;
+	ksi.ksi_pid = td->td_proc->p_pid;
+	ksi.ksi_uid = td->td_ucred->cr_ruid;
+	error = ESRCH;
+	rk->rk_killed = 0;
+	rk->rk_fpid = -1;
+	if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
+		for (p2 = LIST_FIRST(&reap->p_children); p2 != NULL;
+		    p2 = LIST_NEXT(p2, p_sibling)) {
+			reap_kill_proc(td, p2, &ksi, rk, &error);
+			/*
+			 * Do not end the loop on error, signal
+			 * everything we can.
+			 */
+		}
+	} else {
+		TAILQ_INIT(&tracker);
+		reap_kill_sched(&tracker, reap);
+		while ((t = TAILQ_FIRST(&tracker)) != NULL) {
+			MPASS((t->parent->p_treeflag & P_TREE_REAPER) != 0);
+			TAILQ_REMOVE(&tracker, t, link);
+			for (p2 = LIST_FIRST(&t->parent->p_reaplist); p2 != NULL;
+			    p2 = LIST_NEXT(p2, p_reapsibling)) {
+				if (t->parent == reap &&
+				    (rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
+				    p2->p_reapsubtree != rk->rk_subtree)
+					continue;
+				if ((p2->p_treeflag & P_TREE_REAPER) != 0)
+					reap_kill_sched(&tracker, p2);
+				reap_kill_proc(td, p2, &ksi, rk, &error);
+			}
+			free(t, M_TEMP);
+		}
+	}
+	PROC_LOCK(p);
+	return (error);
+}
+
+static int
+trace_ctl(struct thread *td, struct proc *p, int state)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	/*
+	 * Ktrace changes p_traceflag from or to zero under the
+	 * process lock, so the test does not need to acquire ktrace
+	 * mutex.
+	 */
+	if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0)
+		return (EBUSY);
+
+	switch (state) {
+	case PROC_TRACE_CTL_ENABLE:
+		if (td->td_proc != p)
+			return (EPERM);
+		p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC);
+		break;
+	case PROC_TRACE_CTL_DISABLE_EXEC:
+		p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE;
+		break;
+	case PROC_TRACE_CTL_DISABLE:
+		if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) {
+			KASSERT((p->p_flag2 & P2_NOTRACE) != 0,
+			    ("dangling P2_NOTRACE_EXEC"));
+			if (td->td_proc != p)
+				return (EPERM);
+			p->p_flag2 &= ~P2_NOTRACE_EXEC;
+		} else {
+			p->p_flag2 |= P2_NOTRACE;
+		}
+		break;
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+static int
+trace_status(struct thread *td, struct proc *p, int *data)
+{
+
+	if ((p->p_flag2 & P2_NOTRACE) != 0) {
+		KASSERT((p->p_flag & P_TRACED) == 0,
+		    ("%d traced but tracing disabled", p->p_pid));
+		*data = -1;
+	} else if ((p->p_flag & P_TRACED) != 0) {
+		*data = p->p_pptr->p_pid;
+	} else {
+		*data = 0;
+	}
+	return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct procctl_args {
+	idtype_t idtype;
+	id_t	id;
+	int	com;
+	void	*data;
+};
+#endif
+/* ARGSUSED */
+int
+sys_procctl(struct thread *td, struct procctl_args *uap)
+{
+	void *data;
+	union {
+		struct procctl_reaper_status rs;
+		struct procctl_reaper_pids rp;
+		struct procctl_reaper_kill rk;
+	} x;
+	int error, error1, flags;
+
+	switch (uap->com) {
+	case PROC_SPROTECT:
+	case PROC_TRACE_CTL:
+		error = copyin(uap->data, &flags, sizeof(flags));
+		if (error != 0)
+			return (error);
+		data = &flags;
+		break;
+	case PROC_REAP_ACQUIRE:
+	case PROC_REAP_RELEASE:
+		if (uap->data != NULL)
+			return (EINVAL);
+		data = NULL;
+		break;
+	case PROC_REAP_STATUS:
+		data = &x.rs;
+		break;
+	case PROC_REAP_GETPIDS:
+		error = copyin(uap->data, &x.rp, sizeof(x.rp));
+		if (error != 0)
+			return (error);
+		data = &x.rp;
+		break;
+	case PROC_REAP_KILL:
+		error = copyin(uap->data, &x.rk, sizeof(x.rk));
+		if (error != 0)
+			return (error);
+		data = &x.rk;
+		break;
+	case PROC_TRACE_STATUS:
+		data = &flags;
+		break;
+	default:
+		return (EINVAL);
+	}
+	error = kern_procctl(td, uap->idtype, uap->id, uap->com, data);
+	switch (uap->com) {
+	case PROC_REAP_STATUS:
+		if (error == 0)
+			error = copyout(&x.rs, uap->data, sizeof(x.rs));
+		break;
+	case PROC_REAP_KILL:
+		error1 = copyout(&x.rk, uap->data, sizeof(x.rk));
+		if (error == 0)
+			error = error1;
+		break;
+	case PROC_TRACE_STATUS:
+		if (error == 0)
+			error = copyout(&flags, uap->data, sizeof(flags));
+		break;
+	}
+	return (error);
+}
+
+static int
+kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
+{
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	switch (com) {
+	case PROC_SPROTECT:
+		return (protect_set(td, p, *(int *)data));
+	case PROC_REAP_ACQUIRE:
+		return (reap_acquire(td, p));
+	case PROC_REAP_RELEASE:
+		return (reap_release(td, p));
+	case PROC_REAP_STATUS:
+		return (reap_status(td, p, data));
+	case PROC_REAP_GETPIDS:
+		return (reap_getpids(td, p, data));
+	case PROC_REAP_KILL:
+		return (reap_kill(td, p, data));
+	case PROC_TRACE_CTL:
+		return (trace_ctl(td, p, *(int *)data));
+	case PROC_TRACE_STATUS:
+		return (trace_status(td, p, data));
+	default:
+		return (EINVAL);
+	}
+}
+
+int
+kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
+{
+	struct pgrp *pg;
+	struct proc *p;
+	int error, first_error, ok;
+	bool tree_locked;
+
+	switch (com) {
+	case PROC_REAP_ACQUIRE:
+	case PROC_REAP_RELEASE:
+	case PROC_REAP_STATUS:
+	case PROC_REAP_GETPIDS:
+	case PROC_REAP_KILL:
+	case PROC_TRACE_STATUS:
+		if (idtype != P_PID)
+			return (EINVAL);
+	}
+
+	switch (com) {
+	case PROC_SPROTECT:
+	case PROC_REAP_STATUS:
+	case PROC_REAP_GETPIDS:
+	case PROC_REAP_KILL:
+	case PROC_TRACE_CTL:
+		sx_slock(&proctree_lock);
+		tree_locked = true;
+		break;
+	case PROC_REAP_ACQUIRE:
+	case PROC_REAP_RELEASE:
+		sx_xlock(&proctree_lock);
+		tree_locked = true;
+		break;
+	case PROC_TRACE_STATUS:
+		tree_locked = false;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	switch (idtype) {
+	case P_PID:
+		p = pfind(id);
+		if (p == NULL) {
+			error = ESRCH;
+			break;
+		}
+		error = p_cansee(td, p);
+		if (error == 0)
+			error = kern_procctl_single(td, p, com, data);
+		PROC_UNLOCK(p);
+		break;
+	case P_PGID:
+		/*
+		 * Attempt to apply the operation to all members of the
+		 * group.  Ignore processes in the group that can't be
+		 * seen.  Ignore errors so long as at least one process is
+		 * able to complete the request successfully.
+		 */
+		pg = pgfind(id);
+		if (pg == NULL) {
+			error = ESRCH;
+			break;
+		}
+		PGRP_UNLOCK(pg);
+		ok = 0;
+		first_error = 0;
+		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+			PROC_LOCK(p);
+			if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) {
+				PROC_UNLOCK(p);
+				continue;
+			}
+			error = kern_procctl_single(td, p, com, data);
+			PROC_UNLOCK(p);
+			if (error == 0)
+				ok = 1;
+			else if (first_error == 0)
+				first_error = error;
+		}
+		if (ok)
+			error = 0;
+		else if (first_error != 0)
+			error = first_error;
+		else
+			/*
+			 * Was not able to see any processes in the
+			 * process group.
+			 */
+			error = ESRCH;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	if (tree_locked)
+		sx_unlock(&proctree_lock);
+	return (error);
+}


Property changes on: trunk/sys/kern/kern_procctl.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
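
The newly added kern_procctl.c backs the procctl(2) syscall: PROC_SPROTECT
toggles the P_PROTECTED flag that shields a process (and optionally its
descendants) from kills under memory pressure, the PROC_REAP_* family lets
a process become the reaper for its orphaned descendants, and PROC_TRACE_*
gates debugger attachment via P2_NOTRACE.  A minimal userland sketch of
the reaper interface, assuming headers and libc from a world built with
this sync:

/* Become a reaper, then ask the kernel what we now own. */
#include <sys/procctl.h>
#include <sys/wait.h>		/* idtype_t and P_PID */
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct procctl_reaper_status rs;

	if (procctl(P_PID, (id_t)getpid(), PROC_REAP_ACQUIRE, NULL) == -1)
		err(1, "PROC_REAP_ACQUIRE");
	if (procctl(P_PID, (id_t)getpid(), PROC_REAP_STATUS, &rs) == -1)
		err(1, "PROC_REAP_STATUS");
	printf("reaper %d: %u children, %u descendants\n",
	    (int)rs.rs_reaper, rs.rs_children, rs.rs_descendants);
	return (0);
}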

