[Midnightbsd-cvs] src [9955] trunk/sys/kern: sync with freebsd

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sat May 26 10:26:34 EDT 2018


Revision: 9955
          http://svnweb.midnightbsd.org/src/?rev=9955
Author:   laffer1
Date:     2018-05-26 10:26:33 -0400 (Sat, 26 May 2018)
Log Message:
-----------
 sync with freebsd

Modified Paths:
--------------
    trunk/sys/kern/sysv_ipc.c
    trunk/sys/kern/sysv_msg.c
    trunk/sys/kern/sysv_sem.c
    trunk/sys/kern/sysv_shm.c

Modified: trunk/sys/kern/sysv_ipc.c
===================================================================
--- trunk/sys/kern/sysv_ipc.c	2018-05-26 14:25:55 UTC (rev 9954)
+++ trunk/sys/kern/sysv_ipc.c	2018-05-26 14:26:33 UTC (rev 9955)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*	$NetBSD: sysv_ipc.c,v 1.7 1994/06/29 06:33:11 cgd Exp $	*/
 /*-
  * Copyright (c) 1994 Herb Peyerl <hpeyerl at novatel.ca>
@@ -34,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sysv_ipc.c 205322 2010-03-19 11:01:51Z kib $");
 
 #include "opt_compat.h"
 #include "opt_sysvipc.h"

Modified: trunk/sys/kern/sysv_msg.c
===================================================================
--- trunk/sys/kern/sysv_msg.c	2018-05-26 14:25:55 UTC (rev 9954)
+++ trunk/sys/kern/sysv_msg.c	2018-05-26 14:26:33 UTC (rev 9955)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Implementation of SVID messages
  *
@@ -48,7 +49,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sysv_msg.c 329741 2018-02-21 18:32:57Z brooks $");
 
 #include "opt_compat.h"
 #include "opt_sysvipc.h"
@@ -62,8 +63,10 @@
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/module.h>
+#include <sys/mount.h>
 #include <sys/msg.h>
 #include <sys/racct.h>
+#include <sys/sx.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
@@ -80,6 +83,14 @@
 static int msginit(void);
 static int msgunload(void);
 static int sysvmsg_modload(struct module *, int, void *);
+static void msq_remove(struct msqid_kernel *);
+static struct prison *msg_find_prison(struct ucred *);
+static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
+static int msg_prison_check(void *, void *);
+static int msg_prison_set(void *, void *);
+static int msg_prison_get(void *, void *);
+static int msg_prison_remove(void *, void *);
+static void msg_prison_cleanup(struct prison *);
 
 
 #ifdef MSG_DEBUG
@@ -155,6 +166,7 @@
 static struct msg *msghdrs;	/* MSGTQL msg headers */
 static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
 static struct mtx msq_mtx;	/* global mutex for message queues. */
+static unsigned msg_prison_slot;/* prison OSD slot */
 
 static struct syscall_helper_data msg_syscalls[] = {
 	SYSCALL_INIT_HELPER(msgctl),
@@ -194,7 +206,15 @@
 static int
 msginit()
 {
+	struct prison *pr;
+	void *rsv;
 	int i, error;
+	osd_method_t methods[PR_MAXMETHOD] = {
+	    [PR_METHOD_CHECK] =		msg_prison_check,
+	    [PR_METHOD_SET] =		msg_prison_set,
+	    [PR_METHOD_GET] =		msg_prison_get,
+	    [PR_METHOD_REMOVE] =	msg_prison_remove,
+	};
 
 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
@@ -258,6 +278,29 @@
 	}
 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
 
+	/* Set current prisons according to their allow.sysvipc. */
+	msg_prison_slot = osd_jail_register(NULL, methods);
+	rsv = osd_reserve(msg_prison_slot);
+	prison_lock(&prison0);
+	(void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
+	prison_unlock(&prison0);
+	rsv = NULL;
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		if (rsv == NULL)
+			rsv = osd_reserve(msg_prison_slot);
+		prison_lock(pr);
+		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
+			    &prison0);
+			rsv = NULL;
+		}
+		prison_unlock(pr);
+	}
+	if (rsv != NULL)
+		osd_free_reserved(rsv);
+	sx_sunlock(&allprison_lock);
+
 	error = syscall_helper_register(msg_syscalls);
 	if (error != 0)
 		return (error);
@@ -284,12 +327,6 @@
 #endif
 
 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
-		/*
-		 * Look for an unallocated and unlocked msqid_ds.
-		 * msqid_ds's can be locked by msgsnd or msgrcv while
-		 * they are copying the message in/out.  We can't
-		 * re-use the entry until they release it.
-		 */
 		msqkptr = &msqids[msqid];
 		if (msqkptr->u.msg_qbytes != 0 ||
 		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
@@ -298,6 +335,8 @@
 	if (msqid != msginfo.msgmni)
 		return (EBUSY);
 
+	if (msg_prison_slot != 0)
+		osd_jail_deregister(msg_prison_slot);
 #ifdef MAC
 	for (i = 0; i < msginfo.msgtql; i++)
 		mac_sysvmsg_destroy(&msghdrs[i]);
@@ -372,6 +411,67 @@
 #endif
 }
 
+/*
+ * Tear down a message queue: release its accounting and credential, free
+ * every queued message, mark the slot unused and wake any sleepers.
+ */
+static void
+msq_remove(struct msqid_kernel *msqkptr)
+{
+	struct ucred *owner = msqkptr->cred;
+	struct msg *cur, *next;
+
+	/* Give back the resources charged to the queue's credential. */
+	racct_sub_cred(owner, RACCT_NMSGQ, 1);
+	racct_sub_cred(owner, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
+	racct_sub_cred(owner, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
+	crfree(owner);
+	msqkptr->cred = NULL;
+
+	/* Release each queued message, keeping the counters in step. */
+	for (cur = msqkptr->u.msg_first; cur != NULL; cur = next) {
+		next = cur->msg_next;
+		msqkptr->u.msg_cbytes -= cur->msg_ts;
+		msqkptr->u.msg_qnum--;
+		msg_freehdr(cur);
+	}
+	if (msqkptr->u.msg_cbytes != 0)
+		panic("msg_cbytes is screwed up");
+	if (msqkptr->u.msg_qnum != 0)
+		panic("msg_qnum is screwed up");
+
+	/* A zero msg_qbytes marks the slot as free. */
+	msqkptr->u.msg_qbytes = 0;
+#ifdef MAC
+	mac_sysvmsq_cleanup(msqkptr);
+#endif
+
+	wakeup(msqkptr);
+}
+
+/* Look up the sysvmsg root prison recorded for cred's jail; NULL if none. */
+static struct prison *
+msg_find_prison(struct ucred *cred)
+{
+	struct prison *jail = cred->cr_prison, *root;
+
+	prison_lock(jail);
+	root = osd_jail_get(jail, msg_prison_slot);
+	prison_unlock(jail);
+	return (root);
+}
+
+/* 0 if the queue's jail is rpr or, per prison_ischild(), below it; else EINVAL. */
+static int
+msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
+{
+
+	if (msqkptr->cred == NULL)
+		return (EINVAL);
+	return (rpr == msqkptr->cred->cr_prison ||
+	    prison_ischild(rpr, msqkptr->cred->cr_prison) ? 0 : EINVAL);
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct msgctl_args {
 	int	msqid;
@@ -408,8 +508,10 @@
 {
 	int rval, error, msqix;
 	register struct msqid_kernel *msqkptr;
+	struct prison *rpr;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = msg_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
 
 	msqix = IPCID_TO_IX(msqid);
@@ -433,6 +535,13 @@
 		error = EINVAL;
 		goto done2;
 	}
+
+	error = msq_prison_cansee(rpr, msqkptr);
+	if (error != 0) {
+		DPRINTF(("requester can't see prison\n"));
+		goto done2;
+	}
+
 #ifdef MAC
 	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
 	if (error != 0)
@@ -446,7 +555,9 @@
 
 	case IPC_RMID:
 	{
+#ifdef MAC
 		struct msg *msghdr;
+#endif
 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
 			goto done2;
 
@@ -468,37 +579,7 @@
 		}
 #endif
 
-		racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
-		racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
-		racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
-		crfree(msqkptr->cred);
-		msqkptr->cred = NULL;
-
-		/* Free the message headers */
-		msghdr = msqkptr->u.msg_first;
-		while (msghdr != NULL) {
-			struct msg *msghdr_tmp;
-
-			/* Free the segments of each message */
-			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
-			msqkptr->u.msg_qnum--;
-			msghdr_tmp = msghdr;
-			msghdr = msghdr->msg_next;
-			msg_freehdr(msghdr_tmp);
-		}
-
-		if (msqkptr->u.msg_cbytes != 0)
-			panic("msg_cbytes is screwed up");
-		if (msqkptr->u.msg_qnum != 0)
-			panic("msg_qnum is screwed up");
-
-		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
-
-#ifdef MAC
-		mac_sysvmsq_cleanup(msqkptr);
-#endif
-
-		wakeup(msqkptr);
+		msq_remove(msqkptr);
 	}
 
 		break;
@@ -535,6 +616,8 @@
 			goto done2;
 		}
 		*msqbuf = msqkptr->u;
+		if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
+			msqbuf->msg_perm.key = IPC_PRIVATE;
 		break;
 
 	default:
@@ -570,7 +653,7 @@
 
 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	if (msg_find_prison(cred) == NULL)
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
@@ -578,6 +661,8 @@
 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
 			msqkptr = &msqids[msqid];
 			if (msqkptr->u.msg_qbytes != 0 &&
+			    msqkptr->cred != NULL &&
+			    msqkptr->cred->cr_prison == cred->cr_prison &&
 			    msqkptr->u.msg_perm.key == key)
 				break;
 		}
@@ -623,12 +708,14 @@
 			goto done2;
 		}
 #ifdef RACCT
-		PROC_LOCK(td->td_proc);
-		error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
-		PROC_UNLOCK(td->td_proc);
-		if (error != 0) {
-			error = ENOSPC;
-			goto done2;
+		if (racct_enable) {
+			PROC_LOCK(td->td_proc);
+			error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
+			PROC_UNLOCK(td->td_proc);
+			if (error != 0) {
+				error = ENOSPC;
+				goto done2;
+			}
 		}
 #endif
 		DPRINTF(("msqid %d is available\n", msqid));
@@ -688,12 +775,14 @@
 	int msqix, segs_needed, error = 0;
 	register struct msqid_kernel *msqkptr;
 	register struct msg *msghdr;
+	struct prison *rpr;
 	short next;
 #ifdef RACCT
 	size_t saved_msgsz;
 #endif
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = msg_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
@@ -718,6 +807,11 @@
 		goto done2;
 	}
 
+	if ((error = msq_prison_cansee(rpr, msqkptr))) {
+		DPRINTF(("requester can't see prison\n"));
+		goto done2;
+	}
+
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
 		DPRINTF(("requester doesn't have write access\n"));
 		goto done2;
@@ -730,20 +824,22 @@
 #endif
 
 #ifdef RACCT
-	PROC_LOCK(td->td_proc);
-	if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
+	if (racct_enable) {
+		PROC_LOCK(td->td_proc);
+		if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
+			PROC_UNLOCK(td->td_proc);
+			error = EAGAIN;
+			goto done2;
+		}
+		saved_msgsz = msgsz;
+		if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
+			racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
+			PROC_UNLOCK(td->td_proc);
+			error = EAGAIN;
+			goto done2;
+		}
 		PROC_UNLOCK(td->td_proc);
-		error = EAGAIN;
-		goto done2;
 	}
-	saved_msgsz = msgsz;
-	if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
-		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
-		PROC_UNLOCK(td->td_proc);
-		error = EAGAIN;
-		goto done2;
-	}
-	PROC_UNLOCK(td->td_proc);
 #endif
 
 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
@@ -1000,7 +1096,7 @@
 	td->td_retval[0] = 0;
 done3:
 #ifdef RACCT
-	if (error != 0) {
+	if (racct_enable && error != 0) {
 		PROC_LOCK(td->td_proc);
 		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
 		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
@@ -1054,10 +1150,12 @@
 	size_t len;
 	register struct msqid_kernel *msqkptr;
 	register struct msg *msghdr;
+	struct prison *rpr;
 	int msqix, error = 0;
 	short next;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = msg_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
 
 	msqix = IPCID_TO_IX(msqid);
@@ -1081,6 +1179,11 @@
 		goto done2;
 	}
 
+	if ((error = msq_prison_cansee(rpr, msqkptr))) {
+		DPRINTF(("requester can't see prison\n"));
+		goto done2;
+	}
+
 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
 		DPRINTF(("requester doesn't have read access\n"));
 		goto done2;
@@ -1320,9 +1423,29 @@
 static int
 sysctl_msqids(SYSCTL_HANDLER_ARGS)
 {
+	struct msqid_kernel tmsqk;
+	struct prison *pr, *rpr;
+	int error, i;
 
-	return (SYSCTL_OUT(req, msqids,
-	    sizeof(struct msqid_kernel) * msginfo.msgmni));
+	pr = req->td->td_ucred->cr_prison;
+	rpr = msg_find_prison(req->td->td_ucred);
+	error = 0;
+	for (i = 0; i < msginfo.msgmni; i++) {
+		mtx_lock(&msq_mtx);
+		if (msqids[i].u.msg_qbytes == 0 || rpr == NULL ||
+		    msq_prison_cansee(rpr, &msqids[i]) != 0)
+			bzero(&tmsqk, sizeof(tmsqk));
+		else {
+			tmsqk = msqids[i];
+			if (tmsqk.cred->cr_prison != pr)
+				tmsqk.u.msg_perm.key = IPC_PRIVATE;
+		}
+		mtx_unlock(&msq_mtx);
+		error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk));
+		if (error != 0)
+			break;
+	}
+	return (error);
 }
 
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
@@ -1337,9 +1460,186 @@
     "Size of a message segment");
 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
     "Number of message segments");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
-    NULL, 0, sysctl_msqids, "", "Message queue IDs");
+SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids,
+    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_msqids, "",
+    "Array of struct msqid_kernel for each potential message queue");
 
+/*
+ * Jail "check" method (PR_METHOD_CHECK): vet a requested "sysvmsg" value.
+ *
+ * obj is the struct prison and data the struct vfsoptlist of options.
+ * Returns 0 when the option is absent or acceptable, EINVAL for an
+ * unknown value, EPERM when new/inherit is requested but the parent jail
+ * has no sysvmsg root, or any other error from vfs_copyopt().
+ */
+static int
+msg_prison_check(void *obj, void *data)
+{
+	struct prison *jail = obj, *parent_root;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
+	if (error == ENOENT)
+		return (0);		/* option not supplied */
+	if (error != 0)
+		return (error);
+
+	if (jsys == JAIL_SYS_DISABLE)
+		return (0);
+	if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
+		return (EINVAL);
+
+	/* Enabling requires the parent jail to have sysvmsg enabled. */
+	prison_lock(jail->pr_parent);
+	parent_root = osd_jail_get(jail->pr_parent, msg_prison_slot);
+	prison_unlock(jail->pr_parent);
+	if (parent_root == NULL)
+		return (EPERM);
+	return (0);
+}
+
+static int
+msg_prison_set(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *tpr, *orpr, *nrpr, *trpr;
+	struct vfsoptlist *opts = data;
+	void *rsv;
+	int jsys, descend;
+
+	/*
+	 * sysvmsg selects the root jail for this jail's msgs (itself or the
+	 * parent's root) or disables them; else allow.[no]sysvipc decides.
+	 */
+	if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
+		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
+		    ? JAIL_SYS_INHERIT
+		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
+		    ? JAIL_SYS_DISABLE
+		    : -1;		/* neither given: change nothing */
+	if (jsys == JAIL_SYS_DISABLE) {
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, msg_prison_slot);
+		if (orpr != NULL)
+			osd_jail_del(pr, msg_prison_slot);
+		prison_unlock(pr);
+		if (orpr != NULL) {
+			if (orpr == pr)		/* pr was its own root */
+				msg_prison_cleanup(pr);
+			/* Disable all child jails as well. */
+			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+				prison_lock(tpr);
+				trpr = osd_jail_get(tpr, msg_prison_slot);
+				if (trpr != NULL) {
+					osd_jail_del(tpr, msg_prison_slot);
+					prison_unlock(tpr);
+					if (trpr == tpr)
+						msg_prison_cleanup(tpr);
+				} else {
+					prison_unlock(tpr);
+					descend = 0;
+				}
+			}
+		}
+	} else if (jsys != -1) {
+		if (jsys == JAIL_SYS_NEW)
+			nrpr = pr;	/* this jail becomes the root */
+		else {
+			prison_lock(pr->pr_parent);
+			nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
+			prison_unlock(pr->pr_parent);
+		}
+		rsv = osd_reserve(msg_prison_slot);
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, msg_prison_slot);
+		if (orpr != nrpr)
+			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
+			    nrpr);
+		else
+			osd_free_reserved(rsv);	/* root unchanged */
+		prison_unlock(pr);
+		if (orpr != nrpr) {
+			if (orpr == pr)		/* pr stopped being its own root */
+				msg_prison_cleanup(pr);
+			if (orpr != NULL) {
+				/* Change child jails matching the old root. */
+				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+					prison_lock(tpr);
+					trpr = osd_jail_get(tpr,
+					    msg_prison_slot);
+					if (trpr == orpr) {
+						(void)osd_jail_set(tpr,
+						    msg_prison_slot, nrpr);
+						prison_unlock(tpr);
+						if (trpr == tpr)
+							msg_prison_cleanup(tpr);
+					} else {
+						prison_unlock(tpr);
+						descend = 0;
+					}
+				}
+			}
+		}
+	}
+
+	return (0);
+}
+
+/* Jail "get" method: report "sysvmsg" as disable, new or inherit. */
+static int
+msg_prison_get(void *obj, void *data)
+{
+	struct prison *jail = obj, *root;
+	int error, jsys;
+
+	prison_lock(jail);
+	root = osd_jail_get(jail, msg_prison_slot);
+	prison_unlock(jail);
+	/* No root: disabled.  Own root: new.  Someone else's: inherited. */
+	if (root == NULL)
+		jsys = JAIL_SYS_DISABLE;
+	else
+		jsys = (root == jail) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
+	error = vfs_setopt(data, "sysvmsg", &jsys, sizeof(jsys));
+	/* ENOENT from vfs_setopt() is deliberately not reported. */
+	return (error == ENOENT ? 0 : error);
+}
+
+/* Jail "remove" method: a jail that roots its own queues destroys them. */
+static int
+msg_prison_remove(void *obj, void *data __unused)
+{
+	struct prison *jail = obj, *root;
+
+	prison_lock(jail);
+	root = osd_jail_get(jail, msg_prison_slot);
+	prison_unlock(jail);
+	if (root == jail)
+		msg_prison_cleanup(jail);
+	return (0);
+}
+
+/* Destroy, under msq_mtx, every message queue whose cred is in jail pr. */
+static void
+msg_prison_cleanup(struct prison *pr)
+{
+	struct msqid_kernel *q;
+
+	mtx_lock(&msq_mtx);
+	for (q = msqids; q < msqids + msginfo.msgmni; q++) {
+		/* Skip free slots and queues owned by other jails. */
+		if (q->u.msg_qbytes == 0 || q->cred == NULL ||
+		    q->cred->cr_prison != pr)
+			continue;
+		msq_remove(q);
+	}
+	mtx_unlock(&msq_mtx);
+}
+
+SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
+
 #ifdef COMPAT_FREEBSD32
 int
 freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
@@ -1518,8 +1818,6 @@
 {
 	int error;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
-		return (ENOSYS);
 	if (uap->which < 0 ||
 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
 		return (EINVAL);

Modified: trunk/sys/kern/sysv_sem.c
===================================================================
--- trunk/sys/kern/sysv_sem.c	2018-05-26 14:25:55 UTC (rev 9954)
+++ trunk/sys/kern/sysv_sem.c	2018-05-26 14:26:33 UTC (rev 9955)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Implementation of SVID semaphores
  *
@@ -37,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sysv_sem.c 329741 2018-02-21 18:32:57Z brooks $");
 
 #include "opt_compat.h"
 #include "opt_sysvipc.h"
@@ -53,6 +54,7 @@
 #include <sys/mutex.h>
 #include <sys/racct.h>
 #include <sys/sem.h>
+#include <sys/sx.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
@@ -78,7 +80,16 @@
 static int semunload(void);
 static void semexit_myhook(void *arg, struct proc *p);
 static int sysctl_sema(SYSCTL_HANDLER_ARGS);
-static int semvalid(int semid, struct semid_kernel *semakptr);
+static int semvalid(int semid, struct prison *rpr,
+    struct semid_kernel *semakptr);
+static void sem_remove(int semidx, struct ucred *cred);
+static struct prison *sem_find_prison(struct ucred *);
+static int sem_prison_cansee(struct prison *, struct semid_kernel *);
+static int sem_prison_check(void *, void *);
+static int sem_prison_set(void *, void *);
+static int sem_prison_get(void *, void *);
+static int sem_prison_remove(void *, void *);
+static void sem_prison_cleanup(struct prison *);
 
 #ifndef _SYS_SYSPROTO_H_
 struct __semctl_args;
@@ -104,6 +115,7 @@
 LIST_HEAD(, sem_undo) semu_free_list;	/* list of free undo structures */
 static int	*semu;		/* undo structure pool */
 static eventhandler_tag semexit_tag;
+static unsigned sem_prison_slot;	/* prison OSD slot */
 
 #define SEMUNDO_MTX		sem_undo_mtx
 #define SEMUNDO_LOCK()		mtx_lock(&SEMUNDO_MTX);
@@ -208,8 +220,10 @@
     "Semaphore maximum value");
 SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0,
     "Adjust on exit max value");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLTYPE_OPAQUE | CTLFLAG_RD,
-    NULL, 0, sysctl_sema, "", "Semaphore id pool");
+SYSCTL_PROC(_kern_ipc, OID_AUTO, sema,
+    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_sema, "",
+    "Array of struct semid_kernel for each potential semaphore");
 
 static struct syscall_helper_data sem_syscalls[] = {
 	SYSCALL_INIT_HELPER(__semctl),
@@ -247,7 +261,15 @@
 static int
 seminit(void)
 {
+	struct prison *pr;
+	void *rsv;
 	int i, error;
+	osd_method_t methods[PR_MAXMETHOD] = {
+	    [PR_METHOD_CHECK] =		sem_prison_check,
+	    [PR_METHOD_SET] =		sem_prison_set,
+	    [PR_METHOD_GET] =		sem_prison_get,
+	    [PR_METHOD_REMOVE] =	sem_prison_remove,
+	};
 
 	TUNABLE_INT_FETCH("kern.ipc.semmni", &seminfo.semmni);
 	TUNABLE_INT_FETCH("kern.ipc.semmns", &seminfo.semmns);
@@ -288,6 +310,29 @@
 	semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
 	    EVENTHANDLER_PRI_ANY);
 
+	/* Set current prisons according to their allow.sysvipc. */
+	sem_prison_slot = osd_jail_register(NULL, methods);
+	rsv = osd_reserve(sem_prison_slot);
+	prison_lock(&prison0);
+	(void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0);
+	prison_unlock(&prison0);
+	rsv = NULL;
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		if (rsv == NULL)
+			rsv = osd_reserve(sem_prison_slot);
+		prison_lock(pr);
+		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+			(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
+			    &prison0);
+			rsv = NULL;
+		}
+		prison_unlock(pr);
+	}
+	if (rsv != NULL)
+		osd_free_reserved(rsv);
+	sx_sunlock(&allprison_lock);
+
 	error = syscall_helper_register(sem_syscalls);
 	if (error != 0)
 		return (error);
@@ -313,6 +358,8 @@
 #endif
 	syscall_helper_unregister(sem_syscalls);
 	EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
+	if (sem_prison_slot != 0)
+		osd_jail_deregister(sem_prison_slot);
 #ifdef MAC
 	for (i = 0; i < seminfo.semmni; i++)
 		mac_sysvsem_destroy(&sema[i]);
@@ -499,13 +546,76 @@
 }
 
 static int
-semvalid(int semid, struct semid_kernel *semakptr)
+semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr)
 {
 
 	return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
-	    semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0);
+	    semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ||
+	    sem_prison_cansee(rpr, semakptr) ? EINVAL : 0); /* jail check */
 }
 
+/* Free semaphore set semidx and close the gap it leaves in the sem[] pool. */
+static void
+sem_remove(int semidx, struct ucred *cred)
+{
+	struct semid_kernel *semakptr;
+	int i;
+
+	KASSERT(semidx >= 0 && semidx < seminfo.semmni,
+		("semidx out of bounds"));
+	semakptr = &sema[semidx];
+	semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.uid = cred ? cred->cr_uid : 0;
+	semakptr->u.sem_perm.mode = 0;		/* clears SEM_ALLOC */
+	racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
+	crfree(semakptr->cred);
+	semakptr->cred = NULL;
+	SEMUNDO_LOCK();
+	semundo_clear(semidx, -1);
+	SEMUNDO_UNLOCK();
+#ifdef MAC
+	mac_sysvsem_cleanup(semakptr);
+#endif
+	wakeup(semakptr);
+	for (i = 0; i < seminfo.semmni; i++)	/* lock sets stored above */
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base)
+			mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
+	semtot -= semakptr->u.sem_nsems;	/* first, to bound the copy */
+	for (i = semakptr->u.sem_base - sem; i < semtot; i++)
+		sem[i] = sem[i + semakptr->u.sem_nsems];
+	for (i = 0; i < seminfo.semmni; i++) {
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].u.sem_base > semakptr->u.sem_base) {
+			sema[i].u.sem_base -= semakptr->u.sem_nsems;
+			mtx_unlock(&sema_mtx[i]);
+		}
+	}
+}
+
+/* Look up the sysvsem root prison recorded for cred's jail; NULL if none. */
+static struct prison *
+sem_find_prison(struct ucred *cred)
+{
+	struct prison *jail = cred->cr_prison, *root;
+
+	prison_lock(jail);
+	root = osd_jail_get(jail, sem_prison_slot);
+	prison_unlock(jail);
+	return (root);
+}
+
+/* 0 if the set's jail is rpr or, per prison_ischild(), below it; else EINVAL. */
+static int
+sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr)
+{
+
+	if (semakptr->cred == NULL)
+		return (EINVAL);
+	return (rpr == semakptr->cred->cr_prison ||
+	    prison_ischild(rpr, semakptr->cred->cr_prison) ? 0 : EINVAL);
+}
+
 /*
  * Note that the user-mode half of this passes a union, not a pointer.
  */
@@ -582,6 +692,7 @@
 	u_short *array;
 	struct ucred *cred = td->td_ucred;
 	int i, error;
+	struct prison *rpr;
 	struct semid_ds *sbuf;
 	struct semid_kernel *semakptr;
 	struct mtx *sema_mtxp;
@@ -590,7 +701,9 @@
 
 	DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
 	    semid, semnum, cmd, arg));
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+
+	rpr = sem_find_prison(td->td_ucred);
+	if (rpr == NULL)	/* no sysvsem root set for the caller's jail */
 		return (ENOSYS);
 
 	array = NULL;
@@ -610,6 +723,8 @@
 			error = EINVAL;
 			goto done2;
 		}
+		if ((error = sem_prison_cansee(rpr, semakptr)))
+			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 #ifdef MAC
@@ -618,6 +733,8 @@
 			goto done2;
 #endif
 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
+		if (cred->cr_prison != semakptr->cred->cr_prison)
+			arg->buf->sem_perm.key = IPC_PRIVATE;
 		*rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm);
 		mtx_unlock(sema_mtxp);
 		return (0);
@@ -632,6 +749,7 @@
 	if (cmd == IPC_RMID)
 		mtx_lock(&sem_mtx);
 	mtx_lock(sema_mtxp);
+
 #ifdef MAC
 	error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
 	if (error != 0)
@@ -643,42 +761,15 @@
 
 	switch (cmd) {
 	case IPC_RMID:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
-		semakptr->u.sem_perm.cuid = cred->cr_uid;
-		semakptr->u.sem_perm.uid = cred->cr_uid;
-		semakptr->u.sem_perm.mode = 0;
-		racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
-		crfree(semakptr->cred);
-		semakptr->cred = NULL;
-		SEMUNDO_LOCK();
-		semundo_clear(semidx, -1);
-		SEMUNDO_UNLOCK();
-#ifdef MAC
-		mac_sysvsem_cleanup(semakptr);
-#endif
-		wakeup(semakptr);
-		for (i = 0; i < seminfo.semmni; i++) {
-			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
-			    sema[i].u.sem_base > semakptr->u.sem_base)
-				mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
-		}
-		for (i = semakptr->u.sem_base - sem; i < semtot; i++)
-			sem[i] = sem[i + semakptr->u.sem_nsems];
-		for (i = 0; i < seminfo.semmni; i++) {
-			if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
-			    sema[i].u.sem_base > semakptr->u.sem_base) {
-				sema[i].u.sem_base -= semakptr->u.sem_nsems;
-				mtx_unlock(&sema_mtx[i]);
-			}
-		}
-		semtot -= semakptr->u.sem_nsems;
+		sem_remove(semidx, cred);
 		break;
 
 	case IPC_SET:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
 			goto done2;
@@ -691,15 +782,17 @@
 		break;
 
 	case IPC_STAT:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
 		bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
+		if (cred->cr_prison != semakptr->cred->cr_prison)
+			arg->buf->sem_perm.key = IPC_PRIVATE;
 		break;
 
 	case GETNCNT:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -711,7 +804,7 @@
 		break;
 
 	case GETPID:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -723,7 +816,7 @@
 		break;
 
 	case GETVAL:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -759,7 +852,7 @@
 		mtx_unlock(sema_mtxp);		    
 		array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
 		mtx_lock(sema_mtxp);
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
@@ -772,7 +865,7 @@
 		break;
 
 	case GETZCNT:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
 			goto done2;
@@ -784,7 +877,7 @@
 		break;
 
 	case SETVAL:
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
 			goto done2;
@@ -815,7 +908,7 @@
 		mtx_lock(sema_mtxp);
 		if (error)
 			break;
-		if ((error = semvalid(semid, semakptr)) != 0)
+		if ((error = semvalid(semid, rpr, semakptr)) != 0)
 			goto done2;
 		KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
 		if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
@@ -865,7 +958,8 @@
 	struct ucred *cred = td->td_ucred;
 
 	DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+
+	if (sem_find_prison(cred) == NULL)
 		return (ENOSYS);
 
 	mtx_lock(&sem_mtx);
@@ -872,11 +966,18 @@
 	if (key != IPC_PRIVATE) {
 		for (semid = 0; semid < seminfo.semmni; semid++) {
 			if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
+			    sema[semid].cred != NULL &&
+			    sema[semid].cred->cr_prison == cred->cr_prison &&
 			    sema[semid].u.sem_perm.key == key)
 				break;
 		}
 		if (semid < seminfo.semmni) {
 			DPRINTF(("found public key\n"));
+			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
+				DPRINTF(("not exclusive\n"));
+				error = EEXIST;
+				goto done2;
+			}
 			if ((error = ipcperm(td, &sema[semid].u.sem_perm,
 			    semflg & 0700))) {
 				goto done2;
@@ -886,11 +987,6 @@
 				error = EINVAL;
 				goto done2;
 			}
-			if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
-				DPRINTF(("not exclusive\n"));
-				error = EEXIST;
-				goto done2;
-			}
 #ifdef MAC
 			error = mac_sysvsem_check_semget(cred, &sema[semid]);
 			if (error != 0)
@@ -925,12 +1021,14 @@
 			goto done2;
 		}
 #ifdef RACCT
-		PROC_LOCK(td->td_proc);
-		error = racct_add(td->td_proc, RACCT_NSEM, nsems);
-		PROC_UNLOCK(td->td_proc);
-		if (error != 0) {
-			error = ENOSPC;
-			goto done2;
+		if (racct_enable) {
+			PROC_LOCK(td->td_proc);
+			error = racct_add(td->td_proc, RACCT_NSEM, nsems);
+			PROC_UNLOCK(td->td_proc);
+			if (error != 0) {
+				error = ENOSPC;
+				goto done2;
+			}
 		}
 #endif
 		DPRINTF(("semid %d is available\n", semid));
@@ -986,6 +1084,7 @@
 	struct sembuf small_sops[SMALL_SOPS];
 	int semid = uap->semid;
 	size_t nsops = uap->nsops;
+	struct prison *rpr;
 	struct sembuf *sops;
 	struct semid_kernel *semakptr;
 	struct sembuf *sopptr = 0;
@@ -1002,7 +1101,8 @@
 #endif
 	DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = sem_find_prison(td->td_ucred);
+	if (rpr == NULL)	/* no sysvsem root set for the caller's jail */
 		return (ENOSYS);
 
 	semid = IPCID_TO_IX(semid);	/* Convert back to zero origin */
@@ -1019,12 +1119,15 @@
 		return (E2BIG);
 	} else {
 #ifdef RACCT
-		PROC_LOCK(td->td_proc);
-		if (nsops > racct_get_available(td->td_proc, RACCT_NSEMOP)) {
+		if (racct_enable) {
+			PROC_LOCK(td->td_proc);
+			if (nsops >
+			    racct_get_available(td->td_proc, RACCT_NSEMOP)) {
+				PROC_UNLOCK(td->td_proc);
+				return (E2BIG);
+			}
 			PROC_UNLOCK(td->td_proc);
-			return (E2BIG);
 		}
-		PROC_UNLOCK(td->td_proc);
 #endif
 
 		sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK);
@@ -1049,8 +1152,10 @@
 		error = EINVAL;
 		goto done2;
 	}
+	if ((error = sem_prison_cansee(rpr, semakptr)) != 0)
+		goto done2;
 	/*
-	 * Initial pass thru sops to see what permissions are needed.
+	 * Initial pass through sops to see what permissions are needed.
 	 * Also perform any checks that don't need repeating on each
 	 * attempt to satisfy the request vector.
 	 */
@@ -1372,11 +1477,207 @@
 static int
 sysctl_sema(SYSCTL_HANDLER_ARGS)
 {
+	struct prison *pr, *rpr;
+	struct semid_kernel tsemak;
+	int error, i;
 
-	return (SYSCTL_OUT(req, sema,
-	    sizeof(struct semid_kernel) * seminfo.semmni));
+	pr = req->td->td_ucred->cr_prison;
+	rpr = sem_find_prison(req->td->td_ucred);
+	error = 0;
+	for (i = 0; i < seminfo.semmni; i++) {
+		mtx_lock(&sema_mtx[i]);
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) == 0 ||
+		    rpr == NULL || sem_prison_cansee(rpr, &sema[i]) != 0)
+			bzero(&tsemak, sizeof(tsemak));
+		else {
+			tsemak = sema[i];
+			if (tsemak.cred->cr_prison != pr)
+				tsemak.u.sem_perm.key = IPC_PRIVATE;
+		}
+		mtx_unlock(&sema_mtx[i]);
+		error = SYSCTL_OUT(req, &tsemak, sizeof(tsemak));
+		if (error != 0)
+			break;
+	}
+	return (error);
 }
 
+static int
+sem_prison_check(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *prpr;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	/*
+	 * sysvsem is a jailsys integer.
+	 * It must be "disable" if the parent jail is disabled.
+	 */
+	error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys));
+	if (error != ENOENT) {
+		if (error != 0)
+			return (error);
+		switch (jsys) {
+		case JAIL_SYS_DISABLE:
+			break;
+		case JAIL_SYS_NEW:
+		case JAIL_SYS_INHERIT:
+			prison_lock(pr->pr_parent);
+			prpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
+			prison_unlock(pr->pr_parent);
+			if (prpr == NULL)
+				return (EPERM);
+			break;
+		default:
+			return (EINVAL);
+		}
+	}
+
+	return (0);
+}
+
+static int
+sem_prison_set(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *tpr, *orpr, *nrpr, *trpr;
+	struct vfsoptlist *opts = data;
+	void *rsv;
+	int jsys, descend;
+
+	/*
+	 * sysvsem controls which jail is the root of the associated sems (this
+	 * jail or same as the parent), or if the feature is available at all.
+	 */
+	if (vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)) == ENOENT)
+		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
+		    ? JAIL_SYS_INHERIT
+		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
+		    ? JAIL_SYS_DISABLE
+		    : -1;
+	if (jsys == JAIL_SYS_DISABLE) {
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, sem_prison_slot);
+		if (orpr != NULL)
+			osd_jail_del(pr, sem_prison_slot);
+		prison_unlock(pr);
+		if (orpr != NULL) {
+			if (orpr == pr)
+				sem_prison_cleanup(pr);
+			/* Disable all child jails as well. */
+			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+				prison_lock(tpr);
+				trpr = osd_jail_get(tpr, sem_prison_slot);
+				if (trpr != NULL) {
+					osd_jail_del(tpr, sem_prison_slot);
+					prison_unlock(tpr);
+					if (trpr == tpr)
+						sem_prison_cleanup(tpr);
+				} else {
+					prison_unlock(tpr);
+					descend = 0;
+				}
+			}
+		}
+	} else if (jsys != -1) {
+		if (jsys == JAIL_SYS_NEW)
+			nrpr = pr;
+		else {
+			prison_lock(pr->pr_parent);
+			nrpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
+			prison_unlock(pr->pr_parent);
+		}
+		rsv = osd_reserve(sem_prison_slot);
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, sem_prison_slot);
+		if (orpr != nrpr)
+			(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
+			    nrpr);
+		else
+			osd_free_reserved(rsv);
+		prison_unlock(pr);
+		if (orpr != nrpr) {
+			if (orpr == pr)
+				sem_prison_cleanup(pr);
+			if (orpr != NULL) {
+				/* Change child jails matching the old root. */
+				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+					prison_lock(tpr);
+					trpr = osd_jail_get(tpr,
+					    sem_prison_slot);
+					if (trpr == orpr) {
+						(void)osd_jail_set(tpr,
+						    sem_prison_slot, nrpr);
+						prison_unlock(tpr);
+						if (trpr == tpr)
+							sem_prison_cleanup(tpr);
+					} else {
+						prison_unlock(tpr);
+						descend = 0;
+					}
+				}
+			}
+		}
+	}
+
+	return (0);
+}
+
+static int
+sem_prison_get(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *rpr;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	/* Set sysvsem based on the jail's root prison. */
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, sem_prison_slot);
+	prison_unlock(pr);
+	jsys = rpr == NULL ? JAIL_SYS_DISABLE
+	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
+	error = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys));
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+static int
+sem_prison_remove(void *obj, void *data __unused)
+{
+	struct prison *pr = obj;
+	struct prison *rpr;
+
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, sem_prison_slot);
+	prison_unlock(pr);
+	if (rpr == pr)
+		sem_prison_cleanup(pr);
+	return (0);
+}
+
+static void
+sem_prison_cleanup(struct prison *pr)
+{
+	int i;
+
+	/* Remove any sems that belong to this jail. */
+	mtx_lock(&sem_mtx);
+	for (i = 0; i < seminfo.semmni; i++) {
+		if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
+		    sema[i].cred != NULL && sema[i].cred->cr_prison == pr) {
+			mtx_lock(&sema_mtx[i]);
+			sem_remove(i, NULL);
+			mtx_unlock(&sema_mtx[i]);
+		}
+	}
+	mtx_unlock(&sem_mtx);
+}
+
+SYSCTL_JAIL_PARAM_SYS_NODE(sysvsem, CTLFLAG_RW, "SYSV semaphores");
+
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
 
@@ -1403,8 +1704,6 @@
 {
 	int error;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
-		return (ENOSYS);
 	if (uap->which < 0 ||
 	    uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
 		return (EINVAL);

Modified: trunk/sys/kern/sysv_shm.c
===================================================================
--- trunk/sys/kern/sysv_shm.c	2018-05-26 14:25:55 UTC (rev 9954)
+++ trunk/sys/kern/sysv_shm.c	2018-05-26 14:26:33 UTC (rev 9955)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
 /*-
  * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
@@ -60,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sysv_shm.c 329741 2018-02-21 18:32:57Z brooks $");
 
 #include "opt_compat.h"
 #include "opt_sysvipc.h"
@@ -79,6 +80,7 @@
 #include <sys/mutex.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
+#include <sys/rwlock.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
@@ -108,11 +110,11 @@
 #define	SHMSEG_FREE     	0x0200
 #define	SHMSEG_REMOVED  	0x0400
 #define	SHMSEG_ALLOCATED	0x0800
-#define	SHMSEG_WANTED		0x1000
 
 static int shm_last_free, shm_nused, shmalloced;
 vm_size_t shm_committed;
-static struct shmid_kernel	*shmsegs;
+static struct shmid_kernel *shmsegs;
+static unsigned shm_prison_slot;
 
 struct shmmap_state {
 	vm_offset_t va;
@@ -120,9 +122,8 @@
 };
 
 static void shm_deallocate_segment(struct shmid_kernel *);
-static int shm_find_segment_by_key(key_t);
-static struct shmid_kernel *shm_find_segment_by_shmid(int);
-static struct shmid_kernel *shm_find_segment_by_shmidx(int);
+static int shm_find_segment_by_key(struct prison *, key_t);
+static struct shmid_kernel *shm_find_segment(struct prison *, int, bool);
 static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
 static void shmrealloc(void);
 static int shminit(void);
@@ -131,6 +132,14 @@
 static void shmexit_myhook(struct vmspace *vm);
 static void shmfork_myhook(struct proc *p1, struct proc *p2);
 static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
+static void shm_remove(struct shmid_kernel *, int);
+static struct prison *shm_find_prison(struct ucred *);
+static int shm_prison_cansee(struct prison *, struct shmid_kernel *);
+static int shm_prison_check(void *, void *);
+static int shm_prison_set(void *, void *);
+static int shm_prison_get(void *, void *);
+static int shm_prison_remove(void *, void *);
+static void shm_prison_cleanup(struct prison *);
 
 /*
  * Tuneable values.
@@ -180,30 +189,40 @@
 SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
     &shm_allow_removed, 0,
     "Enable/Disable attachment to attached segments marked for removal");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD,
-    NULL, 0, sysctl_shmsegs, "",
-    "Current number of shared memory segments allocated");
+SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD |
+    CTLFLAG_MPSAFE, NULL, 0, sysctl_shmsegs, "",
+    "Array of struct shmid_kernel for each potential shared memory segment");
 
+static struct sx sysvshmsx;
+#define	SYSVSHM_LOCK()		sx_xlock(&sysvshmsx)
+#define	SYSVSHM_UNLOCK()	sx_xunlock(&sysvshmsx)
+#define	SYSVSHM_ASSERT_LOCKED()	sx_assert(&sysvshmsx, SA_XLOCKED)
+
 static int
-shm_find_segment_by_key(key)
-	key_t key;
+shm_find_segment_by_key(struct prison *pr, key_t key)
 {
 	int i;
 
 	for (i = 0; i < shmalloced; i++)
 		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
+		    shmsegs[i].cred != NULL &&
+		    shmsegs[i].cred->cr_prison == pr &&
 		    shmsegs[i].u.shm_perm.key == key)
 			return (i);
 	return (-1);
 }
 
+/*
+ * Finds segment either by shmid if is_shmid is true, or by segnum if
+ * is_shmid is false.
+ */
 static struct shmid_kernel *
-shm_find_segment_by_shmid(int shmid)
+shm_find_segment(struct prison *rpr, int arg, bool is_shmid)
 {
+	struct shmid_kernel *shmseg;
 	int segnum;
-	struct shmid_kernel *shmseg;
 
-	segnum = IPCID_TO_IX(shmid);
+	segnum = is_shmid ? IPCID_TO_IX(arg) : arg;
 	if (segnum < 0 || segnum >= shmalloced)
 		return (NULL);
 	shmseg = &shmsegs[segnum];
@@ -210,33 +229,18 @@
 	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
 	    (!shm_allow_removed &&
 	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
-	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
+	    (is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)) ||
+	    shm_prison_cansee(rpr, shmseg) != 0)
 		return (NULL);
 	return (shmseg);
 }
 
-static struct shmid_kernel *
-shm_find_segment_by_shmidx(int segnum)
-{
-	struct shmid_kernel *shmseg;
-
-	if (segnum < 0 || segnum >= shmalloced)
-		return (NULL);
-	shmseg = &shmsegs[segnum];
-	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
-	    (!shm_allow_removed &&
-	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
-		return (NULL);
-	return (shmseg);
-}
-
 static void
-shm_deallocate_segment(shmseg)
-	struct shmid_kernel *shmseg;
+shm_deallocate_segment(struct shmid_kernel *shmseg)
 {
 	vm_size_t size;
 
-	GIANT_REQUIRED;
+	SYSVSHM_ASSERT_LOCKED();
 
 	vm_object_deallocate(shmseg->object);
 	shmseg->object = NULL;
@@ -260,9 +264,11 @@
 	int segnum, result;
 	vm_size_t size;
 
-	GIANT_REQUIRED;
+	SYSVSHM_ASSERT_LOCKED();
+	segnum = IPCID_TO_IX(shmmap_s->shmid);
+	KASSERT(segnum >= 0 && segnum < shmalloced,
+	    ("segnum %d shmalloced %d", segnum, shmalloced));
 
-	segnum = IPCID_TO_IX(shmmap_s->shmid);
 	shmseg = &shmsegs[segnum];
 	size = round_page(shmseg->u.shm_segsz);
 	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
@@ -278,81 +284,108 @@
 	return (0);
 }
 
-#ifndef _SYS_SYSPROTO_H_
-struct shmdt_args {
-	const void *shmaddr;
-};
-#endif
-int
-sys_shmdt(td, uap)
-	struct thread *td;
-	struct shmdt_args *uap;
+static void
+shm_remove(struct shmid_kernel *shmseg, int segnum)
 {
+
+	shmseg->u.shm_perm.key = IPC_PRIVATE;
+	shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
+	if (shmseg->u.shm_nattch <= 0) {
+		shm_deallocate_segment(shmseg);
+		shm_last_free = segnum;
+	}
+}
+
+static struct prison *
+shm_find_prison(struct ucred *cred)
+{
+	struct prison *pr, *rpr;
+
+	pr = cred->cr_prison;
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, shm_prison_slot);
+	prison_unlock(pr);
+	return (rpr);
+}
+
+static int
+shm_prison_cansee(struct prison *rpr, struct shmid_kernel *shmseg)
+{
+
+	if (shmseg->cred == NULL ||
+	    !(rpr == shmseg->cred->cr_prison ||
+	      prison_ischild(rpr, shmseg->cred->cr_prison)))
+		return (EINVAL);
+	return (0);
+}
+
+static int
+kern_shmdt_locked(struct thread *td, const void *shmaddr)
+{
 	struct proc *p = td->td_proc;
 	struct shmmap_state *shmmap_s;
 #ifdef MAC
 	struct shmid_kernel *shmsegptr;
 #endif
-	int i;
-	int error = 0;
+	int error, i;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	SYSVSHM_ASSERT_LOCKED();
+	if (shm_find_prison(td->td_ucred) == NULL)
 		return (ENOSYS);
-	mtx_lock(&Giant);
 	shmmap_s = p->p_vmspace->vm_shm;
- 	if (shmmap_s == NULL) {
-		error = EINVAL;
-		goto done2;
-	}
+ 	if (shmmap_s == NULL)
+		return (EINVAL);
 	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
 		if (shmmap_s->shmid != -1 &&
-		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
+		    shmmap_s->va == (vm_offset_t)shmaddr) {
 			break;
 		}
 	}
-	if (i == shminfo.shmseg) {
-		error = EINVAL;
-		goto done2;
-	}
+	if (i == shminfo.shmseg)
+		return (EINVAL);
 #ifdef MAC
 	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
 	error = mac_sysvshm_check_shmdt(td->td_ucred, shmsegptr);
 	if (error != 0)
-		goto done2;
+		return (error);
 #endif
 	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
-done2:
-	mtx_unlock(&Giant);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
-struct shmat_args {
-	int shmid;
+struct shmdt_args {
 	const void *shmaddr;
-	int shmflg;
 };
 #endif
 int
-kern_shmat(td, shmid, shmaddr, shmflg)
-	struct thread *td;
-	int shmid;
-	const void *shmaddr;
-	int shmflg;
+sys_shmdt(struct thread *td, struct shmdt_args *uap)
 {
+	int error;
+
+	SYSVSHM_LOCK();
+	error = kern_shmdt_locked(td, uap->shmaddr);
+	SYSVSHM_UNLOCK();
+	return (error);
+}
+
+static int
+kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
+    int shmflg)
+{
+	struct prison *rpr;
 	struct proc *p = td->td_proc;
-	int i, flags;
 	struct shmid_kernel *shmseg;
-	struct shmmap_state *shmmap_s = NULL;
+	struct shmmap_state *shmmap_s;
 	vm_offset_t attach_va;
 	vm_prot_t prot;
 	vm_size_t size;
-	int rv;
-	int error = 0;
+	int error, i, rv;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	SYSVSHM_ASSERT_LOCKED();
+	rpr = shm_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
-	mtx_lock(&Giant);
 	shmmap_s = p->p_vmspace->vm_shm;
 	if (shmmap_s == NULL) {
 		shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state),
@@ -359,21 +392,20 @@
 		    M_SHM, M_WAITOK);
 		for (i = 0; i < shminfo.shmseg; i++)
 			shmmap_s[i].shmid = -1;
+		KASSERT(p->p_vmspace->vm_shm == NULL, ("raced"));
 		p->p_vmspace->vm_shm = shmmap_s;
 	}
-	shmseg = shm_find_segment_by_shmid(shmid);
-	if (shmseg == NULL) {
-		error = EINVAL;
-		goto done2;
-	}
+	shmseg = shm_find_segment(rpr, shmid, true);
+	if (shmseg == NULL)
+		return (EINVAL);
 	error = ipcperm(td, &shmseg->u.shm_perm,
 	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
-	if (error)
-		goto done2;
+	if (error != 0)
+		return (error);
 #ifdef MAC
 	error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg);
 	if (error != 0)
-		goto done2;
+		return (error);
 #endif
 	for (i = 0; i < shminfo.shmseg; i++) {
 		if (shmmap_s->shmid == -1)
@@ -380,25 +412,19 @@
 			break;
 		shmmap_s++;
 	}
-	if (i >= shminfo.shmseg) {
-		error = EMFILE;
-		goto done2;
-	}
+	if (i >= shminfo.shmseg)
+		return (EMFILE);
 	size = round_page(shmseg->u.shm_segsz);
 	prot = VM_PROT_READ;
 	if ((shmflg & SHM_RDONLY) == 0)
 		prot |= VM_PROT_WRITE;
-	flags = MAP_ANON | MAP_SHARED;
-	if (shmaddr) {
-		flags |= MAP_FIXED;
-		if (shmflg & SHM_RND) {
+	if (shmaddr != NULL) {
+		if ((shmflg & SHM_RND) != 0)
 			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
-		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
+		else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0)
 			attach_va = (vm_offset_t)shmaddr;
-		} else {
-			error = EINVAL;
-			goto done2;
-		}
+		else
+			return (EINVAL);
 	} else {
 		/*
 		 * This is just a hint to vm_map_find() about where to
@@ -412,12 +438,11 @@
 
 	vm_object_reference(shmseg->object);
 	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object,
-	    0, &attach_va, size, (flags & MAP_FIXED) ? VMFS_NO_SPACE :
-	    VMFS_ANY_SPACE, prot, prot, MAP_INHERIT_SHARE);
+	    0, &attach_va, size, 0, shmaddr != NULL ? VMFS_NO_SPACE :
+	    VMFS_OPTIMAL_SPACE, prot, prot, MAP_INHERIT_SHARE);
 	if (rv != KERN_SUCCESS) {
 		vm_object_deallocate(shmseg->object);
-		error = ENOMEM;
-		goto done2;
+		return (ENOMEM);
 	}
 
 	shmmap_s->va = attach_va;
@@ -426,34 +451,51 @@
 	shmseg->u.shm_atime = time_second;
 	shmseg->u.shm_nattch++;
 	td->td_retval[0] = attach_va;
-done2:
-	mtx_unlock(&Giant);
 	return (error);
 }
 
 int
-sys_shmat(td, uap)
-	struct thread *td;
-	struct shmat_args *uap;
+kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg)
 {
-	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
+	int error;
+
+	SYSVSHM_LOCK();
+	error = kern_shmat_locked(td, shmid, shmaddr, shmflg);
+	SYSVSHM_UNLOCK();
+	return (error);
 }
 
+#ifndef _SYS_SYSPROTO_H_
+struct shmat_args {
+	int shmid;
+	const void *shmaddr;
+	int shmflg;
+};
+#endif
 int
-kern_shmctl(td, shmid, cmd, buf, bufsz)
-	struct thread *td;
-	int shmid;
-	int cmd;
-	void *buf;
-	size_t *bufsz;
+sys_shmat(struct thread *td, struct shmat_args *uap)
 {
-	int error = 0;
+
+	return (kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg));
+}
+
+static int
+kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
+    size_t *bufsz)
+{
+	struct prison *rpr;
 	struct shmid_kernel *shmseg;
+	struct shmid_ds *shmidp;
+	struct shm_info shm_info;
+	int error;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	SYSVSHM_ASSERT_LOCKED();
+
+	rpr = shm_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
 
-	mtx_lock(&Giant);
+	error = 0;
 	switch (cmd) {
 	/*
 	 * It is possible that kern_shmctl is being called from the Linux ABI
@@ -469,9 +511,8 @@
 		if (bufsz)
 			*bufsz = sizeof(shminfo);
 		td->td_retval[0] = shmalloced;
-		goto done2;
+		return (0);
 	case SHM_INFO: {
-		struct shm_info shm_info;
 		shm_info.used_ids = shm_nused;
 		shm_info.shm_rss = 0;	/*XXX where to get from ? */
 		shm_info.shm_tot = 0;	/*XXX where to get from ? */
@@ -479,62 +520,54 @@
 		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
 		shm_info.swap_successes = 0;	/*XXX where to get from ? */
 		memcpy(buf, &shm_info, sizeof(shm_info));
-		if (bufsz)
+		if (bufsz != NULL)
 			*bufsz = sizeof(shm_info);
 		td->td_retval[0] = shmalloced;
-		goto done2;
+		return (0);
 	}
 	}
-	if (cmd == SHM_STAT)
-		shmseg = shm_find_segment_by_shmidx(shmid);
-	else
-		shmseg = shm_find_segment_by_shmid(shmid);
-	if (shmseg == NULL) {
-		error = EINVAL;
-		goto done2;
-	}
+	shmseg = shm_find_segment(rpr, shmid, cmd != SHM_STAT);
+	if (shmseg == NULL)
+		return (EINVAL);
 #ifdef MAC
 	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd);
 	if (error != 0)
-		goto done2;
+		return (error);
 #endif
 	switch (cmd) {
 	case SHM_STAT:
 	case IPC_STAT:
+		shmidp = (struct shmid_ds *)buf;
 		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
-		if (error)
-			goto done2;
-		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
-		if (bufsz)
+		if (error != 0)
+			return (error);
+		memcpy(shmidp, &shmseg->u, sizeof(struct shmid_ds));
+		if (td->td_ucred->cr_prison != shmseg->cred->cr_prison)
+			shmidp->shm_perm.key = IPC_PRIVATE;
+		if (bufsz != NULL)
 			*bufsz = sizeof(struct shmid_ds);
-		if (cmd == SHM_STAT)
-			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
+		if (cmd == SHM_STAT) {
+			td->td_retval[0] = IXSEQ_TO_IPCID(shmid,
+			    shmseg->u.shm_perm);
+		}
 		break;
-	case IPC_SET: {
-		struct shmid_ds *shmid;
-
-		shmid = (struct shmid_ds *)buf;
+	case IPC_SET:
+		shmidp = (struct shmid_ds *)buf;
 		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
-		if (error)
-			goto done2;
-		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
-		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
+		if (error != 0)
+			return (error);
+		shmseg->u.shm_perm.uid = shmidp->shm_perm.uid;
+		shmseg->u.shm_perm.gid = shmidp->shm_perm.gid;
 		shmseg->u.shm_perm.mode =
 		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
-		    (shmid->shm_perm.mode & ACCESSPERMS);
+		    (shmidp->shm_perm.mode & ACCESSPERMS);
 		shmseg->u.shm_ctime = time_second;
 		break;
-	}
 	case IPC_RMID:
 		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
-		if (error)
-			goto done2;
-		shmseg->u.shm_perm.key = IPC_PRIVATE;
-		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
-		if (shmseg->u.shm_nattch <= 0) {
-			shm_deallocate_segment(shmseg);
-			shm_last_free = IPCID_TO_IX(shmid);
-		}
+		if (error != 0)
+			return (error);
+		shm_remove(shmseg, IPCID_TO_IX(shmid));
 		break;
 #if 0
 	case SHM_LOCK:
@@ -544,11 +577,21 @@
 		error = EINVAL;
 		break;
 	}
-done2:
-	mtx_unlock(&Giant);
 	return (error);
 }
 
+int
+kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz)
+{
+	int error;
+
+	SYSVSHM_LOCK();
+	error = kern_shmctl_locked(td, shmid, cmd, buf, bufsz);
+	SYSVSHM_UNLOCK();
+	return (error);
+}
+
+
 #ifndef _SYS_SYSPROTO_H_
 struct shmctl_args {
 	int shmid;
@@ -557,9 +600,7 @@
 };
 #endif
 int
-sys_shmctl(td, uap)
-	struct thread *td;
-	struct shmctl_args *uap;
+sys_shmctl(struct thread *td, struct shmctl_args *uap)
 {
 	int error = 0;
 	struct shmid_ds buf;
@@ -601,28 +642,18 @@
 
 
 static int
-shmget_existing(td, uap, mode, segnum)
-	struct thread *td;
-	struct shmget_args *uap;
-	int mode;
-	int segnum;
+shmget_existing(struct thread *td, struct shmget_args *uap, int mode,
+    int segnum)
 {
 	struct shmid_kernel *shmseg;
+#ifdef MAC
 	int error;
+#endif
 
+	SYSVSHM_ASSERT_LOCKED();
+	KASSERT(segnum >= 0 && segnum < shmalloced,
+	    ("segnum %d shmalloced %d", segnum, shmalloced));
 	shmseg = &shmsegs[segnum];
-	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
-		/*
-		 * This segment is in the process of being allocated.  Wait
-		 * until it's done, and look the key up again (in case the
-		 * allocation failed or it was freed).
-		 */
-		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
-		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
-		if (error)
-			return (error);
-		return (EAGAIN);
-	}
 	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
 		return (EEXIST);
 #ifdef MAC
@@ -637,18 +668,15 @@
 }
 
 static int
-shmget_allocate_segment(td, uap, mode)
-	struct thread *td;
-	struct shmget_args *uap;
-	int mode;
+shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode)
 {
-	int i, segnum, shmid;
-	size_t size;
 	struct ucred *cred = td->td_ucred;
 	struct shmid_kernel *shmseg;
 	vm_object_t shm_object;
+	int i, segnum;
+	size_t size;
 
-	GIANT_REQUIRED;
+	SYSVSHM_ASSERT_LOCKED();
 
 	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
 		return (EINVAL);
@@ -669,30 +697,26 @@
 		segnum = shm_last_free;
 		shm_last_free = -1;
 	}
+	KASSERT(segnum >= 0 && segnum < shmalloced,
+	    ("segnum %d shmalloced %d", segnum, shmalloced));
 	shmseg = &shmsegs[segnum];
 #ifdef RACCT
-	PROC_LOCK(td->td_proc);
-	if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
+	if (racct_enable) {
+		PROC_LOCK(td->td_proc);
+		if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
+			PROC_UNLOCK(td->td_proc);
+			return (ENOSPC);
+		}
+		if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
+			racct_sub(td->td_proc, RACCT_NSHM, 1);
+			PROC_UNLOCK(td->td_proc);
+			return (ENOMEM);
+		}
 		PROC_UNLOCK(td->td_proc);
-		return (ENOSPC);
 	}
-	if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
-		racct_sub(td->td_proc, RACCT_NSHM, 1);
-		PROC_UNLOCK(td->td_proc);
-		return (ENOMEM);
-	}
-	PROC_UNLOCK(td->td_proc);
 #endif
+
 	/*
-	 * In case we sleep in malloc(), mark the segment present but deleted
-	 * so that noone else tries to create the same key.
-	 */
-	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
-	shmseg->u.shm_perm.key = uap->key;
-	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
-	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
-	
-	/*
 	 * We make sure that we have allocated a pager before we need
 	 * to.
 	 */
@@ -700,23 +724,27 @@
 	    0, size, VM_PROT_DEFAULT, 0, cred);
 	if (shm_object == NULL) {
 #ifdef RACCT
-		PROC_LOCK(td->td_proc);
-		racct_sub(td->td_proc, RACCT_NSHM, 1);
-		racct_sub(td->td_proc, RACCT_SHMSIZE, size);
-		PROC_UNLOCK(td->td_proc);
+		if (racct_enable) {
+			PROC_LOCK(td->td_proc);
+			racct_sub(td->td_proc, RACCT_NSHM, 1);
+			racct_sub(td->td_proc, RACCT_SHMSIZE, size);
+			PROC_UNLOCK(td->td_proc);
+		}
 #endif
 		return (ENOMEM);
 	}
-	VM_OBJECT_LOCK(shm_object);
+	shm_object->pg_color = 0;
+	VM_OBJECT_WLOCK(shm_object);
 	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
-	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
-	VM_OBJECT_UNLOCK(shm_object);
+	vm_object_set_flag(shm_object, OBJ_COLORED | OBJ_NOSPLIT);
+	VM_OBJECT_WUNLOCK(shm_object);
 
 	shmseg->object = shm_object;
 	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
 	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
-	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
-	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
+	shmseg->u.shm_perm.mode = (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
+	shmseg->u.shm_perm.key = uap->key;
+	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
 	shmseg->cred = crhold(cred);
 	shmseg->u.shm_segsz = uap->size;
 	shmseg->u.shm_cpid = td->td_proc->p_pid;
@@ -728,15 +756,8 @@
 	shmseg->u.shm_ctime = time_second;
 	shm_committed += btoc(size);
 	shm_nused++;
-	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
-		/*
-		 * Somebody else wanted this key while we were asleep.  Wake
-		 * them up now.
-		 */
-		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
-		wakeup(shmseg);
-	}
-	td->td_retval[0] = shmid;
+	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
+
 	return (0);
 }
 
@@ -748,54 +769,53 @@
 };
 #endif
 int
-sys_shmget(td, uap)
-	struct thread *td;
-	struct shmget_args *uap;
+sys_shmget(struct thread *td, struct shmget_args *uap)
 {
 	int segnum, mode;
 	int error;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	if (shm_find_prison(td->td_ucred) == NULL)
 		return (ENOSYS);
-	mtx_lock(&Giant);
 	mode = uap->shmflg & ACCESSPERMS;
-	if (uap->key != IPC_PRIVATE) {
-	again:
-		segnum = shm_find_segment_by_key(uap->key);
-		if (segnum >= 0) {
+	SYSVSHM_LOCK();
+	if (uap->key == IPC_PRIVATE) {
+		error = shmget_allocate_segment(td, uap, mode);
+	} else {
+		segnum = shm_find_segment_by_key(td->td_ucred->cr_prison,
+		    uap->key);
+		if (segnum >= 0)
 			error = shmget_existing(td, uap, mode, segnum);
-			if (error == EAGAIN)
-				goto again;
-			goto done2;
-		}
-		if ((uap->shmflg & IPC_CREAT) == 0) {
+		else if ((uap->shmflg & IPC_CREAT) == 0)
 			error = ENOENT;
-			goto done2;
-		}
+		else
+			error = shmget_allocate_segment(td, uap, mode);
 	}
-	error = shmget_allocate_segment(td, uap, mode);
-done2:
-	mtx_unlock(&Giant);
+	SYSVSHM_UNLOCK();
 	return (error);
 }
 
 static void
-shmfork_myhook(p1, p2)
-	struct proc *p1, *p2;
+shmfork_myhook(struct proc *p1, struct proc *p2)
 {
 	struct shmmap_state *shmmap_s;
 	size_t size;
 	int i;
 
-	mtx_lock(&Giant);
+	SYSVSHM_LOCK();
 	size = shminfo.shmseg * sizeof(struct shmmap_state);
 	shmmap_s = malloc(size, M_SHM, M_WAITOK);
 	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
 	p2->p_vmspace->vm_shm = shmmap_s;
-	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
-		if (shmmap_s->shmid != -1)
+	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
+		if (shmmap_s->shmid != -1) {
+			KASSERT(IPCID_TO_IX(shmmap_s->shmid) >= 0 &&
+			    IPCID_TO_IX(shmmap_s->shmid) < shmalloced,
+			    ("segnum %d shmalloced %d",
+			    IPCID_TO_IX(shmmap_s->shmid), shmalloced));
 			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
-	mtx_unlock(&Giant);
+		}
+	}
+	SYSVSHM_UNLOCK();
 }
 
 static void
@@ -804,14 +824,15 @@
 	struct shmmap_state *base, *shm;
 	int i;
 
-	if ((base = vm->vm_shm) != NULL) {
+	base = vm->vm_shm;
+	if (base != NULL) {
 		vm->vm_shm = NULL;
-		mtx_lock(&Giant);
+		SYSVSHM_LOCK();
 		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
 			if (shm->shmid != -1)
 				shm_delete_mapping(vm, shm);
 		}
-		mtx_unlock(&Giant);
+		SYSVSHM_UNLOCK();
 		free(base, M_SHM);
 	}
 }
@@ -819,15 +840,15 @@
 static void
 shmrealloc(void)
 {
+	struct shmid_kernel *newsegs;
 	int i;
-	struct shmid_kernel *newsegs;
 
+	SYSVSHM_ASSERT_LOCKED();
+
 	if (shmalloced >= shminfo.shmmni)
 		return;
 
 	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
-	if (newsegs == NULL)
-		return;
 	for (i = 0; i < shmalloced; i++)
 		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
 	for (; i < shminfo.shmmni; i++) {
@@ -880,9 +901,17 @@
 #endif
 
 static int
-shminit()
+shminit(void)
 {
+	struct prison *pr;
+	void *rsv;
 	int i, error;
+	osd_method_t methods[PR_MAXMETHOD] = {
+	    [PR_METHOD_CHECK] =		shm_prison_check,
+	    [PR_METHOD_SET] =		shm_prison_set,
+	    [PR_METHOD_GET] =		shm_prison_get,
+	    [PR_METHOD_REMOVE] =	shm_prison_remove,
+	};
 
 #ifndef BURN_BRIDGES
 	if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0)
@@ -889,14 +918,14 @@
 		printf("kern.ipc.shmmaxpgs is now called kern.ipc.shmall!\n");
 #endif
 	TUNABLE_ULONG_FETCH("kern.ipc.shmall", &shminfo.shmall);
-
-	/* Initialize shmmax dealing with possible overflow. */
-	for (i = PAGE_SIZE; i > 0; i--) {
-		shminfo.shmmax = shminfo.shmall * i;
-		if (shminfo.shmmax >= shminfo.shmall)
-			break;
+	if (!TUNABLE_ULONG_FETCH("kern.ipc.shmmax", &shminfo.shmmax)) {
+		/* Initialize shmmax dealing with possible overflow. */
+		for (i = PAGE_SIZE; i > 0; i--) {
+			shminfo.shmmax = shminfo.shmall * i;
+			if (shminfo.shmmax >= shminfo.shmall)
+				break;
+		}
 	}
-
 	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
 	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
 	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
@@ -914,9 +943,33 @@
 	shm_last_free = 0;
 	shm_nused = 0;
 	shm_committed = 0;
+	sx_init(&sysvshmsx, "sysvshmsx");
 	shmexit_hook = &shmexit_myhook;
 	shmfork_hook = &shmfork_myhook;
 
+	/* Set current prisons according to their allow.sysvipc. */
+	shm_prison_slot = osd_jail_register(NULL, methods);
+	rsv = osd_reserve(shm_prison_slot);
+	prison_lock(&prison0);
+	(void)osd_jail_set_reserved(&prison0, shm_prison_slot, rsv, &prison0);
+	prison_unlock(&prison0);
+	rsv = NULL;
+	sx_slock(&allprison_lock);
+	TAILQ_FOREACH(pr, &allprison, pr_list) {
+		if (rsv == NULL)
+			rsv = osd_reserve(shm_prison_slot);
+		prison_lock(pr);
+		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
+			(void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
+			    &prison0);
+			rsv = NULL;
+		}
+		prison_unlock(pr);
+	}
+	if (rsv != NULL)
+		osd_free_reserved(rsv);
+	sx_sunlock(&allprison_lock);
+
 	error = syscall_helper_register(shm_syscalls);
 	if (error != 0)
 		return (error);
@@ -929,7 +982,7 @@
 }
 
 static int
-shmunload()
+shmunload(void)
 {
 	int i;	
 
@@ -940,6 +993,8 @@
 	syscall32_helper_unregister(shm32_syscalls);
 #endif
 	syscall_helper_unregister(shm_syscalls);
+	if (shm_prison_slot != 0)
+		osd_jail_deregister(shm_prison_slot);
 
 	for (i = 0; i < shmalloced; i++) {
 #ifdef MAC
@@ -956,6 +1011,7 @@
 	free(shmsegs, M_SHM);
 	shmexit_hook = NULL;
 	shmfork_hook = NULL;
+	sx_destroy(&sysvshmsx);
 	return (0);
 }
 
@@ -962,10 +1018,209 @@
 static int
 sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
 {
+	struct shmid_kernel tshmseg;
+	struct prison *pr, *rpr;
+	int error, i;
 
-	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
+	SYSVSHM_LOCK();
+	pr = req->td->td_ucred->cr_prison;
+	rpr = shm_find_prison(req->td->td_ucred);
+	error = 0;
+	for (i = 0; i < shmalloced; i++) {
+		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
+		    rpr == NULL || shm_prison_cansee(rpr, &shmsegs[i]) != 0) {
+			bzero(&tshmseg, sizeof(tshmseg));
+			tshmseg.u.shm_perm.mode = SHMSEG_FREE;
+		} else {
+			tshmseg = shmsegs[i];
+			if (tshmseg.cred->cr_prison != pr)
+				tshmseg.u.shm_perm.key = IPC_PRIVATE;
+		}
+		error = SYSCTL_OUT(req, &tshmseg, sizeof(tshmseg));
+		if (error != 0)
+			break;
+	}
+	SYSVSHM_UNLOCK();
+	return (error);
 }
 
+static int
+shm_prison_check(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *prpr;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	/*
+	 * sysvshm is a jailsys integer.
+	 * It must be "disable" if the parent jail is disabled.
+	 */
+	error = vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys));
+	if (error != ENOENT) {
+		if (error != 0)
+			return (error);
+		switch (jsys) {
+		case JAIL_SYS_DISABLE:
+			break;
+		case JAIL_SYS_NEW:
+		case JAIL_SYS_INHERIT:
+			prison_lock(pr->pr_parent);
+			prpr = osd_jail_get(pr->pr_parent, shm_prison_slot);
+			prison_unlock(pr->pr_parent);
+			if (prpr == NULL)
+				return (EPERM);
+			break;
+		default:
+			return (EINVAL);
+		}
+	}
+
+	return (0);
+}
+
+static int
+shm_prison_set(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *tpr, *orpr, *nrpr, *trpr;
+	struct vfsoptlist *opts = data;
+	void *rsv;
+	int jsys, descend;
+
+	/*
+	 * sysvshm controls which jail is the root of the associated segments
+	 * (this jail or same as the parent), or if the feature is available
+	 * at all.
+	 */
+	if (vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)) == ENOENT)
+		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
+		    ? JAIL_SYS_INHERIT
+		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
+		    ? JAIL_SYS_DISABLE
+		    : -1;
+	if (jsys == JAIL_SYS_DISABLE) {
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, shm_prison_slot);
+		if (orpr != NULL)
+			osd_jail_del(pr, shm_prison_slot);
+		prison_unlock(pr);
+		if (orpr != NULL) {
+			if (orpr == pr)
+				shm_prison_cleanup(pr);
+			/* Disable all child jails as well. */
+			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+				prison_lock(tpr);
+				trpr = osd_jail_get(tpr, shm_prison_slot);
+				if (trpr != NULL) {
+					osd_jail_del(tpr, shm_prison_slot);
+					prison_unlock(tpr);
+					if (trpr == tpr)
+						shm_prison_cleanup(tpr);
+				} else {
+					prison_unlock(tpr);
+					descend = 0;
+				}
+			}
+		}
+	} else if (jsys != -1) {
+		if (jsys == JAIL_SYS_NEW)
+			nrpr = pr;
+		else {
+			prison_lock(pr->pr_parent);
+			nrpr = osd_jail_get(pr->pr_parent, shm_prison_slot);
+			prison_unlock(pr->pr_parent);
+		}
+		rsv = osd_reserve(shm_prison_slot);
+		prison_lock(pr);
+		orpr = osd_jail_get(pr, shm_prison_slot);
+		if (orpr != nrpr)
+			(void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
+			    nrpr);
+		else
+			osd_free_reserved(rsv);
+		prison_unlock(pr);
+		if (orpr != nrpr) {
+			if (orpr == pr)
+				shm_prison_cleanup(pr);
+			if (orpr != NULL) {
+				/* Change child jails matching the old root. */
+				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
+					prison_lock(tpr);
+					trpr = osd_jail_get(tpr,
+					    shm_prison_slot);
+					if (trpr == orpr) {
+						(void)osd_jail_set(tpr,
+						    shm_prison_slot, nrpr);
+						prison_unlock(tpr);
+						if (trpr == tpr)
+							shm_prison_cleanup(tpr);
+					} else {
+						prison_unlock(tpr);
+						descend = 0;
+					}
+				}
+			}
+		}
+	}
+
+	return (0);
+}
+
+static int
+shm_prison_get(void *obj, void *data)
+{
+	struct prison *pr = obj;
+	struct prison *rpr;
+	struct vfsoptlist *opts = data;
+	int error, jsys;
+
+	/* Set sysvshm based on the jail's root prison. */
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, shm_prison_slot);
+	prison_unlock(pr);
+	jsys = rpr == NULL ? JAIL_SYS_DISABLE
+	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
+	error = vfs_setopt(opts, "sysvshm", &jsys, sizeof(jsys));
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+static int
+shm_prison_remove(void *obj, void *data __unused)
+{
+	struct prison *pr = obj;
+	struct prison *rpr;
+
+	SYSVSHM_LOCK();
+	prison_lock(pr);
+	rpr = osd_jail_get(pr, shm_prison_slot);
+	prison_unlock(pr);
+	if (rpr == pr)
+		shm_prison_cleanup(pr);
+	SYSVSHM_UNLOCK();
+	return (0);
+}
+
+static void
+shm_prison_cleanup(struct prison *pr)
+{
+	struct shmid_kernel *shmseg;
+	int i;
+
+	/* Remove any segments that belong to this jail. */
+	for (i = 0; i < shmalloced; i++) {
+		shmseg = &shmsegs[i];
+		if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) &&
+		    shmseg->cred != NULL && shmseg->cred->cr_prison == pr) {
+			shm_remove(shmseg, i);
+		}
+	}
+}
+
+SYSCTL_JAIL_PARAM_SYS_NODE(sysvshm, CTLFLAG_RW, "SYSV shared memory");
+
 #if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
 struct oshmid_ds {
 	struct	ipc_perm_old shm_perm;	/* operation perms */
@@ -990,46 +1245,46 @@
 {
 #ifdef COMPAT_43
 	int error = 0;
+	struct prison *rpr;
 	struct shmid_kernel *shmseg;
 	struct oshmid_ds outbuf;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
+	rpr = shm_find_prison(td->td_ucred);
+	if (rpr == NULL)
 		return (ENOSYS);
-	mtx_lock(&Giant);
-	shmseg = shm_find_segment_by_shmid(uap->shmid);
+	if (uap->cmd != IPC_STAT) {
+		return (freebsd7_shmctl(td,
+		    (struct freebsd7_shmctl_args *)uap));
+	}
+	SYSVSHM_LOCK();
+	shmseg = shm_find_segment(rpr, uap->shmid, true);
 	if (shmseg == NULL) {
-		error = EINVAL;
-		goto done2;
+		SYSVSHM_UNLOCK();
+		return (EINVAL);
 	}
-	switch (uap->cmd) {
-	case IPC_STAT:
-		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
-		if (error)
-			goto done2;
+	error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
+	if (error != 0) {
+		SYSVSHM_UNLOCK();
+		return (error);
+	}
 #ifdef MAC
-		error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
-		if (error != 0)
-			goto done2;
+	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
+	if (error != 0) {
+		SYSVSHM_UNLOCK();
+		return (error);
+	}
 #endif
-		ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
-		outbuf.shm_segsz = shmseg->u.shm_segsz;
-		outbuf.shm_cpid = shmseg->u.shm_cpid;
-		outbuf.shm_lpid = shmseg->u.shm_lpid;
-		outbuf.shm_nattch = shmseg->u.shm_nattch;
-		outbuf.shm_atime = shmseg->u.shm_atime;
-		outbuf.shm_dtime = shmseg->u.shm_dtime;
-		outbuf.shm_ctime = shmseg->u.shm_ctime;
-		outbuf.shm_handle = shmseg->object;
-		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
-		if (error)
-			goto done2;
-		break;
-	default:
-		error = freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap);
-		break;
-	}
-done2:
-	mtx_unlock(&Giant);
+	ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
+	outbuf.shm_segsz = shmseg->u.shm_segsz;
+	outbuf.shm_cpid = shmseg->u.shm_cpid;
+	outbuf.shm_lpid = shmseg->u.shm_lpid;
+	outbuf.shm_nattch = shmseg->u.shm_nattch;
+	outbuf.shm_atime = shmseg->u.shm_atime;
+	outbuf.shm_dtime = shmseg->u.shm_dtime;
+	outbuf.shm_ctime = shmseg->u.shm_ctime;
+	outbuf.shm_handle = shmseg->object;
+	SYSVSHM_UNLOCK();
+	error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
 	return (error);
 #else
 	return (EINVAL);
@@ -1043,27 +1298,23 @@
 	(sy_call_t *)freebsd7_shmctl
 };
 
+#ifndef _SYS_SYSPROTO_H_
+/* XXX actually varargs. */
+struct shmsys_args {
+	int	which;
+	int	a2;
+	int	a3;
+	int	a4;
+};
+#endif
 int
-sys_shmsys(td, uap)
-	struct thread *td;
-	/* XXX actually varargs. */
-	struct shmsys_args /* {
-		int	which;
-		int	a2;
-		int	a3;
-		int	a4;
-	} */ *uap;
+sys_shmsys(struct thread *td, struct shmsys_args *uap)
 {
 	int error;
 
-	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
-		return (ENOSYS);
-	if (uap->which < 0 ||
-	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
+	if (uap->which < 0 || uap->which >= nitems(shmcalls))
 		return (EINVAL);
-	mtx_lock(&Giant);
 	error = (*shmcalls[uap->which])(td, &uap->a2);
-	mtx_unlock(&Giant);
 	return (error);
 }
 
@@ -1304,9 +1555,7 @@
 };
 #endif
 int
-freebsd7_shmctl(td, uap)
-	struct thread *td;
-	struct freebsd7_shmctl_args *uap;
+freebsd7_shmctl(struct thread *td, struct freebsd7_shmctl_args *uap)
 {
 	int error = 0;
 	struct shmid_ds_old old;



More information about the Midnightbsd-cvs mailing list