[Midnightbsd-cvs] src [8615] trunk/sys: fix locked memory accounting with MAP_WIREFUTURE flag.
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Sun Sep 25 13:43:11 EDT 2016
Revision: 8615
http://svnweb.midnightbsd.org/src/?rev=8615
Author: laffer1
Date: 2016-09-25 13:43:11 -0400 (Sun, 25 Sep 2016)
Log Message:
-----------
Fix locked memory accounting with the MAP_WIREFUTURE flag. Add sysctl vm.old_mlock to turn off the accounting. Add sysctl security.bsd.unprivileged_mlock to allow unprivileged users to call mlock(2).
Modified Paths:
--------------
trunk/sys/kern/kern_priv.c
trunk/sys/vm/vm.h
trunk/sys/vm/vm_map.c
trunk/sys/vm/vm_mmap.c
trunk/sys/vm/vm_unix.c
Modified: trunk/sys/kern/kern_priv.c
===================================================================
--- trunk/sys/kern/kern_priv.c 2016-09-25 17:39:47 UTC (rev 8614)
+++ trunk/sys/kern/kern_priv.c 2016-09-25 17:43:11 UTC (rev 8615)
@@ -59,6 +59,11 @@
&suser_enabled, 0, "processes with uid 0 have privilege");
TUNABLE_INT("security.bsd.suser_enabled", &suser_enabled);
+static int unprivileged_mlock = 1;
+SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_mlock, CTLFLAG_RW|CTLFLAG_TUN,
+ &unprivileged_mlock, 0, "Allow non-root users to call mlock(2)");
+TUNABLE_INT("security.bsd.unprivileged_mlock", &unprivileged_mlock);
+
SDT_PROVIDER_DEFINE(priv);
SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_ok, priv-ok, "int");
SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_err, priv-err, "int");
@@ -93,6 +98,19 @@
if (error)
goto out;
+ if (unprivileged_mlock) {
+ /*
+ * Allow unprivileged users to call mlock(2)/munlock(2) and
+ * mlockall(2)/munlockall(2).
+ */
+ switch (priv) {
+ case PRIV_VM_MLOCK:
+ case PRIV_VM_MUNLOCK:
+ error = 0;
+ goto out;
+ }
+ }
+
/*
* Having determined if privilege is restricted by various policies,
* now determine if privilege is granted. At this point, any policy
Modified: trunk/sys/vm/vm.h
===================================================================
--- trunk/sys/vm/vm.h 2016-09-25 17:39:47 UTC (rev 8614)
+++ trunk/sys/vm/vm.h 2016-09-25 17:43:11 UTC (rev 8615)
@@ -141,6 +141,8 @@
extern struct kva_md_info kmi;
extern void vm_ksubmap_init(struct kva_md_info *);
+extern int old_mlock;
+
struct ucred;
int swap_reserve(vm_ooffset_t incr);
int swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred);
Modified: trunk/sys/vm/vm_map.c
===================================================================
--- trunk/sys/vm/vm_map.c 2016-09-25 17:39:47 UTC (rev 8614)
+++ trunk/sys/vm/vm_map.c 2016-09-25 17:43:11 UTC (rev 8615)
@@ -3248,7 +3248,7 @@
vm_offset_t bot, top;
vm_size_t growsize, init_ssize;
int orient, rv;
- rlim_t vmemlim;
+ rlim_t lmemlim, vmemlim;
/*
* The stack orientation is piggybacked with the cow argument.
@@ -3267,9 +3267,10 @@
growsize = sgrowsiz;
init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
- PROC_LOCK(curthread->td_proc);
- vmemlim = lim_cur(curthread->td_proc, RLIMIT_VMEM);
- PROC_UNLOCK(curthread->td_proc);
+ PROC_LOCK(curproc);
+ lmemlim = lim_cur(curproc, RLIMIT_MEMLOCK);
+ vmemlim = lim_cur(curproc, RLIMIT_VMEM);
+ PROC_UNLOCK(curproc);
vm_map_lock(map);
@@ -3279,6 +3280,14 @@
return (KERN_NO_SPACE);
}
+ if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+ if (ptoa(vmspace_wired_count(curproc->p_vmspace)) +
+ init_ssize > lmemlim) {
+ vm_map_unlock(map);
+ return (KERN_NO_SPACE);
+ }
+ }
+
/* If we would blow our VMEM resource limit, no go */
if (map->size + init_ssize > vmemlim) {
vm_map_unlock(map);
@@ -3360,7 +3369,7 @@
vm_offset_t end;
vm_size_t growsize;
size_t grow_amount, max_grow;
- rlim_t stacklim, vmemlim;
+ rlim_t lmemlim, stacklim, vmemlim;
int is_procstack, rv;
struct ucred *cred;
#ifdef notyet
@@ -3372,6 +3381,7 @@
Retry:
PROC_LOCK(p);
+ lmemlim = lim_cur(p, RLIMIT_MEMLOCK);
stacklim = lim_cur(p, RLIMIT_STACK);
vmemlim = lim_cur(p, RLIMIT_VMEM);
PROC_UNLOCK(p);
@@ -3494,7 +3504,25 @@
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
grow_amount = limit - ctob(vm->vm_ssize);
#endif
-
+ if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+ if (ptoa(vmspace_wired_count(p->p_vmspace)) + grow_amount >
+ lmemlim) {
+ vm_map_unlock_read(map);
+ rv = KERN_NO_SPACE;
+ goto out;
+ }
+#ifdef RACCT
+ PROC_LOCK(p);
+ if (racct_set(p, RACCT_MEMLOCK,
+ ptoa(vmspace_wired_count(p->p_vmspace)) + grow_amount)) {
+ PROC_UNLOCK(p);
+ vm_map_unlock_read(map);
+ rv = KERN_NO_SPACE;
+ goto out;
+ }
+ PROC_UNLOCK(p);
+#endif
+ }
/* If we would blow our VMEM resource limit, no go */
if (map->size + grow_amount > vmemlim) {
vm_map_unlock_read(map);
@@ -3615,6 +3643,11 @@
PROC_LOCK(p);
error = racct_set(p, RACCT_VMEM, map->size);
KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
+ if (!old_mlock) {
+ error = racct_set(p, RACCT_MEMLOCK,
+ ptoa(vmspace_wired_count(p->p_vmspace)));
+ KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed"));
+ }
error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
PROC_UNLOCK(p);
Modified: trunk/sys/vm/vm_mmap.c
===================================================================
--- trunk/sys/vm/vm_mmap.c 2016-09-25 17:39:47 UTC (rev 8614)
+++ trunk/sys/vm/vm_mmap.c 2016-09-25 17:43:11 UTC (rev 8615)
@@ -59,6 +59,7 @@
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
+#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
@@ -87,6 +88,11 @@
#include <sys/pmckern.h>
#endif
+int old_mlock = 0;
+SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RW | CTLFLAG_TUN, &old_mlock, 0,
+ "Do not apply RLIMIT_MEMLOCK on mlockall");
+TUNABLE_INT("vm.old_mlock", &old_mlock);
+
#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
int incr;
@@ -1100,27 +1106,25 @@
int error;
map = &td->td_proc->p_vmspace->vm_map;
- error = 0;
+ error = priv_check(td, PRIV_VM_MLOCK);
+ if (error)
+ return (error);
if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
return (EINVAL);
-#if 0
/*
* If wiring all pages in the process would cause it to exceed
* a hard resource limit, return ENOMEM.
*/
- PROC_LOCK(td->td_proc);
- if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
+ if (!old_mlock && uap->how & MCL_CURRENT) {
+ PROC_LOCK(td->td_proc);
+ if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
+ PROC_UNLOCK(td->td_proc);
+ return (ENOMEM);
+ }
PROC_UNLOCK(td->td_proc);
- return (ENOMEM);
}
- PROC_UNLOCK(td->td_proc);
-#else
- error = priv_check(td, PRIV_VM_MLOCK);
- if (error)
- return (error);
-#endif
#ifdef RACCT
PROC_LOCK(td->td_proc);
error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
@@ -1483,6 +1487,24 @@
PROC_LOCK(td->td_proc);
if (td->td_proc->p_vmspace->vm_map.size + size >
lim_cur(td->td_proc, RLIMIT_VMEM)) {
+ if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+ if (ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) +
+ size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) {
+ racct_set_force(td->td_proc, RACCT_VMEM,
+ map->size);
+ PROC_UNLOCK(td->td_proc);
+ return (ENOMEM);
+ }
+ error = racct_set(td->td_proc, RACCT_MEMLOCK,
+ ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) +
+ size);
+ if (error != 0) {
+ racct_set_force(td->td_proc, RACCT_VMEM,
+ map->size);
+ PROC_UNLOCK(td->td_proc);
+ return (error);
+ }
+ }
PROC_UNLOCK(td->td_proc);
return (ENOMEM);
}
Modified: trunk/sys/vm/vm_unix.c
===================================================================
--- trunk/sys/vm/vm_unix.c 2016-09-25 17:39:47 UTC (rev 8614)
+++ trunk/sys/vm/vm_unix.c 2016-09-25 17:43:11 UTC (rev 8615)
@@ -77,7 +77,7 @@
{
struct vmspace *vm = td->td_proc->p_vmspace;
vm_offset_t new, old, base;
- rlim_t datalim, vmemlim;
+ rlim_t datalim, lmemlim, vmemlim;
int prot, rv;
int error = 0;
boolean_t do_map_wirefuture;
@@ -84,6 +84,7 @@
PROC_LOCK(td->td_proc);
datalim = lim_cur(td->td_proc, RLIMIT_DATA);
+ lmemlim = lim_cur(td->td_proc, RLIMIT_MEMLOCK);
vmemlim = lim_cur(td->td_proc, RLIMIT_VMEM);
PROC_UNLOCK(td->td_proc);
@@ -116,6 +117,13 @@
goto done;
}
if (new > old) {
+ if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) {
+ if (ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) +
+ (new - old) > lmemlim) {
+ error = ENOMEM;
+ goto done;
+ }
+ }
if (vm->vm_map.size + (new - old) > vmemlim) {
error = ENOMEM;
goto done;
@@ -136,6 +144,20 @@
error = ENOMEM;
goto done;
}
+ if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) {
+ error = racct_set(td->td_proc, RACCT_MEMLOCK,
+ ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) +
+ (new - old));
+ if (error != 0) {
+ racct_set_force(td->td_proc, RACCT_DATA,
+ old - base);
+ racct_set_force(td->td_proc, RACCT_VMEM,
+ vm->vm_map.size);
+ PROC_UNLOCK(td->td_proc);
+ error = ENOMEM;
+ goto done;
+ }
+ }
PROC_UNLOCK(td->td_proc);
#endif
prot = VM_PROT_RW;
@@ -152,6 +174,11 @@
PROC_LOCK(td->td_proc);
racct_set_force(td->td_proc, RACCT_DATA, old - base);
racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size);
+ if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) {
+ racct_set_force(td->td_proc, RACCT_MEMLOCK,
+ ptoa(vmspace_wired_count(
+ td->td_proc->p_vmspace)));
+ }
PROC_UNLOCK(td->td_proc);
#endif
error = ENOMEM;
@@ -183,6 +210,10 @@
PROC_LOCK(td->td_proc);
racct_set_force(td->td_proc, RACCT_DATA, new - base);
racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size);
+ if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) {
+ racct_set_force(td->td_proc, RACCT_MEMLOCK,
+ ptoa(vmspace_wired_count(td->td_proc->p_vmspace)));
+ }
PROC_UNLOCK(td->td_proc);
#endif
}
More information about the Midnightbsd-cvs
mailing list