[Midnightbsd-cvs] src [9950] trunk/sys/kern: sync with freebsd
laffer1 at midnightbsd.org
Fri May 25 17:07:10 EDT 2018
Revision: 9950
http://svnweb.midnightbsd.org/src/?rev=9950
Author: laffer1
Date: 2018-05-25 17:07:09 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd
Modified Paths:
--------------
trunk/sys/kern/kern_time.c
trunk/sys/kern/kern_timeout.c
trunk/sys/kern/kern_umtx.c
trunk/sys/kern/kern_uuid.c
trunk/sys/kern/ksched.c
trunk/sys/kern/link_elf.c
trunk/sys/kern/link_elf_obj.c
trunk/sys/kern/md4c.c
trunk/sys/kern/md5c.c
trunk/sys/kern/p1003_1b.c
trunk/sys/kern/posix4_mib.c
trunk/sys/kern/sched_4bsd.c
trunk/sys/kern/sched_ule.c
trunk/sys/kern/stack_protector.c
trunk/sys/kern/subr_acl_nfs4.c
trunk/sys/kern/subr_blist.c
trunk/sys/kern/subr_bufring.c
trunk/sys/kern/subr_bus.c
trunk/sys/kern/subr_clock.c
trunk/sys/kern/subr_devstat.c
trunk/sys/kern/subr_disk.c
trunk/sys/kern/subr_dummy_vdso_tc.c
trunk/sys/kern/subr_eventhandler.c
trunk/sys/kern/subr_fattime.c
trunk/sys/kern/subr_firmware.c
trunk/sys/kern/subr_hash.c
trunk/sys/kern/subr_hints.c
trunk/sys/kern/subr_kdb.c
trunk/sys/kern/subr_kobj.c
trunk/sys/kern/subr_lock.c
trunk/sys/kern/subr_log.c
trunk/sys/kern/subr_mbpool.c
trunk/sys/kern/subr_mchain.c
trunk/sys/kern/subr_module.c
trunk/sys/kern/subr_msgbuf.c
Added Paths:
-----------
trunk/sys/kern/subr_bus_dma.c
trunk/sys/kern/subr_busdma_bufalloc.c
trunk/sys/kern/subr_counter.c
trunk/sys/kern/subr_dnvlist.c
trunk/sys/kern/subr_nvlist.c
trunk/sys/kern/subr_nvpair.c
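
Among the kern_time.c changes below is support for per-process and per-thread CPU-time clocks: the new clock_getcpuclockid2() path encodes the target pid or tid into the upper bits of a clockid_t, and kern_clock_gettime() routes any negative clock_id to the new get_cputime() helper. A minimal stand-alone sketch of that encoding follows; the macros are copied from the diff, while the main() harness and sample IDs are illustrative assumptions, not part of the commit.

/*
 * Illustrative sketch only (not part of the commit): the CPU-clock
 * clockid_t encoding added in kern_time.c below.  The macros mirror
 * the diff; the main() harness is made up for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define CPUCLOCK_BIT		0x80000000
#define CPUCLOCK_PROCESS_BIT	0x40000000
#define CPUCLOCK_ID_MASK	(~(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT))
#define MAKE_THREAD_CPUCLOCK(tid)	(CPUCLOCK_BIT|(tid))
#define MAKE_PROCESS_CPUCLOCK(pid)	\
	(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT|(pid))

int
main(void)
{
	uint32_t clk = MAKE_PROCESS_CPUCLOCK(1234);

	/* CPUCLOCK_BIT is the sign bit, so encoded IDs are negative ints. */
	printf("process clock: raw=0x%08x negative=%d id=%u\n",
	    (unsigned)clk, (int32_t)clk < 0, (unsigned)(clk & CPUCLOCK_ID_MASK));

	clk = MAKE_THREAD_CPUCLOCK(100042);
	printf("thread clock:  raw=0x%08x process_bit=%d id=%u\n",
	    (unsigned)clk, (clk & CPUCLOCK_PROCESS_BIT) != 0,
	    (unsigned)(clk & CPUCLOCK_ID_MASK));
	return (0);
}

Because CPUCLOCK_BIT is the sign bit of the 32-bit clock_id, the default case in kern_clock_gettime() in the diff can simply treat every negative value as an encoded CPU clock and fall through to get_cputime().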
Modified: trunk/sys/kern/kern_time.c
===================================================================
--- trunk/sys/kern/kern_time.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/kern_time.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -30,8 +31,10 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_time.c 330422 2018-03-04 23:31:25Z bdrewery $");
+#include "opt_ktrace.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/limits.h>
@@ -43,6 +46,7 @@
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
+#include <sys/sleepqueue.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@@ -53,11 +57,20 @@
#include <sys/timers.h>
#include <sys/timetc.h>
#include <sys/vnode.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_extern.h>
#define MAX_CLOCKS (CLOCK_MONOTONIC+1)
+#define CPUCLOCK_BIT 0x80000000
+#define CPUCLOCK_PROCESS_BIT 0x40000000
+#define CPUCLOCK_ID_MASK (~(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT))
+#define MAKE_THREAD_CPUCLOCK(tid) (CPUCLOCK_BIT|(tid))
+#define MAKE_PROCESS_CPUCLOCK(pid) \
+ (CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT|(pid))
static struct kclock posix_clocks[MAX_CLOCKS];
static uma_zone_t itimer_zone = NULL;
@@ -91,9 +104,6 @@
static int realtimer_delete(struct itimer *);
static void realtimer_clocktime(clockid_t, struct timespec *);
static void realtimer_expire(void *);
-static int kern_timer_create(struct thread *, clockid_t,
- struct sigevent *, int *, int);
-static int kern_timer_delete(struct thread *, int);
int register_posix_clock(int, struct kclock *);
void itimer_fire(struct itimer *it);
@@ -165,6 +175,57 @@
}
#ifndef _SYS_SYSPROTO_H_
+struct clock_getcpuclockid2_args {
+ id_t id;
+ int which;
+ clockid_t *clock_id;
+};
+#endif
+/* ARGSUSED */
+int
+sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap)
+{
+ clockid_t clk_id;
+ int error;
+
+ error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id);
+ if (error == 0)
+ error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t));
+ return (error);
+}
+
+int
+kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
+ clockid_t *clk_id)
+{
+ struct proc *p;
+ pid_t pid;
+ lwpid_t tid;
+ int error;
+
+ switch (which) {
+ case CPUCLOCK_WHICH_PID:
+ if (id != 0) {
+ error = pget(id, PGET_CANSEE | PGET_NOTID, &p);
+ if (error != 0)
+ return (error);
+ PROC_UNLOCK(p);
+ pid = id;
+ } else {
+ pid = td->td_proc->p_pid;
+ }
+ *clk_id = MAKE_PROCESS_CPUCLOCK(pid);
+ return (0);
+ case CPUCLOCK_WHICH_TID:
+ tid = id == 0 ? td->td_tid : id;
+ *clk_id = MAKE_THREAD_CPUCLOCK(tid);
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+#ifndef _SYS_SYSPROTO_H_
struct clock_gettime_args {
clockid_t clock_id;
struct timespec *tp;
@@ -184,12 +245,80 @@
return (error);
}
+static inline void
+cputick2timespec(uint64_t runtime, struct timespec *ats)
+{
+ runtime = cputick2usec(runtime);
+ ats->tv_sec = runtime / 1000000;
+ ats->tv_nsec = runtime % 1000000 * 1000;
+}
+
+static void
+get_thread_cputime(struct thread *targettd, struct timespec *ats)
+{
+ uint64_t runtime, curtime, switchtime;
+
+ if (targettd == NULL) { /* current thread */
+ critical_enter();
+ switchtime = PCPU_GET(switchtime);
+ curtime = cpu_ticks();
+ runtime = curthread->td_runtime;
+ critical_exit();
+ runtime += curtime - switchtime;
+ } else {
+ thread_lock(targettd);
+ runtime = targettd->td_runtime;
+ thread_unlock(targettd);
+ }
+ cputick2timespec(runtime, ats);
+}
+
+static void
+get_process_cputime(struct proc *targetp, struct timespec *ats)
+{
+ uint64_t runtime;
+ struct rusage ru;
+
+ PROC_STATLOCK(targetp);
+ rufetch(targetp, &ru);
+ runtime = targetp->p_rux.rux_runtime;
+ PROC_STATUNLOCK(targetp);
+ cputick2timespec(runtime, ats);
+}
+
+static int
+get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats)
+{
+ struct proc *p, *p2;
+ struct thread *td2;
+ lwpid_t tid;
+ pid_t pid;
+ int error;
+
+ p = td->td_proc;
+ if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) {
+ tid = clock_id & CPUCLOCK_ID_MASK;
+ td2 = tdfind(tid, p->p_pid);
+ if (td2 == NULL)
+ return (EINVAL);
+ get_thread_cputime(td2, ats);
+ PROC_UNLOCK(td2->td_proc);
+ } else {
+ pid = clock_id & CPUCLOCK_ID_MASK;
+ error = pget(pid, PGET_CANSEE, &p2);
+ if (error != 0)
+ return (EINVAL);
+ get_process_cputime(p2, ats);
+ PROC_UNLOCK(p2);
+ }
+ return (0);
+}
+
int
kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats)
{
struct timeval sys, user;
struct proc *p;
- uint64_t runtime, curtime, switchtime;
p = td->td_proc;
switch (clock_id) {
@@ -202,17 +331,17 @@
break;
case CLOCK_VIRTUAL:
PROC_LOCK(p);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &user, &sys);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
PROC_UNLOCK(p);
TIMEVAL_TO_TIMESPEC(&user, ats);
break;
case CLOCK_PROF:
PROC_LOCK(p);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &user, &sys);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
PROC_UNLOCK(p);
timevaladd(&user, &sys);
TIMEVAL_TO_TIMESPEC(&user, ats);
@@ -232,17 +361,17 @@
ats->tv_nsec = 0;
break;
case CLOCK_THREAD_CPUTIME_ID:
- critical_enter();
- switchtime = PCPU_GET(switchtime);
- curtime = cpu_ticks();
- runtime = td->td_runtime;
- critical_exit();
- runtime = cputick2usec(runtime + curtime - switchtime);
- ats->tv_sec = runtime / 1000000;
- ats->tv_nsec = runtime % 1000000 * 1000;
+ get_thread_cputime(NULL, ats);
break;
+ case CLOCK_PROCESS_CPUTIME_ID:
+ PROC_LOCK(p);
+ get_process_cputime(p, ats);
+ PROC_UNLOCK(p);
+ break;
default:
- return (EINVAL);
+ if ((int)clock_id >= 0)
+ return (EINVAL);
+ return (get_cputime(td, clock_id, ats));
}
return (0);
}
@@ -336,6 +465,8 @@
ts->tv_nsec = 0;
break;
case CLOCK_THREAD_CPUTIME_ID:
+ case CLOCK_PROCESS_CPUTIME_ID:
+ cputime:
/* sync with cputick2usec */
ts->tv_nsec = 1000000 / cpu_tickrate();
if (ts->tv_nsec == 0)
@@ -342,18 +473,21 @@
ts->tv_nsec = 1000;
break;
default:
+ if ((int)clock_id < 0)
+ goto cputime;
return (EINVAL);
}
return (0);
}
-static int nanowait;
+static uint8_t nanowait[MAXCPU];
int
kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
{
- struct timespec ts, ts2, ts3;
- struct timeval tv;
+ struct timespec ts;
+ sbintime_t sbt, sbtt, prec, tmp;
+ time_t over;
int error;
if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
@@ -360,30 +494,37 @@
return (EINVAL);
if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
return (0);
- getnanouptime(&ts);
- timespecadd(&ts, rqt);
- TIMESPEC_TO_TIMEVAL(&tv, rqt);
- for (;;) {
- error = tsleep(&nanowait, PWAIT | PCATCH, "nanslp",
- tvtohz(&tv));
- getnanouptime(&ts2);
- if (error != EWOULDBLOCK) {
- if (error == ERESTART)
- error = EINTR;
- if (rmt != NULL) {
- timespecsub(&ts, &ts2);
- if (ts.tv_sec < 0)
- timespecclear(&ts);
- *rmt = ts;
- }
- return (error);
+ ts = *rqt;
+ if (ts.tv_sec > INT32_MAX / 2) {
+ over = ts.tv_sec - INT32_MAX / 2;
+ ts.tv_sec -= over;
+ } else
+ over = 0;
+ tmp = tstosbt(ts);
+ prec = tmp;
+ prec >>= tc_precexp;
+ if (TIMESEL(&sbt, tmp))
+ sbt += tc_tick_sbt;
+ sbt += tmp;
+ error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp",
+ sbt, prec, C_ABSOLUTE);
+ if (error != EWOULDBLOCK) {
+ if (error == ERESTART)
+ error = EINTR;
+ if (TIMESEL(&sbtt, tmp))
+ sbtt += tc_tick_sbt;
+ if (rmt != NULL) {
+ ts = sbttots(sbt - sbtt);
+ ts.tv_sec += over;
+ if (ts.tv_sec < 0)
+ timespecclear(&ts);
+ *rmt = ts;
}
- if (timespeccmp(&ts2, &ts, >=))
+ if (sbtt >= sbt)
return (0);
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
+ return (error);
}
+ return (0);
}
#ifndef _SYS_SYSPROTO_H_
@@ -407,7 +548,7 @@
!useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE))
return (EFAULT);
error = kern_nanosleep(td, &rqt, &rmt);
- if (error && uap->rmtp) {
+ if (error == EINTR && uap->rmtp) {
int error2;
error2 = copyout(&rmt, uap->rmtp, sizeof(rmt));
@@ -554,7 +695,7 @@
*aitv = p->p_realtimer;
PROC_UNLOCK(p);
if (timevalisset(&aitv->it_value)) {
- getmicrouptime(&ctv);
+ microuptime(&ctv);
if (timevalcmp(&aitv->it_value, &ctv, <))
timevalclear(&aitv->it_value);
else
@@ -561,10 +702,14 @@
timevalsub(&aitv->it_value, &ctv);
}
} else {
- PROC_SLOCK(p);
+ PROC_ITIMLOCK(p);
*aitv = p->p_stats->p_timer[which];
- PROC_SUNLOCK(p);
+ PROC_ITIMUNLOCK(p);
}
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_STRUCT))
+ ktritimerval(aitv);
+#endif
return (0);
}
@@ -599,6 +744,7 @@
{
struct proc *p = td->td_proc;
struct timeval ctv;
+ sbintime_t sbt, pr;
if (aitv == NULL)
return (kern_getitimer(td, which, oitv));
@@ -605,11 +751,17 @@
if (which > ITIMER_PROF)
return (EINVAL);
- if (itimerfix(&aitv->it_value))
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_STRUCT))
+ ktritimerval(aitv);
+#endif
+ if (itimerfix(&aitv->it_value) ||
+ aitv->it_value.tv_sec > INT32_MAX / 2)
return (EINVAL);
if (!timevalisset(&aitv->it_value))
timevalclear(&aitv->it_interval);
- else if (itimerfix(&aitv->it_interval))
+ else if (itimerfix(&aitv->it_interval) ||
+ aitv->it_interval.tv_sec > INT32_MAX / 2)
return (EINVAL);
if (which == ITIMER_REAL) {
@@ -616,11 +768,13 @@
PROC_LOCK(p);
if (timevalisset(&p->p_realtimer.it_value))
callout_stop(&p->p_itcallout);
- getmicrouptime(&ctv);
+ microuptime(&ctv);
if (timevalisset(&aitv->it_value)) {
- callout_reset(&p->p_itcallout, tvtohz(&aitv->it_value),
- realitexpire, p);
+ pr = tvtosbt(aitv->it_value) >> tc_precexp;
timevaladd(&aitv->it_value, &ctv);
+ sbt = tvtosbt(aitv->it_value);
+ callout_reset_sbt(&p->p_itcallout, sbt, pr,
+ realitexpire, p, C_ABSOLUTE);
}
*oitv = p->p_realtimer;
p->p_realtimer = *aitv;
@@ -632,11 +786,23 @@
timevalsub(&oitv->it_value, &ctv);
}
} else {
- PROC_SLOCK(p);
+ if (aitv->it_interval.tv_sec == 0 &&
+ aitv->it_interval.tv_usec != 0 &&
+ aitv->it_interval.tv_usec < tick)
+ aitv->it_interval.tv_usec = tick;
+ if (aitv->it_value.tv_sec == 0 &&
+ aitv->it_value.tv_usec != 0 &&
+ aitv->it_value.tv_usec < tick)
+ aitv->it_value.tv_usec = tick;
+ PROC_ITIMLOCK(p);
*oitv = p->p_stats->p_timer[which];
p->p_stats->p_timer[which] = *aitv;
- PROC_SUNLOCK(p);
+ PROC_ITIMUNLOCK(p);
}
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_STRUCT))
+ ktritimerval(oitv);
+#endif
return (0);
}
@@ -656,7 +822,8 @@
realitexpire(void *arg)
{
struct proc *p;
- struct timeval ctv, ntv;
+ struct timeval ctv;
+ sbintime_t isbt;
p = (struct proc *)arg;
kern_psignal(p, SIGALRM);
@@ -666,19 +833,17 @@
wakeup(&p->p_itcallout);
return;
}
- for (;;) {
+ isbt = tvtosbt(p->p_realtimer.it_interval);
+ if (isbt >= sbt_timethreshold)
+ getmicrouptime(&ctv);
+ else
+ microuptime(&ctv);
+ do {
timevaladd(&p->p_realtimer.it_value,
&p->p_realtimer.it_interval);
- getmicrouptime(&ctv);
- if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
- ntv = p->p_realtimer.it_value;
- timevalsub(&ntv, &ctv);
- callout_reset(&p->p_itcallout, tvtohz(&ntv) - 1,
- realitexpire, p);
- return;
- }
- }
- /*NOTREACHED*/
+ } while (timevalcmp(&p->p_realtimer.it_value, &ctv, <=));
+ callout_reset_sbt(&p->p_itcallout, tvtosbt(p->p_realtimer.it_value),
+ isbt >> tc_precexp, realitexpire, p, C_ABSOLUTE);
}
/*
@@ -693,8 +858,9 @@
if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
return (EINVAL);
- if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
- tv->tv_usec = tick;
+ if (tv->tv_sec == 0 && tv->tv_usec != 0 &&
+ tv->tv_usec < (u_int)tick / 16)
+ tv->tv_usec = (u_int)tick / 16;
return (0);
}
@@ -835,7 +1001,7 @@
return (maxpps != 0);
} else {
(*curpps)++; /* NB: ignore potential overflow */
- return (maxpps < 0 || *curpps < maxpps);
+ return (maxpps < 0 || *curpps <= maxpps);
}
}
@@ -922,31 +1088,30 @@
int
sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap)
{
- struct sigevent *evp1, ev;
+ struct sigevent *evp, ev;
int id;
int error;
- if (uap->evp != NULL) {
+ if (uap->evp == NULL) {
+ evp = NULL;
+ } else {
error = copyin(uap->evp, &ev, sizeof(ev));
if (error != 0)
return (error);
- evp1 = &ev;
- } else
- evp1 = NULL;
-
- error = kern_timer_create(td, uap->clock_id, evp1, &id, -1);
-
+ evp = &ev;
+ }
+ error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1);
if (error == 0) {
error = copyout(&id, uap->timerid, sizeof(int));
if (error != 0)
- kern_timer_delete(td, id);
+ kern_ktimer_delete(td, id);
}
return (error);
}
-static int
-kern_timer_create(struct thread *td, clockid_t clock_id,
- struct sigevent *evp, int *timerid, int preset_id)
+int
+kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp,
+ int *timerid, int preset_id)
{
struct proc *p = td->td_proc;
struct itimer *it;
@@ -1061,7 +1226,8 @@
int
sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap)
{
- return (kern_timer_delete(td, uap->timerid));
+
+ return (kern_ktimer_delete(td, uap->timerid));
}
static struct itimer *
@@ -1083,8 +1249,8 @@
return (it);
}
-static int
-kern_timer_delete(struct thread *td, int timerid)
+int
+kern_ktimer_delete(struct thread *td, int timerid)
{
struct proc *p = td->td_proc;
struct itimer *it;
@@ -1126,8 +1292,6 @@
int
sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap)
{
- struct proc *p = td->td_proc;
- struct itimer *it;
struct itimerspec val, oval, *ovalp;
int error;
@@ -1134,27 +1298,34 @@
error = copyin(uap->value, &val, sizeof(val));
if (error != 0)
return (error);
-
- if (uap->ovalue != NULL)
- ovalp = &oval;
- else
- ovalp = NULL;
+ ovalp = uap->ovalue != NULL ? &oval : NULL;
+ error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp);
+ if (error == 0 && uap->ovalue != NULL)
+ error = copyout(ovalp, uap->ovalue, sizeof(*ovalp));
+ return (error);
+}
+int
+kern_ktimer_settime(struct thread *td, int timer_id, int flags,
+ struct itimerspec *val, struct itimerspec *oval)
+{
+ struct proc *p;
+ struct itimer *it;
+ int error;
+
+ p = td->td_proc;
PROC_LOCK(p);
- if (uap->timerid < 3 ||
- (it = itimer_find(p, uap->timerid)) == NULL) {
+ if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
PROC_UNLOCK(p);
error = EINVAL;
} else {
PROC_UNLOCK(p);
itimer_enter(it);
- error = CLOCK_CALL(it->it_clockid, timer_settime,
- (it, uap->flags, &val, ovalp));
+ error = CLOCK_CALL(it->it_clockid, timer_settime, (it,
+ flags, val, oval));
itimer_leave(it);
ITIMER_UNLOCK(it);
}
- if (error == 0 && uap->ovalue != NULL)
- error = copyout(ovalp, uap->ovalue, sizeof(*ovalp));
return (error);
}
@@ -1167,26 +1338,34 @@
int
sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap)
{
- struct proc *p = td->td_proc;
- struct itimer *it;
struct itimerspec val;
int error;
+ error = kern_ktimer_gettime(td, uap->timerid, &val);
+ if (error == 0)
+ error = copyout(&val, uap->value, sizeof(val));
+ return (error);
+}
+
+int
+kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val)
+{
+ struct proc *p;
+ struct itimer *it;
+ int error;
+
+ p = td->td_proc;
PROC_LOCK(p);
- if (uap->timerid < 3 ||
- (it = itimer_find(p, uap->timerid)) == NULL) {
+ if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) {
PROC_UNLOCK(p);
error = EINVAL;
} else {
PROC_UNLOCK(p);
itimer_enter(it);
- error = CLOCK_CALL(it->it_clockid, timer_gettime,
- (it, &val));
+ error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val));
itimer_leave(it);
ITIMER_UNLOCK(it);
}
- if (error == 0)
- error = copyout(&val, uap->value, sizeof(val));
return (error);
}
@@ -1198,13 +1377,20 @@
int
sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap)
{
+
+ return (kern_ktimer_getoverrun(td, uap->timerid));
+}
+
+int
+kern_ktimer_getoverrun(struct thread *td, int timer_id)
+{
struct proc *p = td->td_proc;
struct itimer *it;
int error ;
PROC_LOCK(p);
- if (uap->timerid < 3 ||
- (it = itimer_find(p, uap->timerid)) == NULL) {
+ if (timer_id < 3 ||
+ (it = itimer_find(p, timer_id)) == NULL) {
PROC_UNLOCK(p);
error = EINVAL;
} else {
@@ -1481,7 +1667,7 @@
panic("unhandled event");
for (; i < TIMER_MAX; ++i) {
if ((it = its->its_timers[i]) != NULL)
- kern_timer_delete(curthread, i);
+ kern_ktimer_delete(curthread, i);
}
if (its->its_timers[0] == NULL &&
its->its_timers[1] == NULL &&
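
The kern_timeout.c rework below moves the callout wheel from tick counts to sbintime_t (a 64-bit fixed-point uptime, seconds in the upper 32 bits and fraction in the lower 32) and hashes expiry times into list-based buckets using CC_HASH_SHIFT. A small stand-alone sketch of that bucket mapping follows; callout_hash() and CC_HASH_SHIFT mirror the diff, while the 512-entry wheel size and main() harness are assumptions made only for the example.

/*
 * Illustrative sketch only (not part of the commit): how the reworked
 * callwheel in kern_timeout.c maps an sbintime_t expiry to a bucket.
 * callout_hash() and CC_HASH_SHIFT mirror the diff; the wheel size and
 * main() harness are assumed for the example.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;		/* seconds << 32 | fraction */
#define SBT_1S		((sbintime_t)1 << 32)
#define CC_HASH_SHIFT	8

static const unsigned int callwheelsize = 512;	/* power of two, assumed */
static const unsigned int callwheelmask = 512 - 1;

static unsigned int
callout_hash(sbintime_t sbt)
{
	/* One hash step per 1/256 s: drop the low 24 fractional bits. */
	return (sbt >> (32 - CC_HASH_SHIFT));
}

int
main(void)
{
	sbintime_t t = 5 * SBT_1S + SBT_1S / 4;	/* expiry 5.25 s after boot */

	printf("hash=%u bucket=%u of %u\n",
	    callout_hash(t), callout_hash(t) & callwheelmask, callwheelsize);
	return (0);
}

Since callout_process() in the diff scans from the bucket of the last scan time up to the bucket of the current time plus a lookahead, expirations that land in the same 1/256 s bucket can be aggregated into a single event-timer programming.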
Modified: trunk/sys/kern/kern_timeout.c
===================================================================
--- trunk/sys/kern/kern_timeout.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/kern_timeout.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,15 +36,20 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_timeout.c 305853 2016-09-16 00:14:26Z hiren $");
+#include "opt_callout_profiling.h"
#include "opt_kdtrace.h"
+#include "opt_ddb.h"
+#if defined(__arm__)
+#include "opt_timer.h"
+#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
-#include <sys/condvar.h>
+#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
@@ -56,18 +62,24 @@
#include <sys/sysctl.h>
#include <sys/smp.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <machine/_inttypes.h>
+#endif
+
#ifdef SMP
#include <machine/cpu.h>
#endif
+#ifndef NO_EVENTTIMERS
+DPCPU_DECLARE(sbintime_t, hardclocktime);
+#endif
+
SDT_PROVIDER_DEFINE(callout_execute);
-SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start);
-SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
- "struct callout *");
-SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end);
-SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
- "struct callout *");
+SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
+SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
+#ifdef CALLOUT_PROFILING
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
"Average number of items examined per softclock call. Units = 1/1000");
@@ -80,65 +92,85 @@
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
"Average number of MP callouts made per softclock call. Units = 1/1000");
+static int avg_depth_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
+ "Average number of direct callouts examined per callout_process call. "
+ "Units = 1/1000");
+static int avg_lockcalls_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
+ &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
+ "callout_process call. Units = 1/1000");
+static int avg_mpcalls_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
+ 0, "Average number of MP direct callouts made per callout_process call. "
+ "Units = 1/1000");
+#endif
+
+static int ncallout;
+SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0,
+ "Number of entries in callwheel and size of timeout() preallocation");
+
/*
* TODO:
* allocate more timeout table slots when table overflows.
*/
-int callwheelsize, callwheelbits, callwheelmask;
+u_int callwheelsize, callwheelmask;
/*
- * The callout cpu migration entity represents informations necessary for
- * describing the migrating callout to the new callout cpu.
+ * The callout cpu exec entities represent informations necessary for
+ * describing the state of callouts currently running on the CPU and the ones
+ * necessary for migrating callouts to the new callout cpu. In particular,
+ * the first entry of the array cc_exec_entity holds informations for callout
+ * running in SWI thread context, while the second one holds informations
+ * for callout running directly from hardware interrupt context.
* The cached informations are very important for deferring migration when
* the migrating callout is already running.
*/
-struct cc_mig_ent {
+struct cc_exec {
+ struct callout *cc_curr;
#ifdef SMP
- void (*ce_migration_func)(void *);
- void *ce_migration_arg;
- int ce_migration_cpu;
- int ce_migration_ticks;
+ void (*ce_migration_func)(void *);
+ void *ce_migration_arg;
+ int ce_migration_cpu;
+ sbintime_t ce_migration_time;
+ sbintime_t ce_migration_prec;
#endif
+ bool cc_cancel;
+ bool cc_waiting;
};
-
+
/*
* There is one struct callout_cpu per cpu, holding all relevant
* state for the callout processing thread on the individual CPU.
- * In particular:
- * cc_ticks is incremented once per tick in callout_cpu().
- * It tracks the global 'ticks' but in a way that the individual
- * threads should not worry about races in the order in which
- * hardclock() and hardclock_cpu() run on the various CPUs.
- * cc_softclock is advanced in callout_cpu() to point to the
- * first entry in cc_callwheel that may need handling. In turn,
- * a softclock() is scheduled so it can serve the various entries i
- * such that cc_softclock <= i <= cc_ticks .
- * XXX maybe cc_softclock and cc_ticks should be volatile ?
- *
- * cc_ticks is also used in callout_reset_cpu() to determine
- * when the callout should be served.
*/
struct callout_cpu {
- struct cc_mig_ent cc_migrating_entity;
- struct mtx cc_lock;
+ struct mtx_padalign cc_lock;
+ struct cc_exec cc_exec_entity[2];
+ struct callout *cc_next;
struct callout *cc_callout;
- struct callout_tailq *cc_callwheel;
- struct callout_list cc_callfree;
- struct callout *cc_next;
- struct callout *cc_curr;
+ struct callout_list *cc_callwheel;
+ struct callout_tailq cc_expireq;
+ struct callout_slist cc_callfree;
+ sbintime_t cc_firstevent;
+ sbintime_t cc_lastscan;
void *cc_cookie;
- int cc_ticks;
- int cc_softticks;
- int cc_cancel;
- int cc_waiting;
- int cc_firsttick;
+ u_int cc_bucket;
+ u_int cc_inited;
+ char cc_ktr_event_name[20];
};
+#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION)
+
+#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
+#define cc_exec_next(cc) cc->cc_next
+#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel
+#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting
#ifdef SMP
-#define cc_migration_func cc_migrating_entity.ce_migration_func
-#define cc_migration_arg cc_migrating_entity.ce_migration_arg
-#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu
-#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks
+#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func
+#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg
+#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu
+#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time
+#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec
struct callout_cpu cc_cpu[MAXCPU];
#define CPUBLOCK MAXCPU
@@ -154,39 +186,49 @@
#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED)
static int timeout_cpu;
-void (*callout_new_inserted)(int cpu, int ticks) = NULL;
+static void callout_cpu_init(struct callout_cpu *cc, int cpu);
+static void softclock_call_cc(struct callout *c, struct callout_cpu *cc,
+#ifdef CALLOUT_PROFILING
+ int *mpcalls, int *lockcalls, int *gcalls,
+#endif
+ int direct);
+
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
/**
* Locked by cc_lock:
- * cc_curr - If a callout is in progress, it is curr_callout.
- * If curr_callout is non-NULL, threads waiting in
+ * cc_curr - If a callout is in progress, it is cc_curr.
+ * If cc_curr is non-NULL, threads waiting in
* callout_drain() will be woken up as soon as the
* relevant callout completes.
- * cc_cancel - Changing to 1 with both callout_lock and c_lock held
+ * cc_cancel - Changing to 1 with both callout_lock and cc_lock held
* guarantees that the current callout will not run.
* The softclock() function sets this to 0 before it
* drops callout_lock to acquire c_lock, and it calls
* the handler only if curr_cancelled is still 0 after
- * c_lock is successfully acquired.
+ * cc_lock is successfully acquired.
* cc_waiting - If a thread is waiting in callout_drain(), then
* callout_wait is nonzero. Set only when
- * curr_callout is non-NULL.
+ * cc_curr is non-NULL.
*/
/*
- * Resets the migration entity tied to a specific callout cpu.
+ * Resets the execution entity tied to a specific callout cpu.
*/
static void
-cc_cme_cleanup(struct callout_cpu *cc)
+cc_cce_cleanup(struct callout_cpu *cc, int direct)
{
+ cc_exec_curr(cc, direct) = NULL;
+ cc_exec_cancel(cc, direct) = false;
+ cc_exec_waiting(cc, direct) = false;
#ifdef SMP
- cc->cc_migration_cpu = CPUBLOCK;
- cc->cc_migration_ticks = 0;
- cc->cc_migration_func = NULL;
- cc->cc_migration_arg = NULL;
+ cc_migration_cpu(cc, direct) = CPUBLOCK;
+ cc_migration_time(cc, direct) = 0;
+ cc_migration_prec(cc, direct) = 0;
+ cc_migration_func(cc, direct) = NULL;
+ cc_migration_arg(cc, direct) = NULL;
#endif
}
@@ -194,11 +236,11 @@
* Checks if migration is requested by a specific callout cpu.
*/
static int
-cc_cme_migrating(struct callout_cpu *cc)
+cc_cce_migrating(struct callout_cpu *cc, int direct)
{
#ifdef SMP
- return (cc->cc_migration_cpu != CPUBLOCK);
+ return (cc_migration_cpu(cc, direct) != CPUBLOCK);
#else
return (0);
#endif
@@ -205,36 +247,50 @@
}
/*
- * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
- *
- * This code is called very early in the kernel initialization sequence,
- * and may be called more then once.
+ * Kernel low level callwheel initialization
+ * called on cpu0 during kernel startup.
*/
-caddr_t
-kern_timeout_callwheel_alloc(caddr_t v)
+static void
+callout_callwheel_init(void *dummy)
{
struct callout_cpu *cc;
- timeout_cpu = PCPU_GET(cpuid);
- cc = CC_CPU(timeout_cpu);
/*
- * Calculate callout wheel size
+ * Calculate the size of the callout wheel and the preallocated
+ * timeout() structures.
+ * XXX: Clip callout to result of previous function of maxusers
+ * maximum 384. This is still huge, but acceptable.
*/
- for (callwheelsize = 1, callwheelbits = 0;
- callwheelsize < ncallout;
- callwheelsize <<= 1, ++callwheelbits)
- ;
+ memset(CC_CPU(0), 0, sizeof(cc_cpu));
+ ncallout = imin(16 + maxproc + maxfiles, 18508);
+ TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
+
+ /*
+ * Calculate callout wheel size, should be next power of two higher
+ * than 'ncallout'.
+ */
+ callwheelsize = 1 << fls(ncallout);
callwheelmask = callwheelsize - 1;
- cc->cc_callout = (struct callout *)v;
- v = (caddr_t)(cc->cc_callout + ncallout);
- cc->cc_callwheel = (struct callout_tailq *)v;
- v = (caddr_t)(cc->cc_callwheel + callwheelsize);
- return(v);
+ /*
+ * Only cpu0 handles timeout(9) and receives a preallocation.
+ *
+ * XXX: Once all timeout(9) consumers are converted this can
+ * be removed.
+ */
+ timeout_cpu = PCPU_GET(cpuid);
+ cc = CC_CPU(timeout_cpu);
+ cc->cc_callout = malloc(ncallout * sizeof(struct callout),
+ M_CALLOUT, M_WAITOK);
+ callout_cpu_init(cc, timeout_cpu);
}
+SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
+/*
+ * Initialize the per-cpu callout structures.
+ */
static void
-callout_cpu_init(struct callout_cpu *cc)
+callout_cpu_init(struct callout_cpu *cc, int cpu)
{
struct callout *c;
int i;
@@ -241,16 +297,23 @@
mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
SLIST_INIT(&cc->cc_callfree);
- for (i = 0; i < callwheelsize; i++) {
- TAILQ_INIT(&cc->cc_callwheel[i]);
- }
- cc_cme_cleanup(cc);
- if (cc->cc_callout == NULL)
+ cc->cc_inited = 1;
+ cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
+ M_CALLOUT, M_WAITOK);
+ for (i = 0; i < callwheelsize; i++)
+ LIST_INIT(&cc->cc_callwheel[i]);
+ TAILQ_INIT(&cc->cc_expireq);
+ cc->cc_firstevent = SBT_MAX;
+ for (i = 0; i < 2; i++)
+ cc_cce_cleanup(cc, i);
+ snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
+ "callwheel cpu %d", cpu);
+ if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */
return;
for (i = 0; i < ncallout; i++) {
c = &cc->cc_callout[i];
callout_init(c, 0);
- c->c_flags = CALLOUT_LOCAL_ALLOC;
+ c->c_iflags = CALLOUT_LOCAL_ALLOC;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
}
@@ -286,19 +349,6 @@
#endif
/*
- * kern_timeout_callwheel_init() - initialize previously reserved callwheel
- * space.
- *
- * This code is called just once, after the space reserved for the
- * callout wheel has been finalized.
- */
-void
-kern_timeout_callwheel_init(void)
-{
- callout_cpu_init(CC_CPU(timeout_cpu));
-}
-
-/*
* Start standard softclock thread.
*/
static void
@@ -318,78 +368,159 @@
if (cpu == timeout_cpu)
continue;
cc = CC_CPU(cpu);
+ cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */
+ callout_cpu_init(cc, cpu);
if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
INTR_MPSAFE, &cc->cc_cookie))
panic("died while creating standard software ithreads");
- cc->cc_callout = NULL; /* Only cpu0 handles timeout(). */
- cc->cc_callwheel = malloc(
- sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT,
- M_WAITOK);
- callout_cpu_init(cc);
}
#endif
}
-
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
+#define CC_HASH_SHIFT 8
+
+static inline u_int
+callout_hash(sbintime_t sbt)
+{
+
+ return (sbt >> (32 - CC_HASH_SHIFT));
+}
+
+static inline u_int
+callout_get_bucket(sbintime_t sbt)
+{
+
+ return (callout_hash(sbt) & callwheelmask);
+}
+
void
-callout_tick(void)
+callout_process(sbintime_t now)
{
+ struct callout *tmp, *tmpn;
struct callout_cpu *cc;
- int need_softclock;
- int bucket;
+ struct callout_list *sc;
+ sbintime_t first, last, max, tmp_max;
+ uint32_t lookahead;
+ u_int firstb, lastb, nowb;
+#ifdef CALLOUT_PROFILING
+ int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
+#endif
+ cc = CC_SELF();
+ mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+
+ /* Compute the buckets of the last scan and present times. */
+ firstb = callout_hash(cc->cc_lastscan);
+ cc->cc_lastscan = now;
+ nowb = callout_hash(now);
+
+ /* Compute the last bucket and minimum time of the bucket after it. */
+ if (nowb == firstb)
+ lookahead = (SBT_1S / 16);
+ else if (nowb - firstb == 1)
+ lookahead = (SBT_1S / 8);
+ else
+ lookahead = (SBT_1S / 2);
+ first = last = now;
+ first += (lookahead / 2);
+ last += lookahead;
+ last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
+ lastb = callout_hash(last) - 1;
+ max = last;
+
/*
- * Process callouts at a very low cpu priority, so we don't keep the
- * relatively high clock interrupt priority any longer than necessary.
+ * Check if we wrapped around the entire wheel from the last scan.
+ * In case, we need to scan entirely the wheel for pending callouts.
*/
- need_softclock = 0;
- cc = CC_SELF();
- mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- cc->cc_firsttick = cc->cc_ticks = ticks;
- for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
- bucket = cc->cc_softticks & callwheelmask;
- if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
- need_softclock = 1;
- break;
+ if (lastb - firstb >= callwheelsize) {
+ lastb = firstb + callwheelsize - 1;
+ if (nowb - firstb >= callwheelsize)
+ nowb = lastb;
+ }
+
+ /* Iterate callwheel from firstb to nowb and then up to lastb. */
+ do {
+ sc = &cc->cc_callwheel[firstb & callwheelmask];
+ tmp = LIST_FIRST(sc);
+ while (tmp != NULL) {
+ /* Run the callout if present time within allowed. */
+ if (tmp->c_time <= now) {
+ /*
+ * Consumer told us the callout may be run
+ * directly from hardware interrupt context.
+ */
+ if (tmp->c_iflags & CALLOUT_DIRECT) {
+#ifdef CALLOUT_PROFILING
+ ++depth_dir;
+#endif
+ cc_exec_next(cc) =
+ LIST_NEXT(tmp, c_links.le);
+ cc->cc_bucket = firstb & callwheelmask;
+ LIST_REMOVE(tmp, c_links.le);
+ softclock_call_cc(tmp, cc,
+#ifdef CALLOUT_PROFILING
+ &mpcalls_dir, &lockcalls_dir, NULL,
+#endif
+ 1);
+ tmp = cc_exec_next(cc);
+ cc_exec_next(cc) = NULL;
+ } else {
+ tmpn = LIST_NEXT(tmp, c_links.le);
+ LIST_REMOVE(tmp, c_links.le);
+ TAILQ_INSERT_TAIL(&cc->cc_expireq,
+ tmp, c_links.tqe);
+ tmp->c_iflags |= CALLOUT_PROCESSED;
+ tmp = tmpn;
+ }
+ continue;
+ }
+ /* Skip events from distant future. */
+ if (tmp->c_time >= max)
+ goto next;
+ /*
+ * Event minimal time is bigger than present maximal
+ * time, so it cannot be aggregated.
+ */
+ if (tmp->c_time > last) {
+ lastb = nowb;
+ goto next;
+ }
+ /* Update first and last time, respecting this event. */
+ if (tmp->c_time < first)
+ first = tmp->c_time;
+ tmp_max = tmp->c_time + tmp->c_precision;
+ if (tmp_max < last)
+ last = tmp_max;
+next:
+ tmp = LIST_NEXT(tmp, c_links.le);
}
- }
+ /* Proceed with the next bucket. */
+ firstb++;
+ /*
+ * Stop if we looked after present time and found
+ * some event we can't execute at now.
+ * Stop if we looked far enough into the future.
+ */
+ } while (((int)(firstb - lastb)) <= 0);
+ cc->cc_firstevent = last;
+#ifndef NO_EVENTTIMERS
+ cpu_new_callout(curcpu, last, first);
+#endif
+#ifdef CALLOUT_PROFILING
+ avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
+ avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
+ avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
+#endif
mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
/*
* swi_sched acquires the thread lock, so we don't want to call it
* with cc_lock held; incorrect locking order.
*/
- if (need_softclock)
+ if (!TAILQ_EMPTY(&cc->cc_expireq))
swi_sched(cc->cc_cookie, 0);
}
-int
-callout_tickstofirst(int limit)
-{
- struct callout_cpu *cc;
- struct callout *c;
- struct callout_tailq *sc;
- int curticks;
- int skip = 1;
-
- cc = CC_SELF();
- mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- curticks = cc->cc_ticks;
- while( skip < ncallout && skip < limit ) {
- sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ];
- /* search scanning ticks */
- TAILQ_FOREACH( c, sc, c_links.tqe ){
- if (c->c_time - curticks <= ncallout)
- goto out;
- }
- skip++;
- }
-out:
- cc->cc_firsttick = curticks + skip;
- mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
- return (skip);
-}
-
static struct callout_cpu *
callout_lock(struct callout *c)
{
@@ -415,26 +546,44 @@
}
static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
- void (*func)(void *), void *arg, int cpu)
+callout_cc_add(struct callout *c, struct callout_cpu *cc,
+ sbintime_t sbt, sbintime_t precision, void (*func)(void *),
+ void *arg, int cpu, int flags)
{
+ int bucket;
CC_LOCK_ASSERT(cc);
-
- if (to_ticks <= 0)
- to_ticks = 1;
+ if (sbt < cc->cc_lastscan)
+ sbt = cc->cc_lastscan;
c->c_arg = arg;
- c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+ c->c_iflags |= CALLOUT_PENDING;
+ c->c_iflags &= ~CALLOUT_PROCESSED;
+ c->c_flags |= CALLOUT_ACTIVE;
+ if (flags & C_DIRECT_EXEC)
+ c->c_iflags |= CALLOUT_DIRECT;
c->c_func = func;
- c->c_time = ticks + to_ticks;
- TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
- c, c_links.tqe);
- if ((c->c_time - cc->cc_firsttick) < 0 &&
- callout_new_inserted != NULL) {
- cc->cc_firsttick = c->c_time;
- (*callout_new_inserted)(cpu,
- to_ticks + (ticks - cc->cc_ticks));
+ c->c_time = sbt;
+ c->c_precision = precision;
+ bucket = callout_get_bucket(c->c_time);
+ CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
+ c, (int)(c->c_precision >> 32),
+ (u_int)(c->c_precision & 0xffffffff));
+ LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
+ if (cc->cc_bucket == bucket)
+ cc_exec_next(cc) = c;
+#ifndef NO_EVENTTIMERS
+ /*
+ * Inform the eventtimers(4) subsystem there's a new callout
+ * that has been inserted, but only if really required.
+ */
+ if (SBT_MAX - c->c_time < c->c_precision)
+ c->c_precision = SBT_MAX - c->c_time;
+ sbt = c->c_time + c->c_precision;
+ if (sbt < cc->cc_firstevent) {
+ cc->cc_firstevent = sbt;
+ cpu_new_callout(cpu, sbt, c->c_time);
}
+#endif
}
static void
@@ -441,7 +590,7 @@
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
- if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0)
+ if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
return;
c->c_func = NULL;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
@@ -448,100 +597,122 @@
}
static void
-softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
- int *lockcalls, int *gcalls)
+softclock_call_cc(struct callout *c, struct callout_cpu *cc,
+#ifdef CALLOUT_PROFILING
+ int *mpcalls, int *lockcalls, int *gcalls,
+#endif
+ int direct)
{
+ struct rm_priotracker tracker;
void (*c_func)(void *);
void *c_arg;
struct lock_class *class;
struct lock_object *c_lock;
- int c_flags, sharedlock;
+ uintptr_t lock_status;
+ int c_iflags;
#ifdef SMP
struct callout_cpu *new_cc;
void (*new_func)(void *);
void *new_arg;
- int new_cpu, new_ticks;
+ int flags, new_cpu;
+ sbintime_t new_prec, new_time;
#endif
-#ifdef DIAGNOSTIC
- struct bintime bt1, bt2;
+#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
+ sbintime_t sbt1, sbt2;
struct timespec ts2;
- static uint64_t maxdt = 36893488147419102LL; /* 2 msec */
+ static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */
static timeout_t *lastfunc;
#endif
- KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
- (CALLOUT_PENDING | CALLOUT_ACTIVE),
- ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
+ KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
+ ("softclock_call_cc: pend %p %x", c, c->c_iflags));
+ KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
+ ("softclock_call_cc: act %p %x", c, c->c_flags));
class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
- sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
+ lock_status = 0;
+ if (c->c_flags & CALLOUT_SHAREDLOCK) {
+ if (class == &lock_class_rm)
+ lock_status = (uintptr_t)&tracker;
+ else
+ lock_status = 1;
+ }
c_lock = c->c_lock;
c_func = c->c_func;
c_arg = c->c_arg;
- c_flags = c->c_flags;
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
- c->c_flags = CALLOUT_LOCAL_ALLOC;
+ c_iflags = c->c_iflags;
+ if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
+ c->c_iflags = CALLOUT_LOCAL_ALLOC;
else
- c->c_flags &= ~CALLOUT_PENDING;
- cc->cc_curr = c;
- cc->cc_cancel = 0;
+ c->c_iflags &= ~CALLOUT_PENDING;
+
+ cc_exec_curr(cc, direct) = c;
+ cc_exec_cancel(cc, direct) = false;
CC_UNLOCK(cc);
if (c_lock != NULL) {
- class->lc_lock(c_lock, sharedlock);
+ class->lc_lock(c_lock, lock_status);
/*
* The callout may have been cancelled
* while we switched locks.
*/
- if (cc->cc_cancel) {
+ if (cc_exec_cancel(cc, direct)) {
class->lc_unlock(c_lock);
goto skip;
}
/* The callout cannot be stopped now. */
- cc->cc_cancel = 1;
-
+ cc_exec_cancel(cc, direct) = true;
if (c_lock == &Giant.lock_object) {
+#ifdef CALLOUT_PROFILING
(*gcalls)++;
- CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
+#endif
+ CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
c, c_func, c_arg);
} else {
+#ifdef CALLOUT_PROFILING
(*lockcalls)++;
+#endif
CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
c, c_func, c_arg);
}
} else {
+#ifdef CALLOUT_PROFILING
(*mpcalls)++;
- CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p",
+#endif
+ CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
c, c_func, c_arg);
}
-#ifdef DIAGNOSTIC
- binuptime(&bt1);
+ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
+ "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
+#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
+ sbt1 = sbinuptime();
#endif
THREAD_NO_SLEEPING();
- SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
+ SDT_PROBE1(callout_execute, , , callout__start, c);
c_func(c_arg);
- SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
+ SDT_PROBE1(callout_execute, , , callout__end, c);
THREAD_SLEEPING_OK();
-#ifdef DIAGNOSTIC
- binuptime(&bt2);
- bintime_sub(&bt2, &bt1);
- if (bt2.frac > maxdt) {
- if (lastfunc != c_func || bt2.frac > maxdt * 2) {
- bintime2timespec(&bt2, &ts2);
+#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
+ sbt2 = sbinuptime();
+ sbt2 -= sbt1;
+ if (sbt2 > maxdt) {
+ if (lastfunc != c_func || sbt2 > maxdt * 2) {
+ ts2 = sbttots(sbt2);
printf(
"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
}
- maxdt = bt2.frac;
+ maxdt = sbt2;
lastfunc = c_func;
}
#endif
+ KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
CTR1(KTR_CALLOUT, "callout %p finished", c);
- if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
+ if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
class->lc_unlock(c_lock);
skip:
CC_LOCK(cc);
- KASSERT(cc->cc_curr == c, ("mishandled cc_curr"));
- cc->cc_curr = NULL;
- if (cc->cc_waiting) {
+ KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
+ cc_exec_curr(cc, direct) = NULL;
+ if (cc_exec_waiting(cc, direct)) {
/*
* There is someone waiting for the
* callout to complete.
@@ -548,21 +719,21 @@
* If the callout was scheduled for
* migration just cancel it.
*/
- if (cc_cme_migrating(cc)) {
- cc_cme_cleanup(cc);
+ if (cc_cce_migrating(cc, direct)) {
+ cc_cce_cleanup(cc, direct);
/*
* It should be assert here that the callout is not
* destroyed but that is not easy.
*/
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
}
- cc->cc_waiting = 0;
+ cc_exec_waiting(cc, direct) = false;
CC_UNLOCK(cc);
- wakeup(&cc->cc_waiting);
+ wakeup(&cc_exec_waiting(cc, direct));
CC_LOCK(cc);
- } else if (cc_cme_migrating(cc)) {
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
+ } else if (cc_cce_migrating(cc, direct)) {
+ KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
("Migrating legacy callout %p", c));
#ifdef SMP
/*
@@ -569,11 +740,12 @@
* If the callout was scheduled for
* migration just perform it now.
*/
- new_cpu = cc->cc_migration_cpu;
- new_ticks = cc->cc_migration_ticks;
- new_func = cc->cc_migration_func;
- new_arg = cc->cc_migration_arg;
- cc_cme_cleanup(cc);
+ new_cpu = cc_migration_cpu(cc, direct);
+ new_time = cc_migration_time(cc, direct);
+ new_prec = cc_migration_prec(cc, direct);
+ new_func = cc_migration_func(cc, direct);
+ new_arg = cc_migration_arg(cc, direct);
+ cc_cce_cleanup(cc, direct);
/*
* It should be assert here that the callout is not destroyed
@@ -581,7 +753,7 @@
*
* As first thing, handle deferred callout stops.
*/
- if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
+ if (!callout_migrating(c)) {
CTR3(KTR_CALLOUT,
"deferred cancelled %p func %p arg %p",
c, new_func, new_arg);
@@ -588,11 +760,12 @@
callout_cc_del(c, cc);
return;
}
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
new_cc = callout_cpu_switch(c, cc, new_cpu);
- callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
- new_cpu);
+ flags = (direct) ? C_DIRECT_EXEC : 0;
+ callout_cc_add(c, new_cc, new_time, new_prec, new_func,
+ new_arg, new_cpu, flags);
CC_UNLOCK(new_cc);
CC_LOCK(cc);
#else
@@ -603,19 +776,19 @@
* If the current callout is locally allocated (from
* timeout(9)) then put it on the freelist.
*
- * Note: we need to check the cached copy of c_flags because
+ * Note: we need to check the cached copy of c_iflags because
* if it was not local, then it's not safe to deref the
* callout pointer.
*/
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
- c->c_flags == CALLOUT_LOCAL_ALLOC,
+ KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
+ c->c_iflags == CALLOUT_LOCAL_ALLOC,
("corrupted callout"));
- if (c_flags & CALLOUT_LOCAL_ALLOC)
+ if (c_iflags & CALLOUT_LOCAL_ALLOC)
callout_cc_del(c, cc);
}
/*
- * The callout mechanism is based on the work of Adam M. Costello and
+ * The callout mechanism is based on the work of Adam M. Costello and
* George Varghese, published in a technical report entitled "Redesigning
* the BSD Callout and Timer Facilities" and modified slightly for inclusion
* in FreeBSD by Justin T. Gibbs. The original work on the data structures
@@ -635,63 +808,29 @@
{
struct callout_cpu *cc;
struct callout *c;
- struct callout_tailq *bucket;
- int curticks;
- int steps; /* #steps since we last allowed interrupts */
- int depth;
- int mpcalls;
- int lockcalls;
- int gcalls;
+#ifdef CALLOUT_PROFILING
+ int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
+#endif
-#ifndef MAX_SOFTCLOCK_STEPS
-#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
-#endif /* MAX_SOFTCLOCK_STEPS */
-
- mpcalls = 0;
- lockcalls = 0;
- gcalls = 0;
- depth = 0;
- steps = 0;
cc = (struct callout_cpu *)arg;
CC_LOCK(cc);
- while (cc->cc_softticks - 1 != cc->cc_ticks) {
- /*
- * cc_softticks may be modified by hard clock, so cache
- * it while we work on a given bucket.
- */
- curticks = cc->cc_softticks;
- cc->cc_softticks++;
- bucket = &cc->cc_callwheel[curticks & callwheelmask];
- c = TAILQ_FIRST(bucket);
- while (c != NULL) {
- depth++;
- if (c->c_time != curticks) {
- c = TAILQ_NEXT(c, c_links.tqe);
- ++steps;
- if (steps >= MAX_SOFTCLOCK_STEPS) {
- cc->cc_next = c;
- /* Give interrupts a chance. */
- CC_UNLOCK(cc);
- ; /* nothing */
- CC_LOCK(cc);
- c = cc->cc_next;
- steps = 0;
- }
- } else {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- TAILQ_REMOVE(bucket, c, c_links.tqe);
- softclock_call_cc(c, cc, &mpcalls,
- &lockcalls, &gcalls);
- steps = 0;
- c = cc->cc_next;
- }
- }
+ while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ softclock_call_cc(c, cc,
+#ifdef CALLOUT_PROFILING
+ &mpcalls, &lockcalls, &gcalls,
+#endif
+ 0);
+#ifdef CALLOUT_PROFILING
+ ++depth;
+#endif
}
+#ifdef CALLOUT_PROFILING
avg_depth += (depth * 1000 - avg_depth) >> 8;
avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
- cc->cc_next = NULL;
+#endif
CC_UNLOCK(cc);
}
@@ -706,7 +845,7 @@
* Initialize a handle so that using it with untimeout is benign.
*
* See AT&T BCI Driver Reference Manual for specification. This
- * implementation differs from that one in that although an
+ * implementation differs from that one in that although an
* identification value is returned from timeout, the original
* arguments to timeout as well as the identifier are used to
* identify entries for untimeout.
@@ -764,6 +903,56 @@
handle->callout = NULL;
}
+void
+callout_when(sbintime_t sbt, sbintime_t precision, int flags,
+ sbintime_t *res, sbintime_t *prec_res)
+{
+ sbintime_t to_sbt, to_pr;
+
+ if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
+ *res = sbt;
+ *prec_res = precision;
+ return;
+ }
+ if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
+ sbt = tick_sbt;
+ if ((flags & C_HARDCLOCK) != 0 ||
+#ifdef NO_EVENTTIMERS
+ sbt >= sbt_timethreshold) {
+ to_sbt = getsbinuptime();
+
+ /* Add safety belt for the case of hz > 1000. */
+ to_sbt += tc_tick_sbt - tick_sbt;
+#else
+ sbt >= sbt_tickthreshold) {
+ /*
+ * Obtain the time of the last hardclock() call on
+ * this CPU directly from the kern_clocksource.c.
+ * This value is per-CPU, but it is equal for all
+ * active ones.
+ */
+#ifdef __LP64__
+ to_sbt = DPCPU_GET(hardclocktime);
+#else
+ spinlock_enter();
+ to_sbt = DPCPU_GET(hardclocktime);
+ spinlock_exit();
+#endif
+#endif
+ if ((flags & C_HARDCLOCK) == 0)
+ to_sbt += tick_sbt;
+ } else
+ to_sbt = sbinuptime();
+ if (SBT_MAX - to_sbt < sbt)
+ to_sbt = SBT_MAX;
+ else
+ to_sbt += sbt;
+ *res = to_sbt;
+ to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
+ sbt >> C_PRELGET(flags));
+ *prec_res = to_pr > precision ? to_pr : precision;
+}
+
/*
* New interface; clients allocate their own callout structures.
*
@@ -781,28 +970,56 @@
* callout_deactivate() - marks the callout as having been serviced
*/
int
-callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
- void *arg, int cpu)
+callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
+ void (*ftn)(void *), void *arg, int cpu, int flags)
{
+ sbintime_t to_sbt, precision;
struct callout_cpu *cc;
- int cancelled = 0;
+ int cancelled, direct;
+ int ignore_cpu=0;
+ cancelled = 0;
+ if (cpu == -1) {
+ ignore_cpu = 1;
+ } else if ((cpu >= MAXCPU) ||
+ ((CC_CPU(cpu))->cc_inited == 0)) {
+ /* Invalid CPU spec */
+ panic("Invalid CPU in callout %d", cpu);
+ }
+ callout_when(sbt, prec, flags, &to_sbt, &precision);
+
+ /*
+ * This flag used to be added by callout_cc_add, but the
+ * first time you call this we could end up with the
+ * wrong direct flag if we don't do it before we add.
+ */
+ if (flags & C_DIRECT_EXEC) {
+ direct = 1;
+ } else {
+ direct = 0;
+ }
+ KASSERT(!direct || c->c_lock == NULL,
+ ("%s: direct callout %p has lock", __func__, c));
+ cc = callout_lock(c);
/*
* Don't allow migration of pre-allocated callouts lest they
- * become unbalanced.
+ * become unbalanced or handle the case where the user does
+ * not care.
*/
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
+ if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
+ ignore_cpu) {
cpu = c->c_cpu;
- cc = callout_lock(c);
- if (cc->cc_curr == c) {
+ }
+
+ if (cc_exec_curr(cc, direct) == c) {
/*
* We're being asked to reschedule a callout which is
* currently in progress. If there is a lock then we
* can cancel the callout if it has not really started.
*/
- if (c->c_lock != NULL && !cc->cc_cancel)
- cancelled = cc->cc_cancel = 1;
- if (cc->cc_waiting) {
+ if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
+ cancelled = cc_exec_cancel(cc, direct) = true;
+ if (cc_exec_waiting(cc, direct)) {
/*
* Someone has called callout_drain to kill this
* callout. Don't reschedule.
@@ -813,16 +1030,37 @@
CC_UNLOCK(cc);
return (cancelled);
}
+#ifdef SMP
+ if (callout_migrating(c)) {
+ /*
+ * This only occurs when a second callout_reset_sbt_on
+ * is made after a previous one moved it into
+ * deferred migration (below). Note we do *not* change
+ * the prev_cpu even though the previous target may
+ * be different.
+ */
+ cc_migration_cpu(cc, direct) = cpu;
+ cc_migration_time(cc, direct) = to_sbt;
+ cc_migration_prec(cc, direct) = precision;
+ cc_migration_func(cc, direct) = ftn;
+ cc_migration_arg(cc, direct) = arg;
+ cancelled = 1;
+ CC_UNLOCK(cc);
+ return (cancelled);
+ }
+#endif
}
- if (c->c_flags & CALLOUT_PENDING) {
- if (cc->cc_next == c) {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ if (c->c_iflags & CALLOUT_PENDING) {
+ if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
+ if (cc_exec_next(cc) == c)
+ cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
+ LIST_REMOVE(c, c_links.le);
+ } else {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
}
- TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
- c_links.tqe);
-
cancelled = 1;
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
+ c->c_iflags &= ~ CALLOUT_PENDING;
+ c->c_flags &= ~ CALLOUT_ACTIVE;
}
#ifdef SMP
@@ -832,15 +1070,34 @@
* to a more appropriate moment.
*/
if (c->c_cpu != cpu) {
- if (cc->cc_curr == c) {
- cc->cc_migration_cpu = cpu;
- cc->cc_migration_ticks = to_ticks;
- cc->cc_migration_func = ftn;
- cc->cc_migration_arg = arg;
- c->c_flags |= CALLOUT_DFRMIGRATION;
- CTR5(KTR_CALLOUT,
- "migration of %p func %p arg %p in %d to %u deferred",
- c, c->c_func, c->c_arg, to_ticks, cpu);
+ if (cc_exec_curr(cc, direct) == c) {
+ /*
+ * Pending will have been removed since we are
+ * actually executing the callout on another
+ * CPU. That callout should be waiting on the
+ * lock the caller holds. If we set both
+ * active/and/pending after we return and the
+ * lock on the executing callout proceeds, it
+ * will then see pending is true and return.
+ * At the return from the actual callout execution
+ * the migration will occur in softclock_call_cc
+ * and this new callout will be placed on the
+ * new CPU via a call to callout_cpu_switch() which
+ * will get the lock on the right CPU followed
+ * by a call callout_cc_add() which will add it there.
+ * (see above in softclock_call_cc()).
+ */
+ cc_migration_cpu(cc, direct) = cpu;
+ cc_migration_time(cc, direct) = to_sbt;
+ cc_migration_prec(cc, direct) = precision;
+ cc_migration_func(cc, direct) = ftn;
+ cc_migration_arg(cc, direct) = arg;
+ c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
+ c->c_flags |= CALLOUT_ACTIVE;
+ CTR6(KTR_CALLOUT,
+ "migration of %p func %p arg %p in %d.%08x to %u deferred",
+ c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
+ (u_int)(to_sbt & 0xffffffff), cpu);
CC_UNLOCK(cc);
return (cancelled);
}
@@ -848,9 +1105,10 @@
}
#endif
- callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
- CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
- cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
+ callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
+ CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
+ cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
+ (u_int)(to_sbt & 0xffffffff));
CC_UNLOCK(cc);
return (cancelled);
@@ -872,19 +1130,20 @@
}
int
-_callout_stop_safe(c, safe)
+_callout_stop_safe(c, flags)
struct callout *c;
- int safe;
+ int flags;
{
struct callout_cpu *cc, *old_cc;
struct lock_class *class;
- int use_lock, sq_locked;
+ int direct, sq_locked, use_lock;
+ int not_on_a_list;
/*
* Some old subsystems don't hold Giant while running a callout_stop(),
* so just discard this check for the moment.
*/
- if (!safe && c->c_lock != NULL) {
+ if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
if (c->c_lock == &Giant.lock_object)
use_lock = mtx_owned(&Giant);
else {
@@ -894,12 +1153,38 @@
}
} else
use_lock = 0;
-
+ if (c->c_iflags & CALLOUT_DIRECT) {
+ direct = 1;
+ } else {
+ direct = 0;
+ }
sq_locked = 0;
old_cc = NULL;
again:
cc = callout_lock(c);
+ if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
+ (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
+ ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
+ /*
+ * Special case where this slipped in while we
+ * were migrating *as* the callout is about to
+ * execute. The caller probably holds the lock
+ * the callout wants.
+ *
+ * Get rid of the migration first. Then set
+ * the flag that tells this code *not* to
+ * try to remove it from any lists (its not
+ * on one yet). When the callout wheel runs,
+ * it will ignore this callout.
+ */
+ c->c_iflags &= ~CALLOUT_PENDING;
+ c->c_flags &= ~CALLOUT_ACTIVE;
+ not_on_a_list = 1;
+ } else {
+ not_on_a_list = 0;
+ }
+
/*
* If the callout was migrating while the callout cpu lock was
* dropped, just drop the sleepqueue lock and check the states
@@ -908,7 +1193,7 @@
if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
CC_UNLOCK(cc);
- sleepq_release(&old_cc->cc_waiting);
+ sleepq_release(&cc_exec_waiting(old_cc, direct));
sq_locked = 0;
old_cc = NULL;
goto again;
@@ -922,7 +1207,7 @@
* don't attempt to remove it from the queue. We can try to
* stop it by other means however.
*/
- if (!(c->c_flags & CALLOUT_PENDING)) {
+ if (!(c->c_iflags & CALLOUT_PENDING)) {
c->c_flags &= ~CALLOUT_ACTIVE;
/*
@@ -929,16 +1214,16 @@
* If it wasn't on the queue and it isn't the current
* callout, then we can't stop it, so just bail.
*/
- if (cc->cc_curr != c) {
+ if (cc_exec_curr(cc, direct) != c) {
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
if (sq_locked)
- sleepq_release(&cc->cc_waiting);
+ sleepq_release(&cc_exec_waiting(cc, direct));
return (0);
}
- if (safe) {
+ if ((flags & CS_DRAIN) != 0) {
/*
* The current callout is running (or just
* about to run) and blocking is allowed, so
@@ -945,8 +1230,7 @@
* just wait for the current invocation to
* finish.
*/
- while (cc->cc_curr == c) {
-
+ while (cc_exec_curr(cc, direct) == c) {
/*
* Use direct calls to sleepqueue interface
* instead of cv/msleep in order to avoid
@@ -966,7 +1250,8 @@
*/
if (!sq_locked) {
CC_UNLOCK(cc);
- sleepq_lock(&cc->cc_waiting);
+ sleepq_lock(
+ &cc_exec_waiting(cc, direct));
sq_locked = 1;
old_cc = cc;
goto again;
@@ -978,13 +1263,16 @@
* will be packed up, just let softclock()
* take care of it.
*/
- cc->cc_waiting = 1;
+ cc_exec_waiting(cc, direct) = true;
DROP_GIANT();
CC_UNLOCK(cc);
- sleepq_add(&cc->cc_waiting,
+ sleepq_add(
+ &cc_exec_waiting(cc, direct),
&cc->cc_lock.lock_object, "codrain",
SLEEPQ_SLEEP, 0);
- sleepq_wait(&cc->cc_waiting, 0);
+ sleepq_wait(
+ &cc_exec_waiting(cc, direct),
+ 0);
sq_locked = 0;
old_cc = NULL;
@@ -992,7 +1280,9 @@
PICKUP_GIANT();
CC_LOCK(cc);
}
- } else if (use_lock && !cc->cc_cancel) {
+ } else if (use_lock &&
+ !cc_exec_cancel(cc, direct)) {
+
/*
* The current callout is waiting for its
* lock which we hold. Cancel the callout
@@ -1000,20 +1290,52 @@
* lock, the callout will be skipped in
* softclock().
*/
- cc->cc_cancel = 1;
+ cc_exec_cancel(cc, direct) = true;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- KASSERT(!cc_cme_migrating(cc),
+ KASSERT(!cc_cce_migrating(cc, direct),
("callout wrongly scheduled for migration"));
+ if (callout_migrating(c)) {
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
+#ifdef SMP
+ cc_migration_cpu(cc, direct) = CPUBLOCK;
+ cc_migration_time(cc, direct) = 0;
+ cc_migration_prec(cc, direct) = 0;
+ cc_migration_func(cc, direct) = NULL;
+ cc_migration_arg(cc, direct) = NULL;
+#endif
+ }
CC_UNLOCK(cc);
KASSERT(!sq_locked, ("sleepqueue chain locked"));
return (1);
- } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ } else if (callout_migrating(c)) {
+ /*
+ * The callout is currently being serviced
+ * and the "next" callout is scheduled at
+ * its completion with a migration. We remove
+ * the migration flag so it *won't* get rescheduled,
+ * but we can't stop the one that's running so
+ * we return 0.
+ */
+ c->c_iflags &= ~CALLOUT_DFRMIGRATION;
+#ifdef SMP
+ /*
+ * We can't call cc_cce_cleanup here since
+ * if we do it will remove .ce_curr and
+ * it's still running. This will prevent a
+ * reschedule of the callout when the
+ * execution completes.
+ */
+ cc_migration_cpu(cc, direct) = CPUBLOCK;
+ cc_migration_time(cc, direct) = 0;
+ cc_migration_prec(cc, direct) = 0;
+ cc_migration_func(cc, direct) = NULL;
+ cc_migration_arg(cc, direct) = NULL;
+#endif
CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
- return (1);
+ return ((flags & CS_MIGRBLOCK) != 0);
}
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
@@ -1022,18 +1344,23 @@
return (0);
}
if (sq_locked)
- sleepq_release(&cc->cc_waiting);
+ sleepq_release(&cc_exec_waiting(cc, direct));
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
+ c->c_iflags &= ~CALLOUT_PENDING;
+ c->c_flags &= ~CALLOUT_ACTIVE;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- if (cc->cc_next == c)
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
- c_links.tqe);
+ if (not_on_a_list == 0) {
+ if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
+ if (cc_exec_next(cc) == c)
+ cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
+ LIST_REMOVE(c, c_links.le);
+ } else {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ }
+ }
callout_cc_del(c, cc);
-
CC_UNLOCK(cc);
return (1);
}
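_callout_stop_safe() now takes a flags word instead of the old boolean "safe" argument: CS_DRAIN asks to sleep until a running callout has finished, and CS_MIGRBLOCK makes cancelling a deferred migration count as a successful stop. A rough sketch of how callers could map onto it; the wrapper definitions below are an assumption for illustration and the real macros in sys/callout.h may differ:

/* Illustrative wrappers only; see sys/callout.h for the real definitions. */
#define callout_stop(c)    _callout_stop_safe((c), 0)
#define callout_drain(c)   _callout_stop_safe((c), CS_DRAIN)

/* A hypothetical caller that wants a cancelled deferred migration
 * reported as success would pass CS_MIGRBLOCK explicitly: */
error = _callout_stop_safe(&c, CS_MIGRBLOCK);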
@@ -1046,10 +1373,10 @@
bzero(c, sizeof *c);
if (mpsafe) {
c->c_lock = NULL;
- c->c_flags = CALLOUT_RETURNUNLOCKED;
+ c->c_iflags = CALLOUT_RETURNUNLOCKED;
} else {
c->c_lock = &Giant.lock_object;
- c->c_flags = 0;
+ c->c_iflags = 0;
}
c->c_cpu = timeout_cpu;
}
@@ -1069,7 +1396,7 @@
KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
(LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
__func__));
- c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
+ c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
c->c_cpu = timeout_cpu;
}
@@ -1086,7 +1413,7 @@
* which set the timer can do the maintanence the timer was for as close
* as possible to the originally intended time. Testing this code for a
* week showed that resuming from a suspend resulted in 22 to 25 timers
- * firing, which seemed independant on whether the suspend was 2 hours or
+ * firing, which seemed independent on whether the suspend was 2 hours or
* 2 days. Your milage may vary. - Ken Key <key at cs.utk.edu>
*/
void
@@ -1138,3 +1465,152 @@
return;
}
#endif /* APM_FIXUP_CALLTODO */
+
+static int
+flssbt(sbintime_t sbt)
+{
+
+ sbt += (uint64_t)sbt >> 1;
+ if (sizeof(long) >= sizeof(sbintime_t))
+ return (flsl(sbt));
+ if (sbt >= SBT_1S)
+ return (flsl(((uint64_t)sbt) >> 32) + 32);
+ return (flsl(sbt));
+}
+
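flssbt() maps an interval to the index of its highest set bit after scaling by 1.5 (sbt += sbt >> 1), so that values just below a power of two land in the higher bucket. That index is what lets the sysctl handler below build log2 histograms with ct[flssbt(t)]++ and cpr[flssbt(precision)]++. A self-contained sketch of the same idea, assuming SBT_1S == 1 << 32 as in the 32.32 sbintime_t layout; fls64() here is a stand-in for the kernel's fls()/flsl():

#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define SBT_1S  ((sbintime_t)1 << 32)
#define SBT_1MS (SBT_1S / 1000)
#define SBT_1US (SBT_1S / 1000000)

/* Stand-in for the kernel's flsl(): 1-based index of the highest set bit. */
static int
fls64(uint64_t x)
{
	int i = 0;

	while (x != 0) {
		i++;
		x >>= 1;
	}
	return (i);
}

/* Same scaling trick as flssbt(): multiply by 1.5 before taking fls. */
static int
bucket(sbintime_t sbt)
{
	sbt += (uint64_t)sbt >> 1;
	return (fls64((uint64_t)sbt));
}

int
main(void)
{
	printf("1us -> bucket %d\n", bucket(SBT_1US));
	printf("1ms -> bucket %d\n", bucket(SBT_1MS));
	printf("1s  -> bucket %d\n", bucket(SBT_1S));
	return (0);
}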
+/*
+ * Dump immediate statistic snapshot of the scheduled callouts.
+ */
+static int
+sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
+{
+ struct callout *tmp;
+ struct callout_cpu *cc;
+ struct callout_list *sc;
+ sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
+ int ct[64], cpr[64], ccpbk[32];
+ int error, val, i, count, tcum, pcum, maxc, c, medc;
+#ifdef SMP
+ int cpu;
+#endif
+
+ val = 0;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ count = maxc = 0;
+ st = spr = maxt = maxpr = 0;
+ bzero(ccpbk, sizeof(ccpbk));
+ bzero(ct, sizeof(ct));
+ bzero(cpr, sizeof(cpr));
+ now = sbinuptime();
+#ifdef SMP
+ CPU_FOREACH(cpu) {
+ cc = CC_CPU(cpu);
+#else
+ cc = CC_CPU(timeout_cpu);
+#endif
+ CC_LOCK(cc);
+ for (i = 0; i < callwheelsize; i++) {
+ sc = &cc->cc_callwheel[i];
+ c = 0;
+ LIST_FOREACH(tmp, sc, c_links.le) {
+ c++;
+ t = tmp->c_time - now;
+ if (t < 0)
+ t = 0;
+ st += t / SBT_1US;
+ spr += tmp->c_precision / SBT_1US;
+ if (t > maxt)
+ maxt = t;
+ if (tmp->c_precision > maxpr)
+ maxpr = tmp->c_precision;
+ ct[flssbt(t)]++;
+ cpr[flssbt(tmp->c_precision)]++;
+ }
+ if (c > maxc)
+ maxc = c;
+ ccpbk[fls(c + c / 2)]++;
+ count += c;
+ }
+ CC_UNLOCK(cc);
+#ifdef SMP
+ }
+#endif
+
+ for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
+ tcum += ct[i];
+ medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
+ for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
+ pcum += cpr[i];
+ medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
+ for (i = 0, c = 0; i < 32 && c < count / 2; i++)
+ c += ccpbk[i];
+ medc = (i >= 2) ? (1 << (i - 2)) : 0;
+
+ printf("Scheduled callouts statistic snapshot:\n");
+ printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n",
+ count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
+ printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n",
+ medc,
+ count / callwheelsize / mp_ncpus,
+ (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
+ maxc);
+ printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
+ medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
+ (st / count) / 1000000, (st / count) % 1000000,
+ maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
+ printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
+ medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
+ (spr / count) / 1000000, (spr / count) % 1000000,
+ maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
+ printf(" Distribution: \tbuckets\t time\t tcum\t"
+ " prec\t pcum\n");
+ for (i = 0, tcum = pcum = 0; i < 64; i++) {
+ if (ct[i] == 0 && cpr[i] == 0)
+ continue;
+ t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
+ tcum += ct[i];
+ pcum += cpr[i];
+ printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
+ t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
+ i - 1 - (32 - CC_HASH_SHIFT),
+ ct[i], tcum, cpr[i], pcum);
+ }
+ return (error);
+}
+SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_kern_callout_stat, "I",
+ "Dump immediate statistic snapshot of the scheduled callouts");
+
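Because the handler only acts when new data is supplied (req->newptr != NULL), the snapshot is produced by writing any integer to the OID, e.g. "sysctl kern.callout_stat=1"; the report itself goes to the console/kernel message buffer via printf(). A minimal sketch of triggering it programmatically with the standard sysctlbyname(3) interface:

/* Sketch: trigger the callout statistics dump from userland. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>

int
main(void)
{
	int one = 1;

	if (sysctlbyname("kern.callout_stat", NULL, NULL, &one,
	    sizeof(one)) != 0)
		err(1, "sysctlbyname");
	return (0);
}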
+#ifdef DDB
+static void
+_show_callout(struct callout *c)
+{
+
+ db_printf("callout %p\n", c);
+#define C_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, c->e);
+ db_printf(" &c_links = %p\n", &(c->c_links));
+ C_DB_PRINTF("%" PRId64, c_time);
+ C_DB_PRINTF("%" PRId64, c_precision);
+ C_DB_PRINTF("%p", c_arg);
+ C_DB_PRINTF("%p", c_func);
+ C_DB_PRINTF("%p", c_lock);
+ C_DB_PRINTF("%#x", c_flags);
+ C_DB_PRINTF("%#x", c_iflags);
+ C_DB_PRINTF("%d", c_cpu);
+#undef C_DB_PRINTF
+}
+
+DB_SHOW_COMMAND(callout, db_show_callout)
+{
+
+ if (!have_addr) {
+ db_printf("usage: show callout <struct callout *>\n");
+ return;
+ }
+
+ _show_callout((struct callout *)addr);
+}
+#endif /* DDB */
Modified: trunk/sys/kern/kern_umtx.c
===================================================================
--- trunk/sys/kern/kern_umtx.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/kern_umtx.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004, David Xu <davidxu at freebsd.org>
* Copyright (c) 2002, Jeffrey Roberson <jeff at freebsd.org>
@@ -26,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_umtx.c 330678 2018-03-09 01:21:22Z brooks $");
#include "opt_compat.h"
#include "opt_umtx_profiling.h"
@@ -39,6 +40,7 @@
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -64,6 +66,11 @@
#define _UMUTEX_TRY 1
#define _UMUTEX_WAIT 2
+#ifdef UMTX_PROFILING
+#define UPROF_PERC_BIGGER(w, f, sw, sf) \
+ (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
+#endif
+
/* Priority inheritance mutex info. */
struct umtx_pi {
/* Owner thread */
@@ -157,13 +164,12 @@
TAILQ_HEAD(,umtx_pi) uc_pi_list;
#ifdef UMTX_PROFILING
- int length;
- int max_length;
+ u_int length;
+ u_int max_length;
#endif
};
#define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
-#define UMTXQ_BUSY_ASSERT(uc) KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
/*
* Don't propagate time-sharing priority, there is a security reason,
@@ -187,6 +193,12 @@
#define BUSY_SPINS 200
+struct abs_timeout {
+ int clockid;
+ struct timespec cur;
+ struct timespec end;
+};
+
static uma_zone_t umtx_pi_zone;
static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
@@ -211,7 +223,7 @@
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
-static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
+static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
@@ -246,6 +258,117 @@
"max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
}
}
+
+static int
+sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
+{
+ char buf[512];
+ struct sbuf sb;
+ struct umtxq_chain *uc;
+ u_int fract, i, j, tot, whole;
+ u_int sf0, sf1, sf2, sf3, sf4;
+ u_int si0, si1, si2, si3, si4;
+ u_int sw0, sw1, sw2, sw3, sw4;
+
+ sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
+ for (i = 0; i < 2; i++) {
+ tot = 0;
+ for (j = 0; j < UMTX_CHAINS; ++j) {
+ uc = &umtxq_chains[i][j];
+ mtx_lock(&uc->uc_lock);
+ tot += uc->max_length;
+ mtx_unlock(&uc->uc_lock);
+ }
+ if (tot == 0)
+ sbuf_printf(&sb, "%u) Empty ", i);
+ else {
+ sf0 = sf1 = sf2 = sf3 = sf4 = 0;
+ si0 = si1 = si2 = si3 = si4 = 0;
+ sw0 = sw1 = sw2 = sw3 = sw4 = 0;
+ for (j = 0; j < UMTX_CHAINS; j++) {
+ uc = &umtxq_chains[i][j];
+ mtx_lock(&uc->uc_lock);
+ whole = uc->max_length * 100;
+ mtx_unlock(&uc->uc_lock);
+ fract = (whole % tot) * 100;
+ if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
+ sf0 = fract;
+ si0 = j;
+ sw0 = whole;
+ } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
+ sf1)) {
+ sf1 = fract;
+ si1 = j;
+ sw1 = whole;
+ } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
+ sf2)) {
+ sf2 = fract;
+ si2 = j;
+ sw2 = whole;
+ } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
+ sf3)) {
+ sf3 = fract;
+ si3 = j;
+ sw3 = whole;
+ } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
+ sf4)) {
+ sf4 = fract;
+ si4 = j;
+ sw4 = whole;
+ }
+ }
+ sbuf_printf(&sb, "queue %u:\n", i);
+ sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
+ sf0 / tot, si0);
+ sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
+ sf1 / tot, si1);
+ sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
+ sf2 / tot, si2);
+ sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
+ sf3 / tot, si3);
+ sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
+ sf4 / tot, si4);
+ }
+ }
+ sbuf_trim(&sb);
+ sbuf_finish(&sb);
+ sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+ sbuf_delete(&sb);
+ return (0);
+}
+
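The peaks handler above reports each chain's share of the total using only integer arithmetic: whole = max_length * 100 gives the percentage scaled by the total, whole / tot is the integer percent, and ((whole % tot) * 100) / tot supplies the fractional digits. The same decomposition, detached from the sysctl machinery, as a small sketch:

/* Sketch: print value/total as a percentage with two integer divisions,
 * mirroring the arithmetic in sysctl_debug_umtx_chains_peaks(). */
#include <stdio.h>

static void
print_percent(unsigned value, unsigned total)
{
	unsigned whole = value * 100;
	unsigned fract = (whole % total) * 100;

	printf("%u.%u%%\n", whole / total, fract / total);
}

int
main(void)
{
	print_percent(37, 120);		/* prints 30.83% */
	return (0);
}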
+static int
+sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
+{
+ struct umtxq_chain *uc;
+ u_int i, j;
+ int clear, error;
+
+ clear = 0;
+ error = sysctl_handle_int(oidp, &clear, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ if (clear != 0) {
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < UMTX_CHAINS; ++j) {
+ uc = &umtxq_chains[i][j];
+ mtx_lock(&uc->uc_lock);
+ uc->length = 0;
+ uc->max_length = 0;
+ mtx_unlock(&uc->uc_lock);
+ }
+ }
+ }
+ return (0);
+}
+
+SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
+ sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
+SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
+ sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif
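With UMTX_PROFILING enabled, the two handlers above let an administrator inspect and reset the chain-length statistics. A hedged sketch of driving them from userland; the OID path used here (debug.umtx.chains.*) is an assumption inferred from the _debug_umtx_chains parent and should be checked against sysctl -a on a profiling kernel:

/* Sketch: read the peak report and reset the counters.  Assumes a kernel
 * built with UMTX_PROFILING; the OID names are assumptions. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	char buf[512];
	size_t len = sizeof(buf);
	int one = 1;

	if (sysctlbyname("debug.umtx.chains.peaks", buf, &len, NULL, 0) != 0)
		err(1, "peaks");
	printf("%.*s\n", (int)len, buf);

	if (sysctlbyname("debug.umtx.chains.clear", NULL, NULL, &one,
	    sizeof(one)) != 0)
		err(1, "clear");
	return (0);
}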
static void
@@ -274,7 +397,7 @@
#ifdef UMTX_PROFILING
umtx_init_profiling();
#endif
- mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
+ mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
EVENTHANDLER_PRI_ANY);
}
@@ -387,6 +510,15 @@
wakeup_one(uc);
}
+static inline void
+umtxq_unbusy_unlocked(struct umtx_key *key)
+{
+
+ umtxq_lock(key);
+ umtxq_unbusy(key);
+ umtxq_unlock(key);
+}
+
static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
@@ -419,19 +551,19 @@
uh = uq->uq_spare_queue;
uh->key = uq->uq_key;
LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
+#ifdef UMTX_PROFILING
+ uc->length++;
+ if (uc->length > uc->max_length) {
+ uc->max_length = uc->length;
+ if (uc->max_length > max_length)
+ max_length = uc->max_length;
+ }
+#endif
}
uq->uq_spare_queue = NULL;
TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
uh->length++;
-#ifdef UMTX_PROFILING
- uc->length++;
- if (uc->length > uc->max_length) {
- uc->max_length = uc->length;
- if (uc->max_length > max_length)
- max_length = uc->max_length;
- }
-#endif
uq->uq_flags |= UQF_UMTXQ;
uq->uq_cur_queue = uh;
return;
@@ -449,13 +581,13 @@
uh = uq->uq_cur_queue;
TAILQ_REMOVE(&uh->head, uq, uq_link);
uh->length--;
-#ifdef UMTX_PROFILING
- uc->length--;
-#endif
uq->uq_flags &= ~UQF_UMTXQ;
if (TAILQ_EMPTY(&uh->head)) {
KASSERT(uh->length == 0,
("inconsistent umtxq_queue length"));
+#ifdef UMTX_PROFILING
+ uc->length--;
+#endif
LIST_REMOVE(uh, link);
} else {
uh = LIST_FIRST(&uc->uc_spare_queue);
@@ -505,6 +637,32 @@
return (0);
}
+static int
+umtxq_check_susp(struct thread *td)
+{
+ struct proc *p;
+ int error;
+
+ /*
+ * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
+ * eventually break the lockstep loop.
+ */
+ if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
+ return (0);
+ error = 0;
+ p = td->td_proc;
+ PROC_LOCK(p);
+ if (P_SHOULDSTOP(p) ||
+ ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
+ if (p->p_flag & P_SINGLE_EXIT)
+ error = EINTR;
+ else
+ error = ERESTART;
+ }
+ PROC_UNLOCK(p);
+ return (error);
+}
+
/*
* Wake up threads waiting on an userland object.
*/
@@ -547,23 +705,88 @@
wakeup(uq);
}
+static inline int
+tstohz(const struct timespec *tsp)
+{
+ struct timeval tv;
+
+ TIMESPEC_TO_TIMEVAL(&tv, tsp);
+ return tvtohz(&tv);
+}
+
+static void
+abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
+ const struct timespec *timeout)
+{
+
+ timo->clockid = clockid;
+ if (!absolute) {
+ kern_clock_gettime(curthread, clockid, &timo->end);
+ timo->cur = timo->end;
+ timespecadd(&timo->end, timeout);
+ } else {
+ timo->end = *timeout;
+ kern_clock_gettime(curthread, clockid, &timo->cur);
+ }
+}
+
+static void
+abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
+{
+
+ abs_timeout_init(timo, umtxtime->_clockid,
+ (umtxtime->_flags & UMTX_ABSTIME) != 0,
+ &umtxtime->_timeout);
+}
+
+static inline void
+abs_timeout_update(struct abs_timeout *timo)
+{
+ kern_clock_gettime(curthread, timo->clockid, &timo->cur);
+}
+
+static int
+abs_timeout_gethz(struct abs_timeout *timo)
+{
+ struct timespec tts;
+
+ if (timespeccmp(&timo->end, &timo->cur, <=))
+ return (-1);
+ tts = timo->end;
+ timespecsub(&tts, &timo->cur);
+ return (tstohz(&tts));
+}
+
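The abs_timeout helpers above replace the old per-call tick arithmetic: the deadline is captured once, and each retry recomputes the remaining time against the chosen clock. A condensed sketch of the intended call pattern; wait_with_deadline() is a hypothetical consumer, and the real one is umtxq_sleep() just below:

/* Sketch of the abs_timeout life cycle, using the helpers added above. */
static int
wait_with_deadline(void *chan, struct mtx *lock, const struct timespec *ts)
{
	struct abs_timeout timo;
	int error, hz_left;

	abs_timeout_init(&timo, CLOCK_MONOTONIC, 0 /* relative */, ts);
	for (;;) {
		hz_left = abs_timeout_gethz(&timo);
		if (hz_left < 0)
			return (ETIMEDOUT);	/* deadline already passed */
		error = msleep(chan, lock, PCATCH, "example", hz_left);
		if (error != EWOULDBLOCK)
			return (error);		/* woken or signalled */
		abs_timeout_update(&timo);	/* re-read the clock, retry */
	}
}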
/*
* Put thread into sleep state, before sleeping, check if
* thread was removed from umtx queue.
*/
static inline int
-umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
+umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
struct umtxq_chain *uc;
- int error;
+ int error, timo;
uc = umtxq_getchain(&uq->uq_key);
UMTXQ_LOCKED_ASSERT(uc);
- if (!(uq->uq_flags & UQF_UMTXQ))
- return (0);
- error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
- if (error == EWOULDBLOCK)
- error = ETIMEDOUT;
+ for (;;) {
+ if (!(uq->uq_flags & UQF_UMTXQ))
+ return (0);
+ if (abstime != NULL) {
+ timo = abs_timeout_gethz(abstime);
+ if (timo < 0)
+ return (ETIMEDOUT);
+ } else
+ timo = 0;
+ error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
+ if (error != EWOULDBLOCK) {
+ umtxq_lock(&uq->uq_key);
+ break;
+ }
+ if (abstime != NULL)
+ abs_timeout_update(abstime);
+ umtxq_lock(&uq->uq_key);
+ }
return (error);
}
@@ -627,8 +850,10 @@
* Lock a umtx object.
*/
static int
-_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
+do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
+ const struct timespec *timeout)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
u_long owner;
u_long old;
@@ -635,6 +860,8 @@
int error = 0;
uq = td->td_umtxq;
+ if (timeout != NULL)
+ abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
/*
* Care must be exercised when dealing with umtx structure. It
@@ -666,6 +893,10 @@
if (owner == -1)
return (EFAULT);
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
+
/* If this failed the lock has changed, restart. */
continue;
}
@@ -675,7 +906,7 @@
* exit immediately.
*/
if (error != 0)
- return (error);
+ break;
if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
AUTO_SHARE, &uq->uq_key)) != 0)
@@ -711,48 +942,21 @@
*/
umtxq_lock(&uq->uq_key);
if (old == owner)
- error = umtxq_sleep(uq, "umtx", timo);
+ error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
+ &timo);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
+
+ if (error == 0)
+ error = umtxq_check_susp(td);
}
- return (0);
-}
-
-/*
- * Lock a umtx object.
- */
-static int
-do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
- struct timespec *timeout)
-{
- struct timespec ts, ts2, ts3;
- struct timeval tv;
- int error;
-
if (timeout == NULL) {
- error = _do_lock_umtx(td, umtx, id, 0);
/* Mutex locking is restarted if it is interrupted. */
if (error == EINTR)
error = ERESTART;
} else {
- getnanouptime(&ts);
- timespecadd(&ts, timeout);
- TIMESPEC_TO_TIMEVAL(&tv, timeout);
- for (;;) {
- error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
- if (error != ETIMEDOUT)
- break;
- getnanouptime(&ts2);
- if (timespeccmp(&ts2, &ts, >=)) {
- error = ETIMEDOUT;
- break;
- }
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
- }
/* Timed-locking is not restarted. */
if (error == ERESTART)
error = EINTR;
@@ -827,8 +1031,10 @@
* Lock a umtx object.
*/
static int
-_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
+do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
+ const struct timespec *timeout)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
uint32_t owner;
uint32_t old;
@@ -836,6 +1042,9 @@
uq = td->td_umtxq;
+ if (timeout != NULL)
+ abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
+
/*
* Care must be exercised when dealing with umtx structure. It
* can fault on any access.
@@ -865,6 +1074,10 @@
if (owner == -1)
return (EFAULT);
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
+
/* If this failed the lock has changed, restart. */
continue;
}
@@ -910,48 +1123,21 @@
*/
umtxq_lock(&uq->uq_key);
if (old == owner)
- error = umtxq_sleep(uq, "umtx", timo);
+ error = umtxq_sleep(uq, "umtx", timeout == NULL ?
+ NULL : &timo);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
+
+ if (error == 0)
+ error = umtxq_check_susp(td);
}
- return (0);
-}
-
-/*
- * Lock a umtx object.
- */
-static int
-do_lock_umtx32(struct thread *td, void *m, uint32_t id,
- struct timespec *timeout)
-{
- struct timespec ts, ts2, ts3;
- struct timeval tv;
- int error;
-
if (timeout == NULL) {
- error = _do_lock_umtx32(td, m, id, 0);
/* Mutex locking is restarted if it is interrupted. */
if (error == EINTR)
error = ERESTART;
} else {
- getnanouptime(&ts);
- timespecadd(&ts, timeout);
- TIMESPEC_TO_TIMEVAL(&tv, timeout);
- for (;;) {
- error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
- if (error != ETIMEDOUT)
- break;
- getnanouptime(&ts2);
- if (timespeccmp(&ts2, &ts, >=)) {
- error = ETIMEDOUT;
- break;
- }
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
- }
/* Timed-locking is not restarted. */
if (error == ERESTART)
error = EINTR;
@@ -1026,12 +1212,12 @@
*/
static int
do_wait(struct thread *td, void *addr, u_long id,
- struct timespec *timeout, int compat32, int is_private)
+ struct _umtx_time *timeout, int compat32, int is_private)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
- struct timespec ts, ts2, ts3;
- struct timeval tv;
u_long tmp;
+ uint32_t tmp32;
int error = 0;
uq = td->td_umtxq;
@@ -1039,50 +1225,36 @@
is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
return (error);
+ if (timeout != NULL)
+ abs_timeout_init2(&timo, timeout);
+
umtxq_lock(&uq->uq_key);
umtxq_insert(uq);
umtxq_unlock(&uq->uq_key);
- if (compat32 == 0)
- tmp = fuword(addr);
- else
- tmp = (unsigned int)fuword32(addr);
- if (tmp != id) {
- umtxq_lock(&uq->uq_key);
- umtxq_remove(uq);
- umtxq_unlock(&uq->uq_key);
- } else if (timeout == NULL) {
- umtxq_lock(&uq->uq_key);
- error = umtxq_sleep(uq, "uwait", 0);
- umtxq_remove(uq);
- umtxq_unlock(&uq->uq_key);
+ if (compat32 == 0) {
+ error = fueword(addr, &tmp);
+ if (error != 0)
+ error = EFAULT;
} else {
- getnanouptime(&ts);
- timespecadd(&ts, timeout);
- TIMESPEC_TO_TIMEVAL(&tv, timeout);
- umtxq_lock(&uq->uq_key);
- for (;;) {
- error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
- if (!(uq->uq_flags & UQF_UMTXQ)) {
- error = 0;
- break;
- }
- if (error != ETIMEDOUT)
- break;
- umtxq_unlock(&uq->uq_key);
- getnanouptime(&ts2);
- if (timespeccmp(&ts2, &ts, >=)) {
- error = ETIMEDOUT;
- umtxq_lock(&uq->uq_key);
- break;
- }
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
- umtxq_lock(&uq->uq_key);
- }
+ error = fueword32(addr, &tmp32);
+ if (error == 0)
+ tmp = tmp32;
+ else
+ error = EFAULT;
+ }
+ umtxq_lock(&uq->uq_key);
+ if (error == 0) {
+ if (tmp == id)
+ error = umtxq_sleep(uq, "uwait", timeout == NULL ?
+ NULL : &timo);
+ if ((uq->uq_flags & UQF_UMTXQ) == 0)
+ error = 0;
+ else
+ umtxq_remove(uq);
+ } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
umtxq_remove(uq);
- umtxq_unlock(&uq->uq_key);
}
+ umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
if (error == ERESTART)
error = EINTR;
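A recurring change throughout this file is the switch from fuword()/fuword32()/casuword32() to fueword()/fueword32()/casueword32(). The old interfaces return the fetched value and overload -1 as the fault indicator, which is ambiguous when 0xffffffff is a legitimate stored value; the new ones return a status separately and write the value through a pointer. Schematic comparison only, since these primitives access user memory and exist only in the kernel:

	/* Old style: -1 is both "fault" and a possible real value. */
	owner = fuword32(&m->m_owner);
	if (owner == -1)
		return (EFAULT);	/* may be a false positive */

	/* New style: status and value are separate. */
	rv = fueword32(&m->m_owner, &owner);
	if (rv == -1)
		return (EFAULT);	/* -1 here can only mean a fault */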
@@ -1102,7 +1274,7 @@
is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
return (ret);
umtxq_lock(&key);
- ret = umtxq_signal(&key, n_wake);
+ umtxq_signal(&key, n_wake);
umtxq_unlock(&key);
umtx_key_release(&key);
return (0);
@@ -1112,15 +1284,19 @@
* Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
*/
static int
-_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
- int mode)
+do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
+ struct _umtx_time *timeout, int mode)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
uint32_t owner, old, id;
- int error = 0;
+ int error, rv;
id = td->td_tid;
uq = td->td_umtxq;
+ error = 0;
+ if (timeout != NULL)
+ abs_timeout_init2(&timo, timeout);
/*
* Care must be exercised when dealing with umtx structure. It
@@ -1127,7 +1303,9 @@
* can fault on any access.
*/
for (;;) {
- owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
+ rv = fueword32(&m->m_owner, &owner);
+ if (rv == -1)
+ return (EFAULT);
if (mode == _UMUTEX_WAIT) {
if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
return (0);
@@ -1135,27 +1313,31 @@
/*
* Try the uncontested case. This should be done in userland.
*/
- owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
+ rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
+ &owner, id);
+ /* The address was invalid. */
+ if (rv == -1)
+ return (EFAULT);
/* The acquire succeeded. */
if (owner == UMUTEX_UNOWNED)
return (0);
- /* The address was invalid. */
- if (owner == -1)
- return (EFAULT);
-
/* If no one owns it but it is contested try to acquire it. */
if (owner == UMUTEX_CONTESTED) {
- owner = casuword32(&m->m_owner,
- UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
+ rv = casueword32(&m->m_owner,
+ UMUTEX_CONTESTED, &owner,
+ id | UMUTEX_CONTESTED);
+ /* The address was invalid. */
+ if (rv == -1)
+ return (EFAULT);
if (owner == UMUTEX_CONTESTED)
return (0);
- /* The address was invalid. */
- if (owner == -1)
- return (EFAULT);
+ rv = umtxq_check_susp(td);
+ if (rv != 0)
+ return (rv);
/* If this failed the lock has changed, restart. */
continue;
@@ -1191,10 +1373,11 @@
* either some one else has acquired the lock or it has been
* released.
*/
- old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
+ rv = casueword32(&m->m_owner, owner, &old,
+ owner | UMUTEX_CONTESTED);
/* The address was invalid. */
- if (old == -1) {
+ if (rv == -1) {
umtxq_lock(&uq->uq_key);
umtxq_remove(uq);
umtxq_unbusy(&uq->uq_key);
@@ -1211,10 +1394,14 @@
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
if (old == owner)
- error = umtxq_sleep(uq, "umtxn", timo);
+ error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
+ NULL : &timo);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
+
+ if (error == 0)
+ error = umtxq_check_susp(td);
}
return (0);
@@ -1221,9 +1408,6 @@
}
/*
- * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
- */
-/*
* Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
*/
static int
@@ -1238,8 +1422,8 @@
/*
* Make sure we own this mtx.
*/
- owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
- if (owner == -1)
+ error = fueword32(&m->m_owner, &owner);
+ if (error == -1)
return (EFAULT);
if ((owner & ~UMUTEX_CONTESTED) != id)
@@ -1246,8 +1430,8 @@
return (EPERM);
if ((owner & UMUTEX_CONTESTED) == 0) {
- old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
- if (old == -1)
+ error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
+ if (error == -1)
return (EFAULT);
if (old == owner)
return (0);
@@ -1269,14 +1453,14 @@
* there is zero or one thread only waiting for it.
* Otherwise, it must be marked as contested.
*/
- old = casuword32(&m->m_owner, owner,
- count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
+ error = casueword32(&m->m_owner, owner, &old,
+ count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
umtxq_lock(&key);
umtxq_signal(&key,1);
umtxq_unbusy(&key);
umtxq_unlock(&key);
umtx_key_release(&key);
- if (old == -1)
+ if (error == -1)
return (EFAULT);
if (old != owner)
return (EINVAL);
@@ -1296,14 +1480,16 @@
int error;
int count;
- owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
- if (owner == -1)
+ error = fueword32(&m->m_owner, &owner);
+ if (error == -1)
return (EFAULT);
if ((owner & ~UMUTEX_CONTESTED) != 0)
return (0);
- flags = fuword32(&m->m_flags);
+ error = fueword32(&m->m_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
/* We should only ever be in here for contested locks */
if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
@@ -1315,16 +1501,20 @@
count = umtxq_count(&key);
umtxq_unlock(&key);
- if (count <= 1)
- owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
+ if (count <= 1) {
+ error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
+ UMUTEX_UNOWNED);
+ if (error == -1)
+ error = EFAULT;
+ }
umtxq_lock(&key);
- if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
+ if (error == 0 && count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
umtxq_signal(&key, 1);
umtxq_unbusy(&key);
umtxq_unlock(&key);
umtx_key_release(&key);
- return (0);
+ return (error);
}
/*
@@ -1367,31 +1557,47 @@
* any memory.
*/
if (count > 1) {
- owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
- while ((owner & UMUTEX_CONTESTED) ==0) {
- old = casuword32(&m->m_owner, owner,
- owner|UMUTEX_CONTESTED);
+ error = fueword32(&m->m_owner, &owner);
+ if (error == -1)
+ error = EFAULT;
+ while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
+ error = casueword32(&m->m_owner, owner, &old,
+ owner | UMUTEX_CONTESTED);
+ if (error == -1) {
+ error = EFAULT;
+ break;
+ }
if (old == owner)
break;
owner = old;
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
}
} else if (count == 1) {
- owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
- while ((owner & ~UMUTEX_CONTESTED) != 0 &&
+ error = fueword32(&m->m_owner, &owner);
+ if (error == -1)
+ error = EFAULT;
+ while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
(owner & UMUTEX_CONTESTED) == 0) {
- old = casuword32(&m->m_owner, owner,
- owner|UMUTEX_CONTESTED);
+ error = casueword32(&m->m_owner, owner, &old,
+ owner | UMUTEX_CONTESTED);
+ if (error == -1) {
+ error = EFAULT;
+ break;
+ }
if (old == owner)
break;
owner = old;
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
}
}
umtxq_lock(&key);
- if (owner == -1) {
- error = EFAULT;
+ if (error == EFAULT) {
umtxq_signal(&key, INT_MAX);
- }
- else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
+ } else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
umtxq_signal(&key, 1);
umtxq_unbusy(&key);
umtxq_unlock(&key);
@@ -1462,7 +1668,48 @@
return (1);
}
+static struct umtx_pi *
+umtx_pi_next(struct umtx_pi *pi)
+{
+ struct umtx_q *uq_owner;
+
+ if (pi->pi_owner == NULL)
+ return (NULL);
+ uq_owner = pi->pi_owner->td_umtxq;
+ if (uq_owner == NULL)
+ return (NULL);
+ return (uq_owner->uq_pi_blocked);
+}
+
/*
+ * Floyd's Cycle-Finding Algorithm.
+ */
+static bool
+umtx_pi_check_loop(struct umtx_pi *pi)
+{
+ struct umtx_pi *pi1; /* fast iterator */
+
+ mtx_assert(&umtx_lock, MA_OWNED);
+ if (pi == NULL)
+ return (false);
+ pi1 = pi;
+ for (;;) {
+ pi = umtx_pi_next(pi);
+ if (pi == NULL)
+ break;
+ pi1 = umtx_pi_next(pi1);
+ if (pi1 == NULL)
+ break;
+ pi1 = umtx_pi_next(pi1);
+ if (pi1 == NULL)
+ break;
+ if (pi == pi1)
+ return (true);
+ }
+ return (false);
+}
+
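umtx_pi_check_loop() is a textbook application of Floyd's tortoise-and-hare cycle detection: the slow pointer (pi) advances one owner-blocked link per iteration, the fast pointer (pi1) advances two, and they can only meet if the priority-inheritance chain loops back on itself. A standalone sketch of the same algorithm on a plain singly linked list, with ->next standing in for umtx_pi_next():

#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *next;
};

/* Floyd's cycle detection: true iff the list reachable from head loops. */
static bool
has_cycle(struct node *head)
{
	struct node *slow = head, *fast = head;

	while (fast != NULL && fast->next != NULL) {
		slow = slow->next;		/* one step  */
		fast = fast->next->next;	/* two steps */
		if (slow == fast)
			return (true);
	}
	return (false);
}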
+/*
* Propagate priority when a thread is blocked on POSIX
* PI mutex.
*/
@@ -1479,6 +1726,8 @@
pi = uq->uq_pi_blocked;
if (pi == NULL)
return;
+ if (umtx_pi_check_loop(pi))
+ return;
for (;;) {
td = pi->pi_owner;
@@ -1522,6 +1771,8 @@
mtx_assert(&umtx_lock, MA_OWNED);
+ if (umtx_pi_check_loop(pi))
+ return;
while (pi != NULL && pi->pi_owner != NULL) {
pri = PRI_MAX;
uq_owner = pi->pi_owner->td_umtxq;
@@ -1555,23 +1806,35 @@
uq_owner = owner->td_umtxq;
mtx_assert(&umtx_lock, MA_OWNED);
if (pi->pi_owner != NULL)
- panic("pi_ower != NULL");
+ panic("pi_owner != NULL");
pi->pi_owner = owner;
TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}
+
/*
+ * Disown a PI mutex, and remove it from the owned list.
+ */
+static void
+umtx_pi_disown(struct umtx_pi *pi)
+{
+
+ mtx_assert(&umtx_lock, MA_OWNED);
+ TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
+ pi->pi_owner = NULL;
+}
+
+/*
* Claim ownership of a PI mutex.
*/
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
- struct umtx_q *uq, *uq_owner;
+ struct umtx_q *uq;
- uq_owner = owner->td_umtxq;
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
if (pi->pi_owner == owner) {
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
return (0);
}
@@ -1579,7 +1842,7 @@
/*
* userland may have already messed the mutex, sigh.
*/
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
return (EPERM);
}
umtx_pi_setowner(pi, owner);
@@ -1593,7 +1856,7 @@
sched_lend_user_prio(owner, pri);
thread_unlock(owner);
}
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
return (0);
}
@@ -1608,7 +1871,7 @@
struct umtx_pi *pi;
uq = td->td_umtxq;
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
/*
* Pick up the lock that td is blocked on.
*/
@@ -1617,7 +1880,7 @@
umtx_pi_adjust_thread(pi, td);
umtx_repropagate_priority(pi);
}
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
}
/*
@@ -1625,7 +1888,7 @@
*/
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
- uint32_t owner, const char *wmesg, int timo)
+ uint32_t owner, const char *wmesg, struct abs_timeout *timo)
{
struct umtxq_chain *uc;
struct thread *td, *td1;
@@ -1637,14 +1900,14 @@
KASSERT(td == curthread, ("inconsistent uq_thread"));
uc = umtxq_getchain(&uq->uq_key);
UMTXQ_LOCKED_ASSERT(uc);
- UMTXQ_BUSY_ASSERT(uc);
+ KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
umtxq_insert(uq);
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
if (pi->pi_owner == NULL) {
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
/* XXX Only look up thread in current process. */
td1 = tdfind(owner, curproc->p_pid);
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
if (td1 != NULL) {
if (pi->pi_owner == NULL)
umtx_pi_setowner(pi, td1);
@@ -1668,18 +1931,13 @@
td->td_flags |= TDF_UPIBLOCKED;
thread_unlock(td);
umtx_propagate_priority(td);
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
umtxq_unbusy(&uq->uq_key);
- if (uq->uq_flags & UQF_UMTXQ) {
- error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
- if (error == EWOULDBLOCK)
- error = ETIMEDOUT;
- if (uq->uq_flags & UQF_UMTXQ) {
- umtxq_remove(uq);
- }
- }
- mtx_lock_spin(&umtx_lock);
+ error = umtxq_sleep(uq, wmesg, timo);
+ umtxq_remove(uq);
+
+ mtx_lock(&umtx_lock);
uq->uq_pi_blocked = NULL;
thread_lock(td);
td->td_flags &= ~TDF_UPIBLOCKED;
@@ -1686,7 +1944,7 @@
thread_unlock(td);
TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
umtx_repropagate_priority(pi);
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
umtxq_unlock(&uq->uq_key);
return (error);
@@ -1718,15 +1976,12 @@
UMTXQ_LOCKED_ASSERT(uc);
KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
if (--pi->pi_refcount == 0) {
- mtx_lock_spin(&umtx_lock);
- if (pi->pi_owner != NULL) {
- TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
- pi, pi_link);
- pi->pi_owner = NULL;
- }
+ mtx_lock(&umtx_lock);
+ if (pi->pi_owner != NULL)
+ umtx_pi_disown(pi);
KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
("blocked queue not empty"));
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
umtx_pi_free(pi);
}
@@ -1769,13 +2024,14 @@
* Lock a PI mutex.
*/
static int
-_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
- int try)
+do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
+ struct _umtx_time *timeout, int try)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
struct umtx_pi *pi, *new_pi;
uint32_t id, owner, old;
- int error;
+ int error, rv;
id = td->td_tid;
uq = td->td_umtxq;
@@ -1783,6 +2039,10 @@
if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
&uq->uq_key)) != 0)
return (error);
+
+ if (timeout != NULL)
+ abs_timeout_init2(&timo, timeout);
+
umtxq_lock(&uq->uq_key);
pi = umtx_pi_lookup(&uq->uq_key);
if (pi == NULL) {
@@ -1814,7 +2074,12 @@
/*
* Try the uncontested case. This should be done in userland.
*/
- owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
+ rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
+ /* The address was invalid. */
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
/* The acquire succeeded. */
if (owner == UMUTEX_UNOWNED) {
@@ -1822,16 +2087,15 @@
break;
}
- /* The address was invalid. */
- if (owner == -1) {
- error = EFAULT;
- break;
- }
-
/* If no one owns it but it is contested try to acquire it. */
if (owner == UMUTEX_CONTESTED) {
- owner = casuword32(&m->m_owner,
- UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
+ rv = casueword32(&m->m_owner,
+ UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
+ /* The address was invalid. */
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
if (owner == UMUTEX_CONTESTED) {
umtxq_lock(&uq->uq_key);
@@ -1839,21 +2103,29 @@
error = umtx_pi_claim(pi, td);
umtxq_unbusy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
+ if (error != 0) {
+ /*
+ * Since we're going to return an
+ * error, restore the m_owner to its
+ * previous, unowned state to avoid
+ * compounding the problem.
+ */
+ (void)casuword32(&m->m_owner,
+ id | UMUTEX_CONTESTED,
+ UMUTEX_CONTESTED);
+ }
break;
}
- /* The address was invalid. */
- if (owner == -1) {
- error = EFAULT;
+ error = umtxq_check_susp(td);
+ if (error != 0)
break;
- }
/* If this failed the lock has changed, restart. */
continue;
}
- if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
- (owner & ~UMUTEX_CONTESTED) == id) {
+ if ((owner & ~UMUTEX_CONTESTED) == id) {
error = EDEADLK;
break;
}
@@ -1880,13 +2152,12 @@
* either some one else has acquired the lock or it has been
* released.
*/
- old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
+ rv = casueword32(&m->m_owner, owner, &old,
+ owner | UMUTEX_CONTESTED);
/* The address was invalid. */
- if (old == -1) {
- umtxq_lock(&uq->uq_key);
- umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
error = EFAULT;
break;
}
@@ -1897,13 +2168,19 @@
* and we need to retry or we lost a race to the thread
* unlocking the umtx.
*/
- if (old == owner)
+ if (old == owner) {
error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
- "umtxpi", timo);
- else {
+ "umtxpi", timeout == NULL ? NULL : &timo);
+ if (error != 0)
+ continue;
+ } else {
umtxq_unbusy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
}
+
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
}
umtxq_lock(&uq->uq_key);
@@ -1932,8 +2209,8 @@
/*
* Make sure we own this mtx.
*/
- owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
- if (owner == -1)
+ error = fueword32(&m->m_owner, &owner);
+ if (error == -1)
return (EFAULT);
if ((owner & ~UMUTEX_CONTESTED) != id)
@@ -1941,8 +2218,8 @@
/* This should be done in userland */
if ((owner & UMUTEX_CONTESTED) == 0) {
- old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
- if (old == -1)
+ error = casueword32(&m->m_owner, owner, &old, UMUTEX_UNOWNED);
+ if (error == -1)
return (EFAULT);
if (old == owner)
return (0);
@@ -1958,11 +2235,11 @@
umtxq_busy(&key);
count = umtxq_count_pi(&key, &uq_first);
if (uq_first != NULL) {
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
pi = uq_first->uq_pi_blocked;
KASSERT(pi != NULL, ("pi == NULL?"));
- if (pi->pi_owner != curthread) {
- mtx_unlock_spin(&umtx_lock);
+ if (pi->pi_owner != td) {
+ mtx_unlock(&umtx_lock);
umtxq_unbusy(&key);
umtxq_unlock(&key);
umtx_key_release(&key);
@@ -1969,9 +2246,8 @@
/* userland messed the mutex */
return (EPERM);
}
- uq_me = curthread->td_umtxq;
- pi->pi_owner = NULL;
- TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
+ uq_me = td->td_umtxq;
+ umtx_pi_disown(pi);
/* get highest priority thread which is still sleeping. */
uq_first = TAILQ_FIRST(&pi->pi_blocked);
while (uq_first != NULL &&
@@ -1986,12 +2262,31 @@
pri = UPRI(uq_first2->uq_thread);
}
}
- thread_lock(curthread);
- sched_lend_user_prio(curthread, pri);
- thread_unlock(curthread);
- mtx_unlock_spin(&umtx_lock);
+ thread_lock(td);
+ sched_lend_user_prio(td, pri);
+ thread_unlock(td);
+ mtx_unlock(&umtx_lock);
if (uq_first)
umtxq_signal_thread(uq_first);
+ } else {
+ pi = umtx_pi_lookup(&key);
+ /*
+ * A umtx_pi can exist if a signal or timeout removed the
+ * last waiter from the umtxq, but there is still
+ * a thread in do_lock_pi() holding the umtx_pi.
+ */
+ if (pi != NULL) {
+ /*
+ * The umtx_pi can be unowned, such as when a thread
+ * has just entered do_lock_pi(), allocated the
+ * umtx_pi, and unlocked the umtxq.
+ * If the current thread owns it, it must disown it.
+ */
+ mtx_lock(&umtx_lock);
+ if (pi->pi_owner == td)
+ umtx_pi_disown(pi);
+ mtx_unlock(&umtx_lock);
+ }
}
umtxq_unlock(&key);
@@ -2000,14 +2295,12 @@
* there is zero or one thread only waiting for it.
* Otherwise, it must be marked as contested.
*/
- old = casuword32(&m->m_owner, owner,
- count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
+ error = casueword32(&m->m_owner, owner, &old,
+ count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
- umtxq_lock(&key);
- umtxq_unbusy(&key);
- umtxq_unlock(&key);
+ umtxq_unbusy_unlocked(&key);
umtx_key_release(&key);
- if (old == -1)
+ if (error == -1)
return (EFAULT);
if (old != owner)
return (EINVAL);
@@ -2018,14 +2311,15 @@
* Lock a PP mutex.
*/
static int
-_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
- int try)
+do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
+ struct _umtx_time *timeout, int try)
{
+ struct abs_timeout timo;
struct umtx_q *uq, *uq2;
struct umtx_pi *pi;
uint32_t ceiling;
uint32_t owner, id;
- int error, pri, old_inherited_pri, su;
+ int error, pri, old_inherited_pri, su, rv;
id = td->td_tid;
uq = td->td_umtxq;
@@ -2032,6 +2326,10 @@
if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
&uq->uq_key)) != 0)
return (error);
+
+ if (timeout != NULL)
+ abs_timeout_init2(&timo, timeout);
+
su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
for (;;) {
old_inherited_pri = uq->uq_inherited_pri;
@@ -2039,15 +2337,20 @@
umtxq_busy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
- ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
+ rv = fueword32(&m->m_ceilings[0], &ceiling);
+ if (rv == -1) {
+ error = EFAULT;
+ goto out;
+ }
+ ceiling = RTP_PRIO_MAX - ceiling;
if (ceiling > RTP_PRIO_MAX) {
error = EINVAL;
goto out;
}
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
error = EINVAL;
goto out;
}
@@ -2058,10 +2361,15 @@
sched_lend_user_prio(td, uq->uq_inherited_pri);
thread_unlock(td);
}
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
- owner = casuword32(&m->m_owner,
- UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
+ rv = casueword32(&m->m_owner,
+ UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
+ /* The address was invalid. */
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
if (owner == UMUTEX_CONTESTED) {
error = 0;
@@ -2068,12 +2376,6 @@
break;
}
- /* The address was invalid. */
- if (owner == -1) {
- error = EFAULT;
- break;
- }
-
if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
(owner & ~UMUTEX_CONTESTED) == id) {
error = EDEADLK;
@@ -2095,11 +2397,12 @@
umtxq_lock(&uq->uq_key);
umtxq_insert(uq);
umtxq_unbusy(&uq->uq_key);
- error = umtxq_sleep(uq, "umtxpp", timo);
+ error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
+ NULL : &timo);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
uq->uq_inherited_pri = old_inherited_pri;
pri = PRI_MAX;
TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
@@ -2114,11 +2417,11 @@
thread_lock(td);
sched_lend_user_prio(td, pri);
thread_unlock(td);
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
}
if (error != 0) {
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
uq->uq_inherited_pri = old_inherited_pri;
pri = PRI_MAX;
TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
@@ -2133,13 +2436,11 @@
thread_lock(td);
sched_lend_user_prio(td, pri);
thread_unlock(td);
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
}
out:
- umtxq_lock(&uq->uq_key);
- umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
+ umtxq_unbusy_unlocked(&uq->uq_key);
umtx_key_release(&uq->uq_key);
return (error);
}
@@ -2164,8 +2465,8 @@
/*
* Make sure we own this mtx.
*/
- owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
- if (owner == -1)
+ error = fueword32(&m->m_owner, &owner);
+ if (error == -1)
return (EFAULT);
if ((owner & ~UMUTEX_CONTESTED) != id)
@@ -2196,8 +2497,7 @@
* to lock the mutex, it is necessary because thread priority
* has to be adjusted for such mutex.
*/
- error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
- UMUTEX_CONTESTED);
+ error = suword32(&m->m_owner, UMUTEX_CONTESTED);
umtxq_lock(&key);
if (error == 0)
@@ -2208,7 +2508,7 @@
if (error == -1)
error = EFAULT;
else {
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
if (su != 0)
uq->uq_inherited_pri = new_inherited_pri;
pri = PRI_MAX;
@@ -2224,7 +2524,7 @@
thread_lock(td);
sched_lend_user_prio(td, pri);
thread_unlock(td);
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
}
umtx_key_release(&key);
return (error);
@@ -2238,9 +2538,11 @@
uint32_t save_ceiling;
uint32_t owner, id;
uint32_t flags;
- int error;
+ int error, rv;
- flags = fuword32(&m->m_flags);
+ error = fueword32(&m->m_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
if ((flags & UMUTEX_PRIO_PROTECT) == 0)
return (EINVAL);
if (ceiling > RTP_PRIO_MAX)
@@ -2255,25 +2557,26 @@
umtxq_busy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
- save_ceiling = fuword32(&m->m_ceilings[0]);
+ rv = fueword32(&m->m_ceilings[0], &save_ceiling);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
- owner = casuword32(&m->m_owner,
- UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
+ rv = casueword32(&m->m_owner,
+ UMUTEX_CONTESTED, &owner, id | UMUTEX_CONTESTED);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
if (owner == UMUTEX_CONTESTED) {
suword32(&m->m_ceilings[0], ceiling);
- suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
- UMUTEX_CONTESTED);
+ suword32(&m->m_owner, UMUTEX_CONTESTED);
error = 0;
break;
}
- /* The address was invalid. */
- if (owner == -1) {
- error = EFAULT;
- break;
- }
-
if ((owner & ~UMUTEX_CONTESTED) == id) {
suword32(&m->m_ceilings[0], ceiling);
error = 0;
@@ -2295,7 +2598,7 @@
umtxq_lock(&uq->uq_key);
umtxq_insert(uq);
umtxq_unbusy(&uq->uq_key);
- error = umtxq_sleep(uq, "umtxpp", 0);
+ error = umtxq_sleep(uq, "umtxpp", NULL);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
}
@@ -2310,59 +2613,37 @@
return (error);
}
-static int
-_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
- int mode)
-{
- switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
- case 0:
- return (_do_lock_normal(td, m, flags, timo, mode));
- case UMUTEX_PRIO_INHERIT:
- return (_do_lock_pi(td, m, flags, timo, mode));
- case UMUTEX_PRIO_PROTECT:
- return (_do_lock_pp(td, m, flags, timo, mode));
- }
- return (EINVAL);
-}
-
/*
* Lock a userland POSIX mutex.
*/
static int
do_lock_umutex(struct thread *td, struct umutex *m,
- struct timespec *timeout, int mode)
+ struct _umtx_time *timeout, int mode)
{
- struct timespec ts, ts2, ts3;
- struct timeval tv;
uint32_t flags;
int error;
- flags = fuword32(&m->m_flags);
- if (flags == -1)
+ error = fueword32(&m->m_flags, &flags);
+ if (error == -1)
return (EFAULT);
+ switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
+ case 0:
+ error = do_lock_normal(td, m, flags, timeout, mode);
+ break;
+ case UMUTEX_PRIO_INHERIT:
+ error = do_lock_pi(td, m, flags, timeout, mode);
+ break;
+ case UMUTEX_PRIO_PROTECT:
+ error = do_lock_pp(td, m, flags, timeout, mode);
+ break;
+ default:
+ return (EINVAL);
+ }
if (timeout == NULL) {
- error = _do_lock_umutex(td, m, flags, 0, mode);
- /* Mutex locking is restarted if it is interrupted. */
if (error == EINTR && mode != _UMUTEX_WAIT)
error = ERESTART;
} else {
- getnanouptime(&ts);
- timespecadd(&ts, timeout);
- TIMESPEC_TO_TIMEVAL(&tv, timeout);
- for (;;) {
- error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
- if (error != ETIMEDOUT)
- break;
- getnanouptime(&ts2);
- if (timespeccmp(&ts2, &ts, >=)) {
- error = ETIMEDOUT;
- break;
- }
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
- }
/* Timed-locking is not restarted. */
if (error == ERESTART)
error = EINTR;
@@ -2377,9 +2658,10 @@
do_unlock_umutex(struct thread *td, struct umutex *m)
{
uint32_t flags;
+ int error;
- flags = fuword32(&m->m_flags);
- if (flags == -1)
+ error = fueword32(&m->m_flags, &flags);
+ if (error == -1)
return (EFAULT);
switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
@@ -2398,24 +2680,29 @@
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
struct timespec *timeout, u_long wflags)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
- struct timeval tv;
- struct timespec cts, ets, tts;
- uint32_t flags;
- uint32_t clockid;
+ uint32_t flags, clockid, hasw;
int error;
uq = td->td_umtxq;
- flags = fuword32(&cv->c_flags);
+ error = fueword32(&cv->c_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
if (error != 0)
return (error);
if ((wflags & CVWAIT_CLOCKID) != 0) {
- clockid = fuword32(&cv->c_clockid);
+ error = fueword32(&cv->c_clockid, &clockid);
+ if (error == -1) {
+ umtx_key_release(&uq->uq_key);
+ return (EFAULT);
+ }
if (clockid < CLOCK_REALTIME ||
clockid >= CLOCK_THREAD_CPUTIME_ID) {
/* hmm, only HW clock id will work. */
+ umtx_key_release(&uq->uq_key);
return (EINVAL);
}
} else {
@@ -2431,45 +2718,22 @@
* Set c_has_waiters to 1 before releasing user mutex, also
* don't modify cache line when unnecessary.
*/
- if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
- suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
+ error = fueword32(&cv->c_has_waiters, &hasw);
+ if (error == 0 && hasw == 0)
+ suword32(&cv->c_has_waiters, 1);
- umtxq_lock(&uq->uq_key);
- umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
+ umtxq_unbusy_unlocked(&uq->uq_key);
error = do_unlock_umutex(td, m);
+
+ if (timeout != NULL)
+ abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0),
+ timeout);
umtxq_lock(&uq->uq_key);
if (error == 0) {
- if (timeout == NULL) {
- error = umtxq_sleep(uq, "ucond", 0);
- } else {
- if ((wflags & CVWAIT_ABSTIME) == 0) {
- kern_clock_gettime(td, clockid, &ets);
- timespecadd(&ets, timeout);
- tts = *timeout;
- } else { /* absolute time */
- ets = *timeout;
- tts = *timeout;
- kern_clock_gettime(td, clockid, &cts);
- timespecsub(&tts, &cts);
- }
- TIMESPEC_TO_TIMEVAL(&tv, &tts);
- for (;;) {
- error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
- if (error != ETIMEDOUT)
- break;
- kern_clock_gettime(td, clockid, &cts);
- if (timespeccmp(&cts, &ets, >=)) {
- error = ETIMEDOUT;
- break;
- }
- tts = ets;
- timespecsub(&tts, &cts);
- TIMESPEC_TO_TIMEVAL(&tv, &tts);
- }
- }
+ error = umtxq_sleep(uq, "ucond", timeout == NULL ?
+ NULL : &timo);
}
if ((uq->uq_flags & UQF_UMTXQ) == 0)
@@ -2486,9 +2750,7 @@
umtxq_remove(uq);
if (oldlen == 1) {
umtxq_unlock(&uq->uq_key);
- suword32(
- __DEVOLATILE(uint32_t *,
- &cv->c_has_waiters), 0);
+ suword32(&cv->c_has_waiters, 0);
umtxq_lock(&uq->uq_key);
}
}
@@ -2512,7 +2774,9 @@
int error, cnt, nwake;
uint32_t flags;
- flags = fuword32(&cv->c_flags);
+ error = fueword32(&cv->c_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
return (error);
umtxq_lock(&key);
@@ -2521,8 +2785,9 @@
nwake = umtxq_signal(&key, 1);
if (cnt <= nwake) {
umtxq_unlock(&key);
- error = suword32(
- __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
+ error = suword32(&cv->c_has_waiters, 0);
+ if (error == -1)
+ error = EFAULT;
umtxq_lock(&key);
}
umtxq_unbusy(&key);
@@ -2538,7 +2803,9 @@
int error;
uint32_t flags;
- flags = fuword32(&cv->c_flags);
+ error = fueword32(&cv->c_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
return (error);
@@ -2547,11 +2814,11 @@
umtxq_signal(&key, INT_MAX);
umtxq_unlock(&key);
- error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
+ error = suword32(&cv->c_has_waiters, 0);
+ if (error == -1)
+ error = EFAULT;
- umtxq_lock(&key);
- umtxq_unbusy(&key);
- umtxq_unlock(&key);
+ umtxq_unbusy_unlocked(&key);
umtx_key_release(&key);
return (error);
@@ -2558,26 +2825,37 @@
}
static int
-do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
+do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
uint32_t flags, wrflags;
int32_t state, oldstate;
int32_t blocked_readers;
- int error;
+ int error, error1, rv;
uq = td->td_umtxq;
- flags = fuword32(&rwlock->rw_flags);
+ error = fueword32(&rwlock->rw_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
if (error != 0)
return (error);
+ if (timeout != NULL)
+ abs_timeout_init2(&timo, timeout);
+
wrflags = URWLOCK_WRITE_OWNER;
if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
wrflags |= URWLOCK_WRITE_WAITERS;
for (;;) {
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1) {
+ umtx_key_release(&uq->uq_key);
+ return (EFAULT);
+ }
+
/* try to lock it */
while (!(state & wrflags)) {
if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
@@ -2584,11 +2862,19 @@
umtx_key_release(&uq->uq_key);
return (EAGAIN);
}
- oldstate = casuword32(&rwlock->rw_state, state, state + 1);
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state + 1);
+ if (rv == -1) {
+ umtx_key_release(&uq->uq_key);
+ return (EFAULT);
+ }
if (oldstate == state) {
umtx_key_release(&uq->uq_key);
return (0);
}
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
state = oldstate;
}
@@ -2604,27 +2890,49 @@
* re-read the state, in case it changed between the try-lock above
* and the check below
*/
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1)
+ error = EFAULT;
/* set read contention bit */
- while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
- oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
+ while (error == 0 && (state & wrflags) &&
+ !(state & URWLOCK_READ_WAITERS)) {
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state | URWLOCK_READ_WAITERS);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
if (oldstate == state)
goto sleep;
state = oldstate;
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
}
+ if (error != 0) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ break;
+ }
/* state is changed while setting flags, restart */
if (!(state & wrflags)) {
- umtxq_lock(&uq->uq_key);
- umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
continue;
}
sleep:
/* contention bit is set, before sleeping, increase read waiter count */
- blocked_readers = fuword32(&rwlock->rw_blocked_readers);
+ rv = fueword32(&rwlock->rw_blocked_readers,
+ &blocked_readers);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = EFAULT;
+ break;
+ }
suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
while (state & wrflags) {
@@ -2632,7 +2940,8 @@
umtxq_insert(uq);
umtxq_unbusy(&uq->uq_key);
- error = umtxq_sleep(uq, "urdlck", timo);
+ error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
+ NULL : &timo);
umtxq_busy(&uq->uq_key);
umtxq_remove(uq);
@@ -2639,54 +2948,53 @@
umtxq_unlock(&uq->uq_key);
if (error)
break;
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
}
/* decrease read waiter count, and may clear read contention bit */
- blocked_readers = fuword32(&rwlock->rw_blocked_readers);
+ rv = fueword32(&rwlock->rw_blocked_readers,
+ &blocked_readers);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = EFAULT;
+ break;
+ }
suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
if (blocked_readers == 1) {
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = EFAULT;
+ break;
+ }
for (;;) {
- oldstate = casuword32(&rwlock->rw_state, state,
- state & ~URWLOCK_READ_WAITERS);
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state & ~URWLOCK_READ_WAITERS);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
if (oldstate == state)
break;
state = oldstate;
+ error1 = umtxq_check_susp(td);
+ if (error1 != 0) {
+ if (error == 0)
+ error = error1;
+ break;
+ }
}
}
- umtxq_lock(&uq->uq_key);
- umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ if (error != 0)
+ break;
}
umtx_key_release(&uq->uq_key);
- return (error);
-}
-
-static int
-do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
-{
- struct timespec ts, ts2, ts3;
- struct timeval tv;
- int error;
-
- getnanouptime(&ts);
- timespecadd(&ts, timeout);
- TIMESPEC_TO_TIMEVAL(&tv, timeout);
- for (;;) {
- error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
- if (error != ETIMEDOUT)
- break;
- getnanouptime(&ts2);
- if (timespeccmp(&ts2, &ts, >=)) {
- error = ETIMEDOUT;
- break;
- }
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
- }
if (error == ERESTART)
error = EINTR;
return (error);
@@ -2693,31 +3001,49 @@
}
static int
-do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
+do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
uint32_t flags;
int32_t state, oldstate;
int32_t blocked_writers;
int32_t blocked_readers;
- int error;
+ int error, error1, rv;
uq = td->td_umtxq;
- flags = fuword32(&rwlock->rw_flags);
+ error = fueword32(&rwlock->rw_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
if (error != 0)
return (error);
+ if (timeout != NULL)
+ abs_timeout_init2(&timo, timeout);
+
blocked_readers = 0;
for (;;) {
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1) {
+ umtx_key_release(&uq->uq_key);
+ return (EFAULT);
+ }
while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
- oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state | URWLOCK_WRITE_OWNER);
+ if (rv == -1) {
+ umtx_key_release(&uq->uq_key);
+ return (EFAULT);
+ }
if (oldstate == state) {
umtx_key_release(&uq->uq_key);
return (0);
}
state = oldstate;
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
}
if (error) {
@@ -2742,24 +3068,46 @@
* re-read the state, in case it changed between the try-lock above
* and the check below
*/
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1)
+ error = EFAULT;
- while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
- (state & URWLOCK_WRITE_WAITERS) == 0) {
- oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
+ while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
+ URWLOCK_READER_COUNT(state) != 0) &&
+ (state & URWLOCK_WRITE_WAITERS) == 0) {
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state | URWLOCK_WRITE_WAITERS);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
if (oldstate == state)
goto sleep;
state = oldstate;
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
}
+ if (error != 0) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ break;
+ }
if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
- umtxq_lock(&uq->uq_key);
- umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ break;
continue;
}
sleep:
- blocked_writers = fuword32(&rwlock->rw_blocked_writers);
+ rv = fueword32(&rwlock->rw_blocked_writers,
+ &blocked_writers);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = EFAULT;
+ break;
+ }
suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
@@ -2767,7 +3115,8 @@
umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
umtxq_unbusy(&uq->uq_key);
- error = umtxq_sleep(uq, "uwrlck", timo);
+ error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
+ NULL : &timo);
umtxq_busy(&uq->uq_key);
umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
@@ -2774,56 +3123,64 @@
umtxq_unlock(&uq->uq_key);
if (error)
break;
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
}
- blocked_writers = fuword32(&rwlock->rw_blocked_writers);
+ rv = fueword32(&rwlock->rw_blocked_writers,
+ &blocked_writers);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = EFAULT;
+ break;
+ }
suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
if (blocked_writers == 1) {
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ rv = fueword32(&rwlock->rw_state, &state);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = EFAULT;
+ break;
+ }
for (;;) {
- oldstate = casuword32(&rwlock->rw_state, state,
- state & ~URWLOCK_WRITE_WAITERS);
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state & ~URWLOCK_WRITE_WAITERS);
+ if (rv == -1) {
+ error = EFAULT;
+ break;
+ }
if (oldstate == state)
break;
state = oldstate;
+ error1 = umtxq_check_susp(td);
+ /*
+ * We are leaving the URWLOCK_WRITE_WAITERS
+ * behind, but this should not harm the
+ * correctness.
+ */
+ if (error1 != 0) {
+ if (error == 0)
+ error = error1;
+ break;
+ }
}
- blocked_readers = fuword32(&rwlock->rw_blocked_readers);
+ rv = fueword32(&rwlock->rw_blocked_readers,
+ &blocked_readers);
+ if (rv == -1) {
+ umtxq_unbusy_unlocked(&uq->uq_key);
+ error = EFAULT;
+ break;
+ }
} else
blocked_readers = 0;
- umtxq_lock(&uq->uq_key);
- umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
+ umtxq_unbusy_unlocked(&uq->uq_key);
}
umtx_key_release(&uq->uq_key);
- return (error);
-}
-
-static int
-do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
-{
- struct timespec ts, ts2, ts3;
- struct timeval tv;
- int error;
-
- getnanouptime(&ts);
- timespecadd(&ts, timeout);
- TIMESPEC_TO_TIMEVAL(&tv, timeout);
- for (;;) {
- error = do_rw_wrlock(td, obj, tvtohz(&tv));
- if (error != ETIMEDOUT)
- break;
- getnanouptime(&ts2);
- if (timespeccmp(&ts2, &ts, >=)) {
- error = ETIMEDOUT;
- break;
- }
- ts3 = ts;
- timespecsub(&ts3, &ts2);
- TIMESPEC_TO_TIMEVAL(&tv, &ts3);
- }
if (error == ERESTART)
error = EINTR;
return (error);
@@ -2835,19 +3192,29 @@
struct umtx_q *uq;
uint32_t flags;
int32_t state, oldstate;
- int error, q, count;
+ int error, rv, q, count;
uq = td->td_umtxq;
- flags = fuword32(&rwlock->rw_flags);
+ error = fueword32(&rwlock->rw_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
if (error != 0)
return (error);
- state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
+ error = fueword32(&rwlock->rw_state, &state);
+ if (error == -1) {
+ error = EFAULT;
+ goto out;
+ }
if (state & URWLOCK_WRITE_OWNER) {
for (;;) {
- oldstate = casuword32(&rwlock->rw_state, state,
- state & ~URWLOCK_WRITE_OWNER);
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state & ~URWLOCK_WRITE_OWNER);
+ if (rv == -1) {
+ error = EFAULT;
+ goto out;
+ }
if (oldstate != state) {
state = oldstate;
if (!(oldstate & URWLOCK_WRITE_OWNER)) {
@@ -2854,13 +3221,20 @@
error = EPERM;
goto out;
}
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ goto out;
} else
break;
}
} else if (URWLOCK_READER_COUNT(state) != 0) {
for (;;) {
- oldstate = casuword32(&rwlock->rw_state, state,
- state - 1);
+ rv = casueword32(&rwlock->rw_state, state,
+ &oldstate, state - 1);
+ if (rv == -1) {
+ error = EFAULT;
+ goto out;
+ }
if (oldstate != state) {
state = oldstate;
if (URWLOCK_READER_COUNT(oldstate) == 0) {
@@ -2867,8 +3241,10 @@
error = EPERM;
goto out;
}
- }
- else
+ error = umtxq_check_susp(td);
+ if (error != 0)
+ goto out;
+ } else
break;
}
} else {
@@ -2909,62 +3285,43 @@
}
static int
-do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
+do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
+ struct abs_timeout timo;
struct umtx_q *uq;
- struct timeval tv;
- struct timespec cts, ets, tts;
- uint32_t flags, count;
- int error;
+ uint32_t flags, count, count1;
+ int error, rv;
uq = td->td_umtxq;
- flags = fuword32(&sem->_flags);
+ error = fueword32(&sem->_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
if (error != 0)
return (error);
+
+ if (timeout != NULL)
+ abs_timeout_init2(&timo, timeout);
+
umtxq_lock(&uq->uq_key);
umtxq_busy(&uq->uq_key);
umtxq_insert(uq);
umtxq_unlock(&uq->uq_key);
-
- if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
- casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
-
- count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
- if (count != 0) {
+ rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
+ if (rv == 0)
+ rv = fueword32(&sem->_count, &count);
+ if (rv == -1 || count != 0) {
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
- return (0);
+ return (rv == -1 ? EFAULT : 0);
}
-
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
- umtxq_unlock(&uq->uq_key);
- umtxq_lock(&uq->uq_key);
- if (timeout == NULL) {
- error = umtxq_sleep(uq, "usem", 0);
- } else {
- getnanouptime(&ets);
- timespecadd(&ets, timeout);
- TIMESPEC_TO_TIMEVAL(&tv, timeout);
- for (;;) {
- error = umtxq_sleep(uq, "usem", tvtohz(&tv));
- if (error != ETIMEDOUT)
- break;
- getnanouptime(&cts);
- if (timespeccmp(&cts, &ets, >=)) {
- error = ETIMEDOUT;
- break;
- }
- tts = ets;
- timespecsub(&tts, &cts);
- TIMESPEC_TO_TIMEVAL(&tv, &tts);
- }
- }
+ error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
if ((uq->uq_flags & UQF_UMTXQ) == 0)
error = 0;
@@ -2971,7 +3328,8 @@
else {
umtxq_remove(uq);
/* A relative timeout cannot be restarted. */
- if (error == ERESTART && timeout != NULL)
+ if (error == ERESTART && timeout != NULL &&
+ (timeout->_flags & UMTX_ABSTIME) == 0)
error = EINTR;
}
umtxq_unlock(&uq->uq_key);
@@ -2986,21 +3344,31 @@
do_sem_wake(struct thread *td, struct _usem *sem)
{
struct umtx_key key;
- int error, cnt, nwake;
+ int error, cnt;
uint32_t flags;
- flags = fuword32(&sem->_flags);
+ error = fueword32(&sem->_flags, &flags);
+ if (error == -1)
+ return (EFAULT);
if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
return (error);
umtxq_lock(&key);
umtxq_busy(&key);
cnt = umtxq_count(&key);
- nwake = umtxq_signal(&key, 1);
- if (cnt <= nwake) {
- umtxq_unlock(&key);
- error = suword32(
- __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
- umtxq_lock(&key);
+ if (cnt > 0) {
+ /*
+ * Check if count is greater than 0, this means the memory is
+ * still being referenced by user code, so we can safely
+ * update _has_waiters flag.
+ */
+ if (cnt == 1) {
+ umtxq_unlock(&key);
+ error = suword32(&sem->_has_waiters, 0);
+ umtxq_lock(&key);
+ if (error == -1)
+ error = EFAULT;
+ }
+ umtxq_signal(&key, 1);
}
umtxq_unbusy(&key);
umtxq_unlock(&key);
@@ -3012,7 +3380,7 @@
sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
/* struct umtx *umtx */
{
- return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
+ return do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}
int
@@ -3037,6 +3405,25 @@
return (error);
}
+static inline int
+umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
+{
+ int error;
+
+ if (size <= sizeof(struct timespec)) {
+ tp->_clockid = CLOCK_REALTIME;
+ tp->_flags = 0;
+ error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
+ } else
+ error = copyin(addr, tp, sizeof(struct _umtx_time));
+ if (error != 0)
+ return (error);
+ if (tp->_timeout.tv_sec < 0 ||
+ tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
+ return (EINVAL);
+ return (0);
+}
+
static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
@@ -3064,52 +3451,55 @@
static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time timeout, *tm_p;
int error;
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(
+ uap->uaddr2, (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_wait(td, uap->obj, uap->val, ts, 0, 0);
+ return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
}
static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time timeout, *tm_p;
int error;
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(
+ uap->uaddr2, (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_wait(td, uap->obj, uap->val, ts, 1, 0);
+ return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
}
static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(
+ uap->uaddr2, (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_wait(td, uap->obj, uap->val, ts, 1, 1);
+ return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
}
static int
@@ -3153,19 +3543,20 @@
static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(
+ uap->uaddr2, (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_lock_umutex(td, uap->obj, ts, 0);
+ return do_lock_umutex(td, uap->obj, tm_p, 0);
}
static int
@@ -3177,19 +3568,20 @@
static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(
+ uap->uaddr2, (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
+ return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
}
static int
@@ -3243,7 +3635,7 @@
static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec timeout;
+ struct _umtx_time timeout;
int error;
/* Allow a null timespec (wait forever). */
@@ -3250,10 +3642,11 @@
if (uap->uaddr2 == NULL) {
error = do_rw_rdlock(td, uap->obj, uap->val, 0);
} else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
+ error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
}
return (error);
}
@@ -3261,7 +3654,7 @@
static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec timeout;
+ struct _umtx_time timeout;
int error;
/* Allow a null timespec (wait forever). */
@@ -3268,11 +3661,12 @@
if (uap->uaddr2 == NULL) {
error = do_rw_wrlock(td, uap->obj, 0);
} else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- error = do_rw_wrlock2(td, uap->obj, &timeout);
+ error = do_rw_wrlock(td, uap->obj, &timeout);
}
return (error);
}
@@ -3286,19 +3680,20 @@
static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time(
+ uap->uaddr2, (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return (do_sem_wait(td, uap->obj, ts));
+ return (do_sem_wait(td, uap->obj, tm_p));
}
static int
@@ -3369,6 +3764,12 @@
int32_t tv_nsec;
};
+struct umtx_time32 {
+ struct timespec32 timeout;
+ uint32_t flags;
+ uint32_t clockid;
+};
+
static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
@@ -3389,6 +3790,30 @@
return (error);
}
+static inline int
+umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
+{
+ struct umtx_time32 t32;
+ int error;
+
+ t32.clockid = CLOCK_REALTIME;
+ t32.flags = 0;
+ if (size <= sizeof(struct timespec32))
+ error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
+ else
+ error = copyin(addr, &t32, sizeof(struct umtx_time32));
+ if (error != 0)
+ return (error);
+ if (t32.timeout.tv_sec < 0 ||
+ t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
+ return (EINVAL);
+ tp->_timeout.tv_sec = t32.timeout.tv_sec;
+ tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
+ tp->_flags = t32.flags;
+ tp->_clockid = t32.clockid;
+ return (0);
+}
+
static int
__umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
@@ -3416,54 +3841,57 @@
static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time32(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_wait(td, uap->obj, uap->val, ts, 1, 0);
+ return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
}
static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time32(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_lock_umutex(td, uap->obj, ts, 0);
+ return do_lock_umutex(td, uap->obj, tm_p, 0);
}
static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time32(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
+ return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
}
static int
@@ -3487,7 +3915,7 @@
static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec timeout;
+ struct _umtx_time timeout;
int error;
/* Allow a null timespec (wait forever). */
@@ -3494,10 +3922,11 @@
if (uap->uaddr2 == NULL) {
error = do_rw_rdlock(td, uap->obj, uap->val, 0);
} else {
- error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time32(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
+ error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
}
return (error);
}
@@ -3505,7 +3934,7 @@
static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec timeout;
+ struct _umtx_time timeout;
int error;
/* Allow a null timespec (wait forever). */
@@ -3512,11 +3941,11 @@
if (uap->uaddr2 == NULL) {
error = do_rw_wrlock(td, uap->obj, 0);
} else {
- error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time32(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
-
- error = do_rw_wrlock2(td, uap->obj, &timeout);
+ error = do_rw_wrlock(td, uap->obj, &timeout);
}
return (error);
}
@@ -3524,36 +3953,38 @@
static int
__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time32(
+ uap->uaddr2, (size_t)uap->uaddr1,&timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return do_wait(td, uap->obj, uap->val, ts, 1, 1);
+ return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
}
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
- struct timespec *ts, timeout;
+ struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
- ts = NULL;
+ tm_p = NULL;
else {
- error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
+ error = umtx_copyin_umtx_time32(uap->uaddr2,
+ (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
- ts = &timeout;
+ tm_p = &timeout;
}
- return (do_sem_wait(td, uap->obj, ts));
+ return (do_sem_wait(td, uap->obj, tm_p));
}
static int
@@ -3679,13 +4110,13 @@
if ((uq = td->td_umtxq) == NULL)
return;
- mtx_lock_spin(&umtx_lock);
+ mtx_lock(&umtx_lock);
uq->uq_inherited_pri = PRI_MAX;
while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
pi->pi_owner = NULL;
TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
}
- mtx_unlock_spin(&umtx_lock);
+ mtx_unlock(&umtx_lock);
thread_lock(td);
sched_lend_user_prio(td, PRI_MAX);
thread_unlock(td);
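
[Editorial note] The bulk of the kern_umtx.c diff above replaces the fuword32()/casuword32() accessors with fueword32()/casueword32() (so EFAULT is reported separately from the fetched value) and retires the do_rw_rdlock2()/do_rw_wrlock2() retry loops in favour of a struct _umtx_time timeout decoded by umtx_copyin_umtx_time(): uaddr1 carries the size of the user object, so an old-style bare timespec still means a relative CLOCK_REALTIME timeout, while the larger structure adds a clock id and an UMTX_ABSTIME flag. A minimal user-space sketch of the extended form, assuming only the declarations in <sys/umtx.h>; wait_until() and futex_word are illustrative names, not part of the commit:

    #include <sys/types.h>
    #include <sys/umtx.h>
    #include <time.h>

    static u_int futex_word = 0;

    /* Sleep until futex_word changes from 0 or the absolute deadline passes. */
    int
    wait_until(const struct timespec *deadline)
    {
            struct _umtx_time tm;

            tm._timeout = *deadline;        /* absolute, not relative */
            tm._flags = UMTX_ABSTIME;
            tm._clockid = CLOCK_MONOTONIC;

            /*
             * uaddr1 carries the size of the timeout object, which is how
             * umtx_copyin_umtx_time() tells a bare timespec from the
             * extended structure; uaddr2 points at the object itself.
             * Returns 0 on wakeup, -1 with errno (ETIMEDOUT, EINTR, ...).
             */
            return (_umtx_op(&futex_word, UMTX_OP_WAIT_UINT_PRIVATE, 0,
                (void *)sizeof(tm), &tm));
    }

Because the deadline is absolute, an interrupted sleep can simply be reissued; that is also why the ERESTART handling in do_sem_wait() above only downgrades to EINTR when the timeout is relative.
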
Modified: trunk/sys/kern/kern_uuid.c
===================================================================
--- trunk/sys/kern/kern_uuid.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/kern_uuid.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Marcel Moolenaar
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_uuid.c 262239 2014-02-20 08:55:59Z brueffer $");
#include <sys/param.h>
#include <sys/endian.h>
@@ -71,54 +72,41 @@
CTASSERT(sizeof(struct uuid_private) == 16);
+struct uuid_macaddr {
+ uint16_t state;
+#define UUID_ETHER_EMPTY 0
+#define UUID_ETHER_RANDOM 1
+#define UUID_ETHER_UNIQUE 2
+ uint16_t node[UUID_NODE_LEN>>1];
+};
+
static struct uuid_private uuid_last;
+#define UUID_NETHER 4
+static struct uuid_macaddr uuid_ether[UUID_NETHER];
+
static struct mtx uuid_mutex;
MTX_SYSINIT(uuid_lock, &uuid_mutex, "UUID generator mutex lock", MTX_DEF);
/*
- * Return the first MAC address we encounter or, if none was found,
- * construct a sufficiently random multicast address. We don't try
- * to return the same MAC address as previously returned. We always
- * generate a new multicast address if no MAC address exists in the
- * system.
- * It would be nice to know if 'ifnet' or any of its sub-structures
- * has been changed in any way. If not, we could simply skip the
- * scan and safely return the MAC address we returned before.
+ * Return the first MAC address added in the array. If it's empty, then
+ * construct a sufficiently random multicast MAC address first. Any
+ * addresses added later will bump the random MAC address up to the next
+ * index.
*/
static void
uuid_node(uint16_t *node)
{
- struct ifnet *ifp;
- struct ifaddr *ifa;
- struct sockaddr_dl *sdl;
int i;
- CURVNET_SET(TD_TO_VNET(curthread));
- IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
- /* Walk the address list */
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- sdl = (struct sockaddr_dl*)ifa->ifa_addr;
- if (sdl != NULL && sdl->sdl_family == AF_LINK &&
- sdl->sdl_type == IFT_ETHER) {
- /* Got a MAC address. */
- bcopy(LLADDR(sdl), node, UUID_NODE_LEN);
- IF_ADDR_RUNLOCK(ifp);
- IFNET_RUNLOCK_NOSLEEP();
- CURVNET_RESTORE();
- return;
- }
- }
- IF_ADDR_RUNLOCK(ifp);
+ if (uuid_ether[0].state == UUID_ETHER_EMPTY) {
+ for (i = 0; i < (UUID_NODE_LEN>>1); i++)
+ uuid_ether[0].node[i] = (uint16_t)arc4random();
+ *((uint8_t*)uuid_ether[0].node) |= 0x01;
+ uuid_ether[0].state = UUID_ETHER_RANDOM;
}
- IFNET_RUNLOCK_NOSLEEP();
-
for (i = 0; i < (UUID_NODE_LEN>>1); i++)
- node[i] = (uint16_t)arc4random();
- *((uint8_t*)node) |= 0x01;
- CURVNET_RESTORE();
+ node[i] = uuid_ether[0].node[i];
}
/*
@@ -211,6 +199,76 @@
}
int
+uuid_ether_add(const uint8_t *addr)
+{
+ int i, sum;
+
+ /*
+ * Validate input. No multicast (flag 0x1), no locally administered
+ * (flag 0x2) and no 'all-zeroes' addresses.
+ */
+ if (addr[0] & 0x03)
+ return (EINVAL);
+ sum = 0;
+ for (i = 0; i < UUID_NODE_LEN; i++)
+ sum += addr[i];
+ if (sum == 0)
+ return (EINVAL);
+
+ mtx_lock(&uuid_mutex);
+
+ /* Make sure the MAC isn't known already and that there's space. */
+ i = 0;
+ while (i < UUID_NETHER && uuid_ether[i].state == UUID_ETHER_UNIQUE) {
+ if (!bcmp(addr, uuid_ether[i].node, UUID_NODE_LEN)) {
+ mtx_unlock(&uuid_mutex);
+ return (EEXIST);
+ }
+ i++;
+ }
+ if (i == UUID_NETHER) {
+ mtx_unlock(&uuid_mutex);
+ return (ENOSPC);
+ }
+
+ /* Insert MAC at index, moving the non-empty entry if possible. */
+ if (uuid_ether[i].state == UUID_ETHER_RANDOM && i < UUID_NETHER - 1)
+ uuid_ether[i + 1] = uuid_ether[i];
+ uuid_ether[i].state = UUID_ETHER_UNIQUE;
+ bcopy(addr, uuid_ether[i].node, UUID_NODE_LEN);
+ mtx_unlock(&uuid_mutex);
+ return (0);
+}
+
+int
+uuid_ether_del(const uint8_t *addr)
+{
+ int i;
+
+ mtx_lock(&uuid_mutex);
+ i = 0;
+ while (i < UUID_NETHER && uuid_ether[i].state == UUID_ETHER_UNIQUE &&
+ bcmp(addr, uuid_ether[i].node, UUID_NODE_LEN))
+ i++;
+ if (i == UUID_NETHER || uuid_ether[i].state != UUID_ETHER_UNIQUE) {
+ mtx_unlock(&uuid_mutex);
+ return (ENOENT);
+ }
+
+ /* Remove it by shifting higher index entries down. */
+ while (i < UUID_NETHER - 1 && uuid_ether[i].state != UUID_ETHER_EMPTY) {
+ uuid_ether[i] = uuid_ether[i + 1];
+ i++;
+ }
+ if (uuid_ether[i].state != UUID_ETHER_EMPTY) {
+ uuid_ether[i].state = UUID_ETHER_EMPTY;
+ bzero(uuid_ether[i].node, UUID_NODE_LEN);
+ }
+ mtx_unlock(&uuid_mutex);
+ return (0);
+}
+
+int
snprintf_uuid(char *buf, size_t sz, struct uuid *uuid)
{
struct uuid_private *id;
@@ -314,7 +372,7 @@
p = buf;
uuid->time_low = be32dec(p);
- uuid->time_mid = le16dec(p + 4);
+ uuid->time_mid = be16dec(p + 4);
uuid->time_hi_and_version = be16dec(p + 6);
uuid->clock_seq_hi_and_reserved = p[8];
uuid->clock_seq_low = p[9];
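
[Editorial note] In kern_uuid.c, uuid_node() no longer walks the interface list at generation time; instead Ethernet drivers feed addresses into a small table through the new uuid_ether_add()/uuid_ether_del(), with a randomly generated multicast address kept at index 0 as a fallback. The acceptance test is compact enough to show on its own; a stand-alone sketch, assuming UUID_NODE_LEN is the 6-byte constant from <sys/uuid.h> and with an illustrative helper name:

    #include <stdint.h>
    #include <errno.h>

    #define UUID_NODE_LEN   6       /* assumed to match <sys/uuid.h> */

    /* Mirror of the validation uuid_ether_add() performs (illustrative). */
    static int
    uuid_ether_candidate(const uint8_t addr[UUID_NODE_LEN])
    {
            int i, sum;

            /* No multicast (0x01) or locally administered (0x02) bits. */
            if (addr[0] & 0x03)
                    return (EINVAL);
            /* No all-zero address. */
            sum = 0;
            for (i = 0; i < UUID_NODE_LEN; i++)
                    sum += addr[i];
            return (sum == 0 ? EINVAL : 0);
    }

The unrelated one-liner at the end of that file's diff makes time_mid decode big-endian (be16dec) like its neighbouring fields when a binary UUID is parsed.
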
Modified: trunk/sys/kern/ksched.c
===================================================================
--- trunk/sys/kern/ksched.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/ksched.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1996, 1997
* HD Associates, Inc. All rights reserved.
@@ -30,11 +31,10 @@
* SUCH DAMAGE.
*/
-/* ksched: Soft real time scheduling based on "rtprio".
- */
+/* ksched: Soft real time scheduling based on "rtprio". */
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/ksched.c 287508 2015-09-06 17:36:09Z kib $");
#include "opt_posix.h"
@@ -51,8 +51,7 @@
FEATURE(kposix_priority_scheduling, "POSIX P1003.1B realtime extensions");
-/* ksched: Real-time extension to support POSIX priority scheduling.
- */
+/* ksched: Real-time extension to support POSIX priority scheduling. */
struct ksched {
struct timespec rr_interval;
@@ -61,21 +60,21 @@
int
ksched_attach(struct ksched **p)
{
- struct ksched *ksched= p31b_malloc(sizeof(*ksched));
+ struct ksched *ksched;
+ ksched = malloc(sizeof(*ksched), M_P31B, M_WAITOK);
ksched->rr_interval.tv_sec = 0;
- ksched->rr_interval.tv_nsec = 1000000000L / sched_rr_interval();
-
+ ksched->rr_interval.tv_nsec = 1000000000L / hz * sched_rr_interval();
*p = ksched;
- return 0;
+ return (0);
}
int
ksched_detach(struct ksched *ks)
{
- p31b_free(ks);
- return 0;
+ free(ks, M_P31B);
+ return (0);
}
/*
@@ -108,25 +107,22 @@
getscheduler(struct ksched *ksched, struct thread *td, int *policy)
{
struct rtprio rtp;
- int e = 0;
+ int e;
+ e = 0;
pri_to_rtp(td, &rtp);
- switch (rtp.type)
- {
- case RTP_PRIO_FIFO:
+ switch (rtp.type) {
+ case RTP_PRIO_FIFO:
*policy = SCHED_FIFO;
break;
-
- case RTP_PRIO_REALTIME:
+ case RTP_PRIO_REALTIME:
*policy = SCHED_RR;
break;
-
- default:
+ default:
*policy = SCHED_OTHER;
break;
}
-
- return e;
+ return (e);
}
int
@@ -133,22 +129,17 @@
ksched_setparam(struct ksched *ksched,
struct thread *td, const struct sched_param *param)
{
- int policy;
- int e;
+ int e, policy;
e = getscheduler(ksched, td, &policy);
-
if (e == 0)
- {
- e = ksched_setscheduler(ksched, td, policy, param);
- }
-
- return e;
+ e = ksched_setscheduler(ksched, td, policy, param);
+ return (e);
}
int
-ksched_getparam(struct ksched *ksched,
- struct thread *td, struct sched_param *param)
+ksched_getparam(struct ksched *ksched, struct thread *td,
+ struct sched_param *param)
{
struct rtprio rtp;
@@ -159,13 +150,14 @@
if (PRI_MIN_TIMESHARE < rtp.prio)
/*
* The interactive score has it to min realtime
- * so we must show max (64 most likely
+ * so we must show max (64 most likely).
*/
- param->sched_priority = (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE);
+ param->sched_priority = PRI_MAX_TIMESHARE -
+ PRI_MIN_TIMESHARE;
else
param->sched_priority = tsprio_to_p4prio(rtp.prio);
}
- return 0;
+ return (0);
}
/*
@@ -176,117 +168,106 @@
*
*/
int
-ksched_setscheduler(struct ksched *ksched,
- struct thread *td, int policy, const struct sched_param *param)
+ksched_setscheduler(struct ksched *ksched, struct thread *td, int policy,
+ const struct sched_param *param)
{
- int e = 0;
struct rtprio rtp;
+ int e;
- switch(policy)
- {
- case SCHED_RR:
- case SCHED_FIFO:
-
+ e = 0;
+ switch(policy) {
+ case SCHED_RR:
+ case SCHED_FIFO:
if (param->sched_priority >= P1B_PRIO_MIN &&
- param->sched_priority <= P1B_PRIO_MAX)
- {
+ param->sched_priority <= P1B_PRIO_MAX) {
rtp.prio = p4prio_to_rtpprio(param->sched_priority);
- rtp.type = (policy == SCHED_FIFO)
- ? RTP_PRIO_FIFO : RTP_PRIO_REALTIME;
-
+ rtp.type = (policy == SCHED_FIFO) ? RTP_PRIO_FIFO :
+ RTP_PRIO_REALTIME;
rtp_to_pri(&rtp, td);
+ } else {
+ e = EPERM;
}
- else
- e = EPERM;
-
-
break;
-
- case SCHED_OTHER:
- if (param->sched_priority >= 0 &&
- param->sched_priority <= (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) {
+ case SCHED_OTHER:
+ if (param->sched_priority >= 0 && param->sched_priority <=
+ (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) {
rtp.type = RTP_PRIO_NORMAL;
rtp.prio = p4prio_to_tsprio(param->sched_priority);
rtp_to_pri(&rtp, td);
- } else
+ } else {
e = EINVAL;
-
+ }
break;
-
- default:
- e = EINVAL;
- break;
+ default:
+ e = EINVAL;
+ break;
}
-
- return e;
+ return (e);
}
int
ksched_getscheduler(struct ksched *ksched, struct thread *td, int *policy)
{
- return getscheduler(ksched, td, policy);
+
+ return (getscheduler(ksched, td, policy));
}
-/* ksched_yield: Yield the CPU.
- */
+/* ksched_yield: Yield the CPU. */
int
ksched_yield(struct ksched *ksched)
{
+
sched_relinquish(curthread);
- return 0;
+ return (0);
}
int
ksched_get_priority_max(struct ksched *ksched, int policy, int *prio)
{
- int e = 0;
+ int e;
- switch (policy)
- {
- case SCHED_FIFO:
- case SCHED_RR:
- *prio = RTP_PRIO_MAX;
+ e = 0;
+ switch (policy) {
+ case SCHED_FIFO:
+ case SCHED_RR:
+ *prio = P1B_PRIO_MAX;
break;
-
- case SCHED_OTHER:
+ case SCHED_OTHER:
*prio = PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
break;
-
- default:
+ default:
e = EINVAL;
+ break;
}
-
- return e;
+ return (e);
}
int
ksched_get_priority_min(struct ksched *ksched, int policy, int *prio)
{
- int e = 0;
+ int e;
- switch (policy)
- {
- case SCHED_FIFO:
- case SCHED_RR:
+ e = 0;
+ switch (policy) {
+ case SCHED_FIFO:
+ case SCHED_RR:
*prio = P1B_PRIO_MIN;
break;
-
- case SCHED_OTHER:
+ case SCHED_OTHER:
*prio = 0;
break;
-
- default:
+ default:
e = EINVAL;
+ break;
}
-
- return e;
+ return (e);
}
int
-ksched_rr_get_interval(struct ksched *ksched,
- struct thread *td, struct timespec *timespec)
+ksched_rr_get_interval(struct ksched *ksched, struct thread *td,
+ struct timespec *timespec)
{
+
*timespec = ksched->rr_interval;
-
- return 0;
+ return (0);
}
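
[Editorial note] The ksched_attach() change above also fixes how the round-robin interval is derived: sched_rr_interval() reports the quantum in ticks, so the nanosecond value is now ns-per-tick times that count. A quick check with assumed example values (hz = 1000, a 10-tick quantum):

    ns_per_tick = 1000000000 / hz        = 1000000 ns
    tv_nsec     = ns_per_tick * 10 ticks = 10000000 ns   (10 ms)

The old expression, 1000000000L / sched_rr_interval(), would have yielded 100000000 ns (100 ms) for the same inputs, treating the tick count as if it were a frequency.
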
Modified: trunk/sys/kern/link_elf.c
===================================================================
--- trunk/sys/kern/link_elf.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/link_elf.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998-2000 Doug Rabson
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/link_elf.c 296729 2016-03-12 17:23:15Z kib $");
#include "opt_ddb.h"
#include "opt_gdb.h"
@@ -158,7 +159,7 @@
static void link_elf_reloc_local(linker_file_t);
static long link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long link_elf_strtab_get(linker_file_t, caddr_t *);
-static Elf_Addr elf_lookup(linker_file_t, Elf_Size, int);
+static int elf_lookup(linker_file_t, Elf_Size, int, Elf_Addr *);
static kobj_method_t link_elf_methods[] = {
KOBJMETHOD(linker_lookup_symbol, link_elf_lookup_symbol),
@@ -575,7 +576,7 @@
static int
parse_dpcpu(elf_file_t ef)
-{
+{
int count;
int error;
@@ -606,7 +607,7 @@
#ifdef VIMAGE
static int
parse_vnet(elf_file_t ef)
-{
+{
int count;
int error;
@@ -702,16 +703,6 @@
int error;
ef = (elf_file_t) lf;
-#if 0 /* this will be more trouble than it's worth for now */
- for (dp = ef->dynamic; dp->d_tag != DT_NULL; dp++) {
- if (dp->d_tag != DT_NEEDED)
- continue;
- modname = ef->strtab + dp->d_un.d_val;
- error = linker_load_module(modname, lf);
- if (error != 0)
- goto out;
- }
-#endif
error = relocate_file(ef);
if (error != 0)
return (error);
@@ -750,17 +741,15 @@
int symstrindex;
int symcnt;
int strcnt;
- int vfslocked;
shdr = NULL;
lf = NULL;
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, filename, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error != 0)
return (error);
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_vp->v_type != VREG) {
error = ENOEXEC;
@@ -884,7 +873,7 @@
*/
base_offset = trunc_page(segs[0]->p_offset);
base_vaddr = trunc_page(segs[0]->p_vaddr);
- base_vlimit = round_page(segs[nsegs - 1]->p_vaddr +
+ base_vlimit = round_page(segs[nsegs - 1]->p_vaddr +
segs[nsegs - 1]->p_memsz);
mapsize = base_vlimit - base_vaddr;
@@ -903,7 +892,7 @@
}
ef->address = (caddr_t) vm_map_min(kernel_map);
error = vm_map_find(kernel_map, ef->object, 0,
- (vm_offset_t *) &ef->address, mapsize, 1,
+ (vm_offset_t *) &ef->address, mapsize, 0, VMFS_OPTIMAL_SPACE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error != 0) {
vm_object_deallocate(ef->object);
@@ -975,16 +964,6 @@
vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
if (error != 0)
goto out;
-#if 0 /* this will be more trouble than it's worth for now */
- for (dp = ef->dynamic; dp->d_tag != DT_NULL; dp++) {
- if (dp->d_tag != DT_NEEDED)
- continue;
- modname = ef->strtab + dp->d_un.d_val;
- error = linker_load_module(modname, lf);
- if (error != 0)
- goto out;
- }
-#endif
error = relocate_file(ef);
if (error != 0)
goto out;
@@ -1047,13 +1026,10 @@
out:
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
if (error != 0 && lf != NULL)
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
- if (shdr != NULL)
- free(shdr, M_LINKER);
- if (firstpage != NULL)
- free(firstpage, M_LINKER);
+ free(shdr, M_LINKER);
+ free(firstpage, M_LINKER);
return (error);
}
@@ -1115,19 +1091,13 @@
+ (ef->object->size << PAGE_SHIFT));
}
#else
- if (ef->address != NULL)
- free(ef->address, M_LINKER);
+ free(ef->address, M_LINKER);
#endif
- if (ef->symbase != NULL)
- free(ef->symbase, M_LINKER);
- if (ef->strbase != NULL)
- free(ef->strbase, M_LINKER);
- if (ef->ctftab != NULL)
- free(ef->ctftab, M_LINKER);
- if (ef->ctfoff != NULL)
- free(ef->ctfoff, M_LINKER);
- if (ef->typoff != NULL)
- free(ef->typoff, M_LINKER);
+ free(ef->symbase, M_LINKER);
+ free(ef->strbase, M_LINKER);
+ free(ef->ctftab, M_LINKER);
+ free(ef->ctfoff, M_LINKER);
+ free(ef->typoff, M_LINKER);
}
static void
@@ -1439,7 +1409,7 @@
elf_file_t ef = (elf_file_t)file;
const Elf_Sym *symp;
int i, error;
-
+
/* Exhaustive search */
for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
if (symp->st_value != 0 &&
@@ -1521,8 +1491,8 @@
* This is not only more efficient, it's also more correct. It's not always
* the case that the symbol can be found through the hash table.
*/
-static Elf_Addr
-elf_lookup(linker_file_t lf, Elf_Size symidx, int deps)
+static int
+elf_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
{
elf_file_t ef = (elf_file_t)lf;
const Elf_Sym *sym;
@@ -1530,8 +1500,10 @@
Elf_Addr addr, start, base;
/* Don't even try to lookup the symbol if the index is bogus. */
- if (symidx >= ef->nchains)
- return (0);
+ if (symidx >= ef->nchains) {
+ *res = 0;
+ return (EINVAL);
+ }
sym = ef->symtab + symidx;
@@ -1541,9 +1513,12 @@
*/
if (ELF_ST_BIND(sym->st_info) == STB_LOCAL) {
/* Force lookup failure when we have an insanity. */
- if (sym->st_shndx == SHN_UNDEF || sym->st_value == 0)
- return (0);
- return ((Elf_Addr)ef->address + sym->st_value);
+ if (sym->st_shndx == SHN_UNDEF || sym->st_value == 0) {
+ *res = 0;
+ return (EINVAL);
+ }
+ *res = ((Elf_Addr)ef->address + sym->st_value);
+ return (0);
}
/*
@@ -1556,10 +1531,16 @@
symbol = ef->strtab + sym->st_name;
/* Force a lookup failure if the symbol name is bogus. */
- if (*symbol == 0)
- return (0);
+ if (*symbol == 0) {
+ *res = 0;
+ return (EINVAL);
+ }
addr = ((Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps));
+ if (addr == 0 && ELF_ST_BIND(sym->st_info) != STB_WEAK) {
+ *res = 0;
+ return (EINVAL);
+ }
if (elf_set_find(&set_pcpu_list, addr, &start, &base))
addr = addr - start + base;
@@ -1567,7 +1548,8 @@
else if (elf_set_find(&set_vnet_list, addr, &start, &base))
addr = addr - start + base;
#endif
- return addr;
+ *res = addr;
+ return (0);
}
static void
@@ -1613,7 +1595,7 @@
return (ef->ddbsymcnt);
}
-
+
static long
link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
{
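
[Editorial note] The thread running through the link_elf.c diff is the new lookup contract: elf_lookup() now returns an error code and hands the address back through an Elf_Addr * out parameter, so a symbol that genuinely resolves to address 0 (and, with the added STB_WEAK check, an unresolved weak symbol) is no longer indistinguishable from a failed lookup. A caller-side fragment of the convention, assuming lf, symidx, deps and where are whatever the relocator already has in scope and apply_reloc() stands in for the machine-dependent fixup:

    Elf_Addr addr;
    int error;

    error = elf_lookup(lf, symidx, deps, &addr);
    if (error != 0)
            return (error);         /* bogus index or name, or unresolved */
    /* addr may legitimately be 0 here, e.g. an undefined weak symbol. */
    apply_reloc(where, addr);
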
Modified: trunk/sys/kern/link_elf_obj.c
===================================================================
--- trunk/sys/kern/link_elf_obj.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/link_elf_obj.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998-2000 Doug Rabson
* Copyright (c) 2004 Peter Wemm
@@ -26,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/link_elf_obj.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include "opt_ddb.h"
@@ -140,11 +141,12 @@
static int link_elf_each_function_nameval(linker_file_t,
linker_function_nameval_callback_t,
void *);
-static void link_elf_reloc_local(linker_file_t);
+static int link_elf_reloc_local(linker_file_t);
static long link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long link_elf_strtab_get(linker_file_t, caddr_t *);
-static Elf_Addr elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps);
+static int elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
+ Elf_Addr *);
static kobj_method_t link_elf_methods[] = {
KOBJMETHOD(linker_lookup_symbol, link_elf_lookup_symbol),
@@ -173,6 +175,7 @@
};
static int relocate_file(elf_file_t ef);
+static void elf_obj_cleanup_globals_cache(elf_file_t);
static void
link_elf_error(const char *filename, const char *s)
@@ -255,6 +258,9 @@
switch (shdr[i].sh_type) {
case SHT_PROGBITS:
case SHT_NOBITS:
+#ifdef __amd64__
+ case SHT_AMD64_UNWIND:
+#endif
ef->nprogtab++;
break;
case SHT_SYMTAB:
@@ -325,9 +331,16 @@
switch (shdr[i].sh_type) {
case SHT_PROGBITS:
case SHT_NOBITS:
+#ifdef __amd64__
+ case SHT_AMD64_UNWIND:
+#endif
ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
if (shdr[i].sh_type == SHT_PROGBITS)
ef->progtab[pb].name = "<<PROGBITS>>";
+#ifdef __amd64__
+ else if (shdr[i].sh_type == SHT_AMD64_UNWIND)
+ ef->progtab[pb].name = "<<UNWIND>>";
+#endif
else
ef->progtab[pb].name = "<<NOBITS>>";
ef->progtab[pb].size = shdr[i].sh_size;
@@ -389,15 +402,26 @@
break;
}
}
- if (pb != ef->nprogtab)
- panic("lost progbits");
- if (rl != ef->nreltab)
- panic("lost reltab");
- if (ra != ef->nrelatab)
- panic("lost relatab");
+ if (pb != ef->nprogtab) {
+ printf("%s: lost progbits\n", filename);
+ error = ENOEXEC;
+ goto out;
+ }
+ if (rl != ef->nreltab) {
+ printf("%s: lost reltab\n", filename);
+ error = ENOEXEC;
+ goto out;
+ }
+ if (ra != ef->nrelatab) {
+ printf("%s: lost relatab\n", filename);
+ error = ENOEXEC;
+ goto out;
+ }
/* Local intra-module relocations */
- link_elf_reloc_local(lf);
+ error = link_elf_reloc_local(lf);
+ if (error != 0)
+ goto out;
*result = lf;
return (0);
@@ -450,7 +474,6 @@
int nsym;
int pb, rl, ra;
int alignmask;
- int vfslocked;
shdr = NULL;
lf = NULL;
@@ -457,12 +480,11 @@
mapsize = 0;
hdr = NULL;
- NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, filename, td);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
flags = FREAD;
error = vn_open(&nd, &flags, 0, NULL);
if (error)
return error;
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_vp->v_type != VREG) {
error = ENOEXEC;
@@ -553,6 +575,9 @@
switch (shdr[i].sh_type) {
case SHT_PROGBITS:
case SHT_NOBITS:
+#ifdef __amd64__
+ case SHT_AMD64_UNWIND:
+#endif
ef->nprogtab++;
break;
case SHT_SYMTAB:
@@ -599,8 +624,11 @@
ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
M_LINKER, M_WAITOK | M_ZERO);
- if (symtabindex == -1)
- panic("lost symbol table index");
+ if (symtabindex == -1) {
+ link_elf_error(filename, "lost symbol table index");
+ error = ENOEXEC;
+ goto out;
+ }
/* Allocate space for and load the symbol table */
ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
@@ -615,8 +643,11 @@
goto out;
}
- if (symstrindex == -1)
- panic("lost symbol string index");
+ if (symstrindex == -1) {
+ link_elf_error(filename, "lost symbol string index");
+ error = ENOEXEC;
+ goto out;
+ }
/* Allocate space for and load the symbol strings */
ef->ddbstrcnt = shdr[symstrindex].sh_size;
ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
@@ -659,6 +690,9 @@
switch (shdr[i].sh_type) {
case SHT_PROGBITS:
case SHT_NOBITS:
+#ifdef __amd64__
+ case SHT_AMD64_UNWIND:
+#endif
alignmask = shdr[i].sh_addralign - 1;
mapsize += alignmask;
mapsize &= ~alignmask;
@@ -685,9 +719,14 @@
* location of code and data in the kernel's address space, request a
* mapping that is above the kernel.
*/
+#ifdef __amd64__
mapbase = KERNBASE;
+#else
+ mapbase = VM_MIN_KERNEL_ADDRESS;
+#endif
error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
- round_page(mapsize), TRUE, VM_PROT_ALL, VM_PROT_ALL, FALSE);
+ round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
+ VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
ef->object = 0;
@@ -721,6 +760,9 @@
switch (shdr[i].sh_type) {
case SHT_PROGBITS:
case SHT_NOBITS:
+#ifdef __amd64__
+ case SHT_AMD64_UNWIND:
+#endif
alignmask = shdr[i].sh_addralign - 1;
mapbase += alignmask;
mapbase &= ~alignmask;
@@ -729,6 +771,10 @@
ef->shstrtab + shdr[i].sh_name;
else if (shdr[i].sh_type == SHT_PROGBITS)
ef->progtab[pb].name = "<<PROGBITS>>";
+#ifdef __amd64__
+ else if (shdr[i].sh_type == SHT_AMD64_UNWIND)
+ ef->progtab[pb].name = "<<UNWIND>>";
+#endif
else
ef->progtab[pb].name = "<<NOBITS>>";
if (ef->progtab[pb].name != NULL &&
@@ -750,7 +796,11 @@
}
ef->progtab[pb].size = shdr[i].sh_size;
ef->progtab[pb].sec = i;
- if (shdr[i].sh_type == SHT_PROGBITS) {
+ if (shdr[i].sh_type == SHT_PROGBITS
+#ifdef __amd64__
+ || shdr[i].sh_type == SHT_AMD64_UNWIND
+#endif
+ ) {
error = vn_rdwr(UIO_READ, nd.ni_vp,
ef->progtab[pb].addr,
shdr[i].sh_size, shdr[i].sh_offset,
@@ -826,19 +876,35 @@
break;
}
}
- if (pb != ef->nprogtab)
- panic("lost progbits");
- if (rl != ef->nreltab)
- panic("lost reltab");
- if (ra != ef->nrelatab)
- panic("lost relatab");
- if (mapbase != (vm_offset_t)ef->address + mapsize)
- panic("mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
+ if (pb != ef->nprogtab) {
+ link_elf_error(filename, "lost progbits");
+ error = ENOEXEC;
+ goto out;
+ }
+ if (rl != ef->nreltab) {
+ link_elf_error(filename, "lost reltab");
+ error = ENOEXEC;
+ goto out;
+ }
+ if (ra != ef->nrelatab) {
+ link_elf_error(filename, "lost relatab");
+ error = ENOEXEC;
+ goto out;
+ }
+ if (mapbase != (vm_offset_t)ef->address + mapsize) {
+ printf(
+ "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
+ filename != NULL ? filename : "<none>",
(u_long)mapbase, ef->address, (u_long)mapsize,
(u_long)(vm_offset_t)ef->address + mapsize);
+ error = ENOMEM;
+ goto out;
+ }
/* Local intra-module relocations */
- link_elf_reloc_local(lf);
+ error = link_elf_reloc_local(lf);
+ if (error != 0)
+ goto out;
/* Pull in dependencies */
VOP_UNLOCK(nd.ni_vp, 0);
@@ -862,11 +928,9 @@
out:
VOP_UNLOCK(nd.ni_vp, 0);
vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
if (error && lf)
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
- if (hdr)
- free(hdr, M_LINKER);
+ free(hdr, M_LINKER);
return error;
}
@@ -897,18 +961,12 @@
}
}
if (ef->preloaded) {
- if (ef->reltab)
- free(ef->reltab, M_LINKER);
- if (ef->relatab)
- free(ef->relatab, M_LINKER);
- if (ef->progtab)
- free(ef->progtab, M_LINKER);
- if (ef->ctftab)
- free(ef->ctftab, M_LINKER);
- if (ef->ctfoff)
- free(ef->ctfoff, M_LINKER);
- if (ef->typoff)
- free(ef->typoff, M_LINKER);
+ free(ef->reltab, M_LINKER);
+ free(ef->relatab, M_LINKER);
+ free(ef->progtab, M_LINKER);
+ free(ef->ctftab, M_LINKER);
+ free(ef->ctfoff, M_LINKER);
+ free(ef->typoff, M_LINKER);
if (file->filename != NULL)
preload_delete_name(file->filename);
/* XXX reclaim module memory? */
@@ -916,17 +974,12 @@
}
for (i = 0; i < ef->nreltab; i++)
- if (ef->reltab[i].rel)
- free(ef->reltab[i].rel, M_LINKER);
+ free(ef->reltab[i].rel, M_LINKER);
for (i = 0; i < ef->nrelatab; i++)
- if (ef->relatab[i].rela)
- free(ef->relatab[i].rela, M_LINKER);
- if (ef->reltab)
- free(ef->reltab, M_LINKER);
- if (ef->relatab)
- free(ef->relatab, M_LINKER);
- if (ef->progtab)
- free(ef->progtab, M_LINKER);
+ free(ef->relatab[i].rela, M_LINKER);
+ free(ef->reltab, M_LINKER);
+ free(ef->relatab, M_LINKER);
+ free(ef->progtab, M_LINKER);
if (ef->object) {
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
@@ -933,20 +986,13 @@
(vm_offset_t) ef->address +
(ef->object->size << PAGE_SHIFT));
}
- if (ef->e_shdr)
- free(ef->e_shdr, M_LINKER);
- if (ef->ddbsymtab)
- free(ef->ddbsymtab, M_LINKER);
- if (ef->ddbstrtab)
- free(ef->ddbstrtab, M_LINKER);
- if (ef->shstrtab)
- free(ef->shstrtab, M_LINKER);
- if (ef->ctftab)
- free(ef->ctftab, M_LINKER);
- if (ef->ctfoff)
- free(ef->ctfoff, M_LINKER);
- if (ef->typoff)
- free(ef->typoff, M_LINKER);
+ free(ef->e_shdr, M_LINKER);
+ free(ef->ddbsymtab, M_LINKER);
+ free(ef->ddbstrtab, M_LINKER);
+ free(ef->shstrtab, M_LINKER);
+ free(ef->ctftab, M_LINKER);
+ free(ef->ctfoff, M_LINKER);
+ free(ef->typoff, M_LINKER);
}
static const char *
@@ -993,12 +1039,16 @@
/* Perform relocations without addend if there are any: */
for (i = 0; i < ef->nreltab; i++) {
rel = ef->reltab[i].rel;
- if (rel == NULL)
- panic("lost a reltab!");
+ if (rel == NULL) {
+ link_elf_error(ef->lf.filename, "lost a reltab!");
+ return (ENOEXEC);
+ }
rellim = rel + ef->reltab[i].nrel;
base = findbase(ef, ef->reltab[i].sec);
- if (base == 0)
- panic("lost base for reltab");
+ if (base == 0) {
+ link_elf_error(ef->lf.filename, "lost base for reltab");
+ return (ENOEXEC);
+ }
for ( ; rel < rellim; rel++) {
symidx = ELF_R_SYM(rel->r_info);
if (symidx >= ef->ddbsymcnt)
@@ -1012,7 +1062,7 @@
symname = symbol_name(ef, rel->r_info);
printf("link_elf_obj: symbol %s undefined\n",
symname);
- return ENOENT;
+ return (ENOENT);
}
}
}
@@ -1020,12 +1070,17 @@
/* Perform relocations with addend if there are any: */
for (i = 0; i < ef->nrelatab; i++) {
rela = ef->relatab[i].rela;
- if (rela == NULL)
- panic("lost a relatab!");
+ if (rela == NULL) {
+ link_elf_error(ef->lf.filename, "lost a relatab!");
+ return (ENOEXEC);
+ }
relalim = rela + ef->relatab[i].nrela;
base = findbase(ef, ef->relatab[i].sec);
- if (base == 0)
- panic("lost base for relatab");
+ if (base == 0) {
+ link_elf_error(ef->lf.filename,
+ "lost base for relatab");
+ return (ENOEXEC);
+ }
for ( ; rela < relalim; rela++) {
symidx = ELF_R_SYM(rela->r_info);
if (symidx >= ef->ddbsymcnt)
@@ -1039,12 +1094,19 @@
symname = symbol_name(ef, rela->r_info);
printf("link_elf_obj: symbol %s undefined\n",
symname);
- return ENOENT;
+ return (ENOENT);
}
}
}
- return 0;
+ /*
+ * Only clean SHN_FBSD_CACHED for successful return. If we
+ * modified symbol table for the object but found an
+ * unresolved symbol, there is no reason to roll back.
+ */
+ elf_obj_cleanup_globals_cache(ef);
+
+ return (0);
}
static int
@@ -1192,6 +1254,21 @@
return (0);
}
+static void
+elf_obj_cleanup_globals_cache(elf_file_t ef)
+{
+ Elf_Sym *sym;
+ Elf_Size i;
+
+ for (i = 0; i < ef->ddbsymcnt; i++) {
+ sym = ef->ddbsymtab + i;
+ if (sym->st_shndx == SHN_FBSD_CACHED) {
+ sym->st_shndx = SHN_UNDEF;
+ sym->st_value = 0;
+ }
+ }
+}
+
/*
* Symbol lookup function that can be used when the symbol index is known (ie
* in relocations). It uses the symbol index instead of doing a fully fledged
@@ -1199,46 +1276,71 @@
* This is not only more efficient, it's also more correct. It's not always
* the case that the symbol can be found through the hash table.
*/
-static Elf_Addr
-elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps)
+static int
+elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
{
elf_file_t ef = (elf_file_t)lf;
- const Elf_Sym *sym;
+ Elf_Sym *sym;
const char *symbol;
- Elf_Addr ret;
+ Elf_Addr res1;
/* Don't even try to lookup the symbol if the index is bogus. */
- if (symidx >= ef->ddbsymcnt)
- return (0);
+ if (symidx >= ef->ddbsymcnt) {
+ *res = 0;
+ return (EINVAL);
+ }
sym = ef->ddbsymtab + symidx;
/* Quick answer if there is a definition included. */
- if (sym->st_shndx != SHN_UNDEF)
- return (sym->st_value);
+ if (sym->st_shndx != SHN_UNDEF) {
+ *res = sym->st_value;
+ return (0);
+ }
/* If we get here, then it is undefined and needs a lookup. */
switch (ELF_ST_BIND(sym->st_info)) {
case STB_LOCAL:
/* Local, but undefined? huh? */
- return (0);
+ *res = 0;
+ return (EINVAL);
case STB_GLOBAL:
+ case STB_WEAK:
/* Relative to Data or Function name */
symbol = ef->ddbstrtab + sym->st_name;
/* Force a lookup failure if the symbol name is bogus. */
- if (*symbol == 0)
+ if (*symbol == 0) {
+ *res = 0;
+ return (EINVAL);
+ }
+ res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
+
+ /*
+ * Cache global lookups during module relocation. The failure
+ * case is particularly expensive for callers, who must scan
+ * through the entire globals table doing strcmp(). Cache to
+ * avoid doing such work repeatedly.
+ *
+ * After relocation is complete, undefined globals will be
+ * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
+ * above.
+ */
+ if (res1 != 0) {
+ sym->st_shndx = SHN_FBSD_CACHED;
+ sym->st_value = res1;
+ *res = res1;
return (0);
- ret = ((Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps));
- return ret;
+ } else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
+ sym->st_value = 0;
+ *res = 0;
+ return (0);
+ }
+ return (EINVAL);
- case STB_WEAK:
- printf("link_elf_obj: Weak symbols not supported\n");
- return (0);
-
default:
- return (0);
+ return (EINVAL);
}
}
@@ -1287,7 +1389,7 @@
}
}
-static void
+static int
link_elf_reloc_local(linker_file_t lf)
{
elf_file_t ef = (elf_file_t)lf;
@@ -1305,12 +1407,16 @@
/* Perform relocations without addend if there are any: */
for (i = 0; i < ef->nreltab; i++) {
rel = ef->reltab[i].rel;
- if (rel == NULL)
- panic("lost a reltab!");
+ if (rel == NULL) {
+ link_elf_error(ef->lf.filename, "lost a reltab");
+ return (ENOEXEC);
+ }
rellim = rel + ef->reltab[i].nrel;
base = findbase(ef, ef->reltab[i].sec);
- if (base == 0)
- panic("lost base for reltab");
+ if (base == 0) {
+ link_elf_error(ef->lf.filename, "lost base for reltab");
+ return (ENOEXEC);
+ }
for ( ; rel < rellim; rel++) {
symidx = ELF_R_SYM(rel->r_info);
if (symidx >= ef->ddbsymcnt)
@@ -1327,12 +1433,16 @@
/* Perform relocations with addend if there are any: */
for (i = 0; i < ef->nrelatab; i++) {
rela = ef->relatab[i].rela;
- if (rela == NULL)
- panic("lost a relatab!");
+ if (rela == NULL) {
+ link_elf_error(ef->lf.filename, "lost a relatab!");
+ return (ENOEXEC);
+ }
relalim = rela + ef->relatab[i].nrela;
base = findbase(ef, ef->relatab[i].sec);
- if (base == 0)
- panic("lost base for relatab");
+ if (base == 0) {
+ link_elf_error(ef->lf.filename, "lost base for reltab");
+ return (ENOEXEC);
+ }
for ( ; rela < relalim; rela++) {
symidx = ELF_R_SYM(rela->r_info);
if (symidx >= ef->ddbsymcnt)
@@ -1345,6 +1455,7 @@
elf_obj_lookup);
}
}
+ return (0);
}
static long
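
[Editorial note] The long comment in elf_obj_lookup() above is easier to weigh with a rough count: if a module has R relocation entries referring to G distinct undefined globals (illustrative counters, not identifiers from the source), the old code paid a full strcmp() scan of the exporters' symbol tables for every one of the R references, while the cached SHN_FBSD_CACHED entries reduce that to G scans, one per distinct symbol; for a hypothetical module with 2000 relocations against 150 kernel symbols, that is 150 slow lookups instead of 2000. elf_obj_cleanup_globals_cache() then resets every cached entry to SHN_UNDEF once relocation succeeds, so a cached st_value is never later mistaken for a real definition.
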
Modified: trunk/sys/kern/md4c.c
===================================================================
--- trunk/sys/kern/md4c.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/md4c.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/* MD4C.C - RSA Data Security, Inc., MD4 message-digest algorithm
*/
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/md4c.c 139804 2005-01-06 23:35:40Z imp $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/kern/md5c.c
===================================================================
--- trunk/sys/kern/md5c.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/md5c.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
*
@@ -30,7 +31,7 @@
* This file should be kept in sync with src/lib/libmd/md5c.c
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/md5c.c 157304 2006-03-30 18:45:50Z pjd $");
#include <sys/types.h>
Modified: trunk/sys/kern/p1003_1b.c
===================================================================
--- trunk/sys/kern/p1003_1b.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/p1003_1b.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1996, 1997, 1998
* HD Associates, Inc. All rights reserved.
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/p1003_1b.c 293485 2016-01-09 14:44:41Z dchagin $");
#include "opt_posix.h"
@@ -130,16 +131,29 @@
targettd = FIRST_THREAD_IN_PROC(targetp);
}
- e = p_cansched(td, targetp);
- if (e == 0) {
- e = ksched_setparam(ksched, targettd,
- (const struct sched_param *)&sched_param);
- }
+ e = kern_sched_setparam(td, targettd, &sched_param);
PROC_UNLOCK(targetp);
return (e);
}
int
+kern_sched_setparam(struct thread *td, struct thread *targettd,
+ struct sched_param *param)
+{
+ struct proc *targetp;
+ int error;
+
+ targetp = targettd->td_proc;
+ PROC_LOCK_ASSERT(targetp, MA_OWNED);
+
+ error = p_cansched(td, targetp);
+ if (error == 0)
+ error = ksched_setparam(ksched, targettd,
+ (const struct sched_param *)param);
+ return (error);
+}
+
+int
sys_sched_getparam(struct thread *td, struct sched_getparam_args *uap)
{
int e;
@@ -159,10 +173,7 @@
targettd = FIRST_THREAD_IN_PROC(targetp);
}
- e = p_cansee(td, targetp);
- if (e == 0) {
- e = ksched_getparam(ksched, targettd, &sched_param);
- }
+ e = kern_sched_getparam(td, targettd, &sched_param);
PROC_UNLOCK(targetp);
if (e == 0)
e = copyout(&sched_param, uap->param, sizeof(sched_param));
@@ -170,6 +181,22 @@
}
int
+kern_sched_getparam(struct thread *td, struct thread *targettd,
+ struct sched_param *param)
+{
+ struct proc *targetp;
+ int error;
+
+ targetp = targettd->td_proc;
+ PROC_LOCK_ASSERT(targetp, MA_OWNED);
+
+ error = p_cansee(td, targetp);
+ if (error == 0)
+ error = ksched_getparam(ksched, targettd, param);
+ return (error);
+}
+
+int
sys_sched_setscheduler(struct thread *td, struct sched_setscheduler_args *uap)
{
int e;
@@ -177,11 +204,6 @@
struct thread *targettd;
struct proc *targetp;
- /* Don't allow non root user to set a scheduler policy. */
- e = priv_check(td, PRIV_SCHED_SET);
- if (e)
- return (e);
-
e = copyin(uap->param, &sched_param, sizeof(sched_param));
if (e)
return (e);
@@ -197,16 +219,35 @@
targettd = FIRST_THREAD_IN_PROC(targetp);
}
- e = p_cansched(td, targetp);
- if (e == 0) {
- e = ksched_setscheduler(ksched, targettd,
- uap->policy, (const struct sched_param *)&sched_param);
- }
+ e = kern_sched_setscheduler(td, targettd, uap->policy,
+ &sched_param);
PROC_UNLOCK(targetp);
return (e);
}
int
+kern_sched_setscheduler(struct thread *td, struct thread *targettd,
+ int policy, struct sched_param *param)
+{
+ struct proc *targetp;
+ int error;
+
+ targetp = targettd->td_proc;
+ PROC_LOCK_ASSERT(targetp, MA_OWNED);
+
+ /* Don't allow non root user to set a scheduler policy. */
+ error = priv_check(td, PRIV_SCHED_SET);
+ if (error)
+ return (error);
+
+ error = p_cansched(td, targetp);
+ if (error == 0)
+ error = ksched_setscheduler(ksched, targettd, policy,
+ (const struct sched_param *)param);
+ return (error);
+}
+
+int
sys_sched_getscheduler(struct thread *td, struct sched_getscheduler_args *uap)
{
int e, policy;
@@ -224,17 +265,31 @@
targettd = FIRST_THREAD_IN_PROC(targetp);
}
- e = p_cansee(td, targetp);
- if (e == 0) {
- e = ksched_getscheduler(ksched, targettd, &policy);
+ e = kern_sched_getscheduler(td, targettd, &policy);
+ PROC_UNLOCK(targetp);
+ if (e == 0)
td->td_retval[0] = policy;
- }
- PROC_UNLOCK(targetp);
return (e);
}
int
+kern_sched_getscheduler(struct thread *td, struct thread *targettd,
+ int *policy)
+{
+ struct proc *targetp;
+ int error;
+
+ targetp = targettd->td_proc;
+ PROC_LOCK_ASSERT(targetp, MA_OWNED);
+
+ error = p_cansee(td, targetp);
+ if (error == 0)
+ error = ksched_getscheduler(ksched, targettd, policy);
+ return (error);
+}
+
+int
sys_sched_yield(struct thread *td, struct sched_yield_args *uap)
{
@@ -296,13 +351,26 @@
targettd = FIRST_THREAD_IN_PROC(targetp);
}
- e = p_cansee(td, targetp);
- if (e == 0)
- e = ksched_rr_get_interval(ksched, targettd, ts);
+ e = kern_sched_rr_get_interval_td(td, targettd, ts);
PROC_UNLOCK(targetp);
return (e);
}
+int
+kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd,
+ struct timespec *ts)
+{
+ struct proc *p;
+ int error;
+
+ p = targettd->td_proc;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+
+ error = p_cansee(td, p);
+ if (error == 0)
+ error = ksched_rr_get_interval(ksched, targettd, ts);
+ return (error);
+}
#endif
static void
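The p1003_1b.c hunks above split each POSIX scheduling syscall into a thin wrapper (copyin/copyout plus process lookup) and a kern_sched_*() helper that does the permission check and expects the target process lock to be held, so other in-kernel consumers such as compat layers can reuse the checks. A minimal sketch of how such a consumer might call the new helper; the lookup, priority value, and error handling below are illustrative and not part of the diff:

	/*
	 * Sketch only: set SCHED_FIFO on another process via the new helper.
	 * pfind() returns with the target process locked, which is what
	 * kern_sched_setscheduler() asserts.
	 */
	static int
	example_set_fifo(struct thread *td, pid_t pid, int prio)
	{
		struct sched_param param;
		struct proc *p;
		int error;

		param.sched_priority = prio;
		if ((p = pfind(pid)) == NULL)
			return (ESRCH);
		error = kern_sched_setscheduler(td, FIRST_THREAD_IN_PROC(p),
		    SCHED_FIFO, &param);
		PROC_UNLOCK(p);
		return (error);
	}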
Modified: trunk/sys/kern/posix4_mib.c
===================================================================
--- trunk/sys/kern/posix4_mib.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/posix4_mib.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998
* HD Associates, Inc. All rights reserved.
@@ -31,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/posix4_mib.c 299613 2016-05-13 07:56:14Z ngie $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -114,9 +115,9 @@
num = arg2;
if (!P31B_VALID(num))
return (EINVAL);
- val = facility_initialized[num] ? facility[num - 1] : 0;
+ val = facility_initialized[num - 1] ? facility[num - 1] : 0;
error = sysctl_handle_int(oidp, &val, 0, req);
- if (error == 0 && req->newptr != NULL && facility_initialized[num])
+ if (error == 0 && req->newptr != NULL && facility_initialized[num - 1])
facility[num - 1] = val;
return (error);
}
@@ -138,7 +139,7 @@
{
facility[num - 1] = 0;
- facility_initialized[num -1] = 0;
+ facility_initialized[num - 1] = 0;
}
int
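The posix4_mib.c change fixes an off-by-one: the facility numbers handed to the sysctl handler are 1-based (validated with P31B_VALID()), while facility[] and facility_initialized[] are 0-based arrays, so both must be indexed with num - 1. A small illustration of the convention, assuming the array declarations match the ones elsewhere in this file:

	/* num is a 1-based P1003_1B facility number, P31B_VALID(num) holds. */
	static int
	facility_is_initialized(int num)
	{

		return (facility_initialized[num - 1] != 0);
	}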
Modified: trunk/sys/kern/sched_4bsd.c
===================================================================
--- trunk/sys/kern/sched_4bsd.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/sched_4bsd.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -33,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sched_4bsd.c 316841 2017-04-14 14:44:06Z avg $");
#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"
@@ -143,7 +144,7 @@
schedcpu_thread,
NULL
};
-SYSINIT(schedcpu, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start,
+SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, kproc_start,
&sched_kp);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
@@ -255,20 +256,20 @@
SDT_PROVIDER_DEFINE(sched);
-SDT_PROBE_DEFINE3(sched, , , change_pri, change-pri, "struct thread *",
+SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
"struct proc *", "uint8_t");
-SDT_PROBE_DEFINE3(sched, , , dequeue, dequeue, "struct thread *",
+SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
"struct proc *", "void *");
-SDT_PROBE_DEFINE4(sched, , , enqueue, enqueue, "struct thread *",
+SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
"struct proc *", "void *", "int");
-SDT_PROBE_DEFINE4(sched, , , lend_pri, lend-pri, "struct thread *",
+SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
"struct proc *", "uint8_t", "struct thread *");
-SDT_PROBE_DEFINE2(sched, , , load_change, load-change, "int", "int");
-SDT_PROBE_DEFINE2(sched, , , off_cpu, off-cpu, "struct thread *",
+SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
+SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
"struct proc *");
-SDT_PROBE_DEFINE(sched, , , on_cpu, on-cpu);
-SDT_PROBE_DEFINE(sched, , , remain_cpu, remain-cpu);
-SDT_PROBE_DEFINE2(sched, , , surrender, surrender, "struct thread *",
+SDT_PROBE_DEFINE(sched, , , on__cpu);
+SDT_PROBE_DEFINE(sched, , , remain__cpu);
+SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
"struct proc *");
static __inline void
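Both schedulers also move to the newer SDT probe macros, which drop the separate DTrace-name argument: the probe name is derived from the C identifier, with a double underscore rendered as a dash. A sketch of the convention (not part of the diff):

	SDT_PROVIDER_DECLARE(sched);
	SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
	/* fired in the scheduler as SDT_PROBE2(sched, , , load__change, cpu, load); */

From userland the probe is then visible as sched:::load-change, e.g.:

	dtrace -n 'sched:::load-change { printf("cpu %d load %d", arg0, arg1); }'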
@@ -277,7 +278,7 @@
sched_tdcnt++;
KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
- SDT_PROBE2(sched, , , load_change, NOCPU, sched_tdcnt);
+ SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt);
}
static __inline void
@@ -286,7 +287,7 @@
sched_tdcnt--;
KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt);
- SDT_PROBE2(sched, , , load_change, NOCPU, sched_tdcnt);
+ SDT_PROBE2(sched, , , load__change, NOCPU, sched_tdcnt);
}
/*
* Arrange to reschedule if necessary, taking the priorities and
@@ -304,9 +305,8 @@
/*
* This function is called when a thread is about to be put on run queue
* because it has been made runnable or its priority has been adjusted. It
- * determines if the new thread should be immediately preempted to. If so,
- * it switches to it and eventually returns true. If not, it returns false
- * so that the caller may place the thread on an appropriate run queue.
+ * determines if the new thread should preempt the current thread. If so,
+ * it sets td_owepreempt to request a preemption.
*/
int
maybe_preempt(struct thread *td)
@@ -352,29 +352,8 @@
return (0);
#endif
- if (ctd->td_critnest > 1) {
- CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
- ctd->td_critnest);
- ctd->td_owepreempt = 1;
- return (0);
- }
- /*
- * Thread is runnable but not yet put on system run queue.
- */
- MPASS(ctd->td_lock == td->td_lock);
- MPASS(TD_ON_RUNQ(td));
- TD_SET_RUNNING(td);
- CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
- td->td_proc->p_pid, td->td_name);
- mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, td);
- /*
- * td's lock pointer may have changed. We have to return with it
- * locked.
- */
- spinlock_enter();
- thread_unlock(ctd);
- thread_lock(td);
- spinlock_exit();
+ CTR0(KTR_PROC, "maybe_preempt: scheduling preemption");
+ ctd->td_owepreempt = 1;
return (1);
#else
return (0);
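With this change maybe_preempt() never context-switches on its own; it only records the request in td_owepreempt, and the switch is taken when the preempted thread drains its critical-section nesting. A simplified sketch of the consuming side; the real critical_exit() carries more flag and kdb handling than shown here:

	void
	critical_exit_sketch(void)
	{
		struct thread *td = curthread;

		KASSERT(td->td_critnest != 0, ("not in a critical section"));
		if (td->td_critnest == 1 && td->td_owepreempt) {
			td->td_critnest = 0;
			thread_lock(td);
			/* td_owepreempt is cleared on the switch path. */
			mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
			thread_unlock(td);
		} else
			td->td_critnest--;
	}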
@@ -793,6 +772,8 @@
{
struct td_sched *ts;
+ childtd->td_oncpu = NOCPU;
+ childtd->td_lastcpu = NOCPU;
childtd->td_estcpu = td->td_estcpu;
childtd->td_lock = &sched_lock;
childtd->td_cpuset = cpuset_ref(td->td_cpuset);
@@ -836,12 +817,12 @@
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "priority change",
"prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED,
sched_tdname(curthread));
- SDT_PROBE3(sched, , , change_pri, td, td->td_proc, prio);
+ SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio);
if (td != curthread && prio > td->td_priority) {
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread),
"lend prio", "prio:%d", td->td_priority, "new prio:%d",
prio, KTR_ATTR_LINKED, sched_tdname(td));
- SDT_PROBE4(sched, , , lend_pri, td, td->td_proc, prio,
+ SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio,
curthread);
}
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -983,7 +964,8 @@
sched_load_rem();
td->td_lastcpu = td->td_oncpu;
- preempted = !(td->td_flags & TDF_SLICEEND);
+ preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
+ (flags & SW_PREEMPT) != 0;
td->td_flags &= ~(TDF_NEEDRESCHED | TDF_SLICEEND);
td->td_owepreempt = 0;
td->td_oncpu = NOCPU;
@@ -1027,6 +1009,16 @@
MPASS(newtd->td_lock == &sched_lock);
}
+#if (KTR_COMPILE & KTR_SCHED) != 0
+ if (TD_IS_IDLETHREAD(td))
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
+ "prio:%d", td->td_priority);
+ else
+ KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td),
+ "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg,
+ "lockname:\"%s\"", td->td_lockname);
+#endif
+
if (td != newtd) {
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
@@ -1033,7 +1025,7 @@
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
- SDT_PROBE2(sched, , , off_cpu, td, td->td_proc);
+ SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc);
/* I feel sleepy */
lock_profile_release_lock(&sched_lock.lock_object);
@@ -1067,14 +1059,17 @@
* need to reap it.
*/
- SDT_PROBE0(sched, , , on_cpu);
+ SDT_PROBE0(sched, , , on__cpu);
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
} else
- SDT_PROBE0(sched, , , remain_cpu);
+ SDT_PROBE0(sched, , , remain__cpu);
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
+ "prio:%d", td->td_priority);
+
#ifdef SMP
if (td->td_flags & TDF_IDLETD)
CPU_SET(PCPU_GET(cpuid), &idle_cpus_mask);
@@ -1232,7 +1227,7 @@
mtx_assert(&sched_lock, MA_OWNED);
- if (THREAD_CAN_SCHED(td, td->td_lastcpu))
+ if (td->td_lastcpu != NOCPU && THREAD_CAN_SCHED(td, td->td_lastcpu))
best = td->td_lastcpu;
else
best = NOCPU;
@@ -1323,6 +1318,12 @@
ts->ts_runq = &runq;
}
+ if ((td->td_flags & TDF_NOLOAD) == 0)
+ sched_load_add();
+ runq_add(ts->ts_runq, td, flags);
+ if (cpu != NOCPU)
+ runq_length[cpu]++;
+
cpuid = PCPU_GET(cpuid);
if (single_cpu && cpu != cpuid) {
kick_other_cpu(td->td_priority, cpu);
@@ -1339,18 +1340,10 @@
}
if (!forwarded) {
- if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td))
- return;
- else
+ if (!maybe_preempt(td))
maybe_resched(td);
}
}
-
- if ((td->td_flags & TDF_NOLOAD) == 0)
- sched_load_add();
- runq_add(ts->ts_runq, td, flags);
- if (cpu != NOCPU)
- runq_length[cpu]++;
}
#else /* SMP */
{
@@ -1384,23 +1377,11 @@
CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
ts->ts_runq = &runq;
- /*
- * If we are yielding (on the way out anyhow) or the thread
- * being saved is US, then don't try be smart about preemption
- * or kicking off another CPU as it won't help and may hinder.
- * In the YIEDLING case, we are about to run whoever is being
- * put in the queue anyhow, and in the OURSELF case, we are
- * puting ourself on the run queue which also only happens
- * when we are about to yield.
- */
- if ((flags & SRQ_YIELDING) == 0) {
- if (maybe_preempt(td))
- return;
- }
if ((td->td_flags & TDF_NOLOAD) == 0)
sched_load_add();
runq_add(ts->ts_runq, td, flags);
- maybe_resched(td);
+ if (!maybe_preempt(td))
+ maybe_resched(td);
}
#endif /* SMP */
@@ -1585,7 +1566,7 @@
return (ts->ts_pctcpu);
}
-#ifdef RACCT
+#ifdef RACCT
/*
* Calculates the contribution to the thread cpu usage for the latest
* (unfinished) second.
@@ -1632,6 +1613,7 @@
{
struct pcpuidlestat *stat;
+ THREAD_NO_SLEEPING();
stat = DPCPU_PTR(idlestat);
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
@@ -1670,6 +1652,8 @@
} else {
lock_profile_release_lock(&sched_lock.lock_object);
MPASS(td->td_lock == &sched_lock);
+ td->td_lastcpu = td->td_oncpu;
+ td->td_oncpu = NOCPU;
}
mtx_assert(&sched_lock, MA_OWNED);
KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
@@ -1689,6 +1673,10 @@
lock_profile_obtain_lock_success(&sched_lock.lock_object,
0, 0, __FILE__, __LINE__);
THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
+
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
+ "prio:%d", td->td_priority);
+ SDT_PROBE0(sched, , , on__cpu);
}
char *
Modified: trunk/sys/kern/sched_ule.c
===================================================================
--- trunk/sys/kern/sched_ule.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/sched_ule.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002-2007, Jeffrey Roberson <jeff at freebsd.org>
* All rights reserved.
@@ -36,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/sched_ule.c 316841 2017-04-14 14:44:06Z avg $");
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
@@ -77,10 +78,6 @@
#include <machine/cpu.h>
#include <machine/smp.h>
-#if defined(__powerpc__) && defined(E500)
-#error "This architecture is not currently compatible with ULE"
-#endif
-
#define KTR_ULE 0
#define TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX)))
@@ -189,6 +186,12 @@
#define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2)
#define SCHED_INTERACT_THRESH (30)
+/*
+ * These parameters determine the slice behavior for batch work.
+ */
+#define SCHED_SLICE_DEFAULT_DIVISOR 10 /* ~94 ms, 12 stathz ticks. */
+#define SCHED_SLICE_MIN_DIVISOR 6 /* DEFAULT/MIN = ~16 ms. */
+
/* Flags kept in td_flags. */
#define TDF_SLICEEND TDF_SCHED2 /* Thread time slice is over. */
@@ -201,9 +204,10 @@
* preempt_thresh: Priority threshold for preemption and remote IPIs.
*/
static int sched_interact = SCHED_INTERACT_THRESH;
-static int realstathz = 127;
static int tickincr = 8 << SCHED_TICK_SHIFT;
-static int sched_slice = 12;
+static int realstathz = 127; /* reset during boot. */
+static int sched_slice = 10; /* reset during boot. */
+static int sched_slice_min = 1; /* reset during boot. */
#ifdef PREEMPTION
#ifdef FULL_PREEMPTION
static int preempt_thresh = PRI_MAX_IDLE;
@@ -223,8 +227,12 @@
* locking in sched_pickcpu();
*/
struct tdq {
- /* Ordered to improve efficiency of cpu_search() and switch(). */
- struct mtx tdq_lock; /* run queue lock. */
+ /*
+ * Ordered to improve efficiency of cpu_search() and switch().
+ * tdq_lock is padded to avoid false sharing with tdq_load and
+ * tdq_cpu_idle.
+ */
+ struct mtx_padalign tdq_lock; /* run queue lock. */
struct cpu_group *tdq_cg; /* Pointer to cpu topology. */
volatile int tdq_load; /* Aggregate load. */
volatile int tdq_cpu_idle; /* cpu_idle() is active. */
@@ -287,7 +295,7 @@
#define TDQ_LOCK(t) mtx_lock_spin(TDQ_LOCKPTR((t)))
#define TDQ_LOCK_FLAGS(t, f) mtx_lock_spin_flags(TDQ_LOCKPTR((t)), (f))
#define TDQ_UNLOCK(t) mtx_unlock_spin(TDQ_LOCKPTR((t)))
-#define TDQ_LOCKPTR(t) (&(t)->tdq_lock)
+#define TDQ_LOCKPTR(t) ((struct mtx *)(&(t)->tdq_lock))
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
@@ -333,20 +341,20 @@
SDT_PROVIDER_DEFINE(sched);
-SDT_PROBE_DEFINE3(sched, , , change_pri, change-pri, "struct thread *",
+SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
"struct proc *", "uint8_t");
-SDT_PROBE_DEFINE3(sched, , , dequeue, dequeue, "struct thread *",
+SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
"struct proc *", "void *");
-SDT_PROBE_DEFINE4(sched, , , enqueue, enqueue, "struct thread *",
+SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
"struct proc *", "void *", "int");
-SDT_PROBE_DEFINE4(sched, , , lend_pri, lend-pri, "struct thread *",
+SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
"struct proc *", "uint8_t", "struct thread *");
-SDT_PROBE_DEFINE2(sched, , , load_change, load-change, "int", "int");
-SDT_PROBE_DEFINE2(sched, , , off_cpu, off-cpu, "struct thread *",
+SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
+SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
"struct proc *");
-SDT_PROBE_DEFINE(sched, , , on_cpu, on-cpu);
-SDT_PROBE_DEFINE(sched, , , remain_cpu, remain-cpu);
-SDT_PROBE_DEFINE2(sched, , , surrender, surrender, "struct thread *",
+SDT_PROBE_DEFINE(sched, , , on__cpu);
+SDT_PROBE_DEFINE(sched, , , remain__cpu);
+SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
"struct proc *");
/*
@@ -531,7 +539,7 @@
if ((td->td_flags & TDF_NOLOAD) == 0)
tdq->tdq_sysload++;
KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load);
- SDT_PROBE2(sched, , , load_change, (int)TDQ_ID(tdq), tdq->tdq_load);
+ SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load);
}
/*
@@ -551,10 +559,34 @@
if ((td->td_flags & TDF_NOLOAD) == 0)
tdq->tdq_sysload--;
KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load);
- SDT_PROBE2(sched, , , load_change, (int)TDQ_ID(tdq), tdq->tdq_load);
+ SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load);
}
/*
+ * Bound timeshare latency by decreasing slice size as load increases. We
+ * consider the maximum latency as the sum of the threads waiting to run
+ * aside from curthread and target no more than sched_slice latency but
+ * no less than sched_slice_min runtime.
+ */
+static inline int
+tdq_slice(struct tdq *tdq)
+{
+ int load;
+
+ /*
+ * It is safe to use sys_load here because this is called from
+ * contexts where timeshare threads are running and so there
+ * cannot be higher priority load in the system.
+ */
+ load = tdq->tdq_sysload - 1;
+ if (load >= SCHED_SLICE_MIN_DIVISOR)
+ return (sched_slice_min);
+ if (load <= 1)
+ return (sched_slice);
+ return (sched_slice / load);
+}
+
+/*
* Set lowpri to its exact value by searching the run-queue and
* evaluating curthread. curthread may be passed as an optimization.
*/
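A worked example of the new slice bound, using the boot-time defaults implied by the diff (stathz = 127, so sched_slice = 12 ticks and sched_slice_min = 2):

	/*
	 * tdq_slice() with sched_slice = 12, sched_slice_min = 2, and
	 * load = tdq_sysload - 1 (the running thread is not counted):
	 *
	 *   timeshare threads on the queue   slice granted (stathz ticks)
	 *   1 or 2                           12  (full slice, ~94 ms)
	 *   3                                 6
	 *   4                                 4
	 *   5                                 3
	 *   6                                 2  (12 / 5)
	 *   7 or more                         2  (clamped to sched_slice_min, ~16 ms)
	 */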
@@ -591,12 +623,14 @@
for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \
if (CPU_ISSET(cpu, &mask))
-static __inline int cpu_search(const struct cpu_group *cg, struct cpu_search *low,
- struct cpu_search *high, const int match);
-int cpu_search_lowest(const struct cpu_group *cg, struct cpu_search *low);
-int cpu_search_highest(const struct cpu_group *cg, struct cpu_search *high);
-int cpu_search_both(const struct cpu_group *cg, struct cpu_search *low,
+static __always_inline int cpu_search(const struct cpu_group *cg,
+ struct cpu_search *low, struct cpu_search *high, const int match);
+int __noinline cpu_search_lowest(const struct cpu_group *cg,
+ struct cpu_search *low);
+int __noinline cpu_search_highest(const struct cpu_group *cg,
struct cpu_search *high);
+int __noinline cpu_search_both(const struct cpu_group *cg,
+ struct cpu_search *low, struct cpu_search *high);
/*
* Search the tree of cpu_groups for the lowest or highest loaded cpu
@@ -609,7 +643,7 @@
* match argument. It is reduced to the minimum set for each case. It is
* also recursive to the depth of the tree.
*/
-static __inline int
+static __always_inline int
cpu_search(const struct cpu_group *cg, struct cpu_search *low,
struct cpu_search *high, const int match)
{
@@ -632,10 +666,14 @@
}
/* Iterate through the child CPU groups and then remaining CPUs. */
- for (i = cg->cg_children, cpu = mp_maxid; i >= 0; ) {
+ for (i = cg->cg_children, cpu = mp_maxid; ; ) {
if (i == 0) {
+#ifdef HAVE_INLINE_FFSL
+ cpu = CPU_FFS(&cpumask) - 1;
+#else
while (cpu >= 0 && !CPU_ISSET(cpu, &cpumask))
cpu--;
+#endif
if (cpu < 0)
break;
child = NULL;
@@ -660,6 +698,7 @@
break;
}
} else { /* Handle child CPU. */
+ CPU_CLR(cpu, &cpumask);
tdq = TDQ_CPU(cpu);
load = tdq->tdq_load * 256;
rndptr = DPCPU_PTR(randomval);
@@ -707,8 +746,11 @@
i--;
if (i == 0 && CPU_EMPTY(&cpumask))
break;
- } else
+ }
+#ifndef HAVE_INLINE_FFSL
+ else
cpu--;
+#endif
}
return (total);
}
@@ -771,30 +813,6 @@
return high.cs_cpu;
}
-/*
- * Simultaneously find the highest and lowest loaded cpu reachable via
- * cg.
- */
-static inline void
-sched_both(const struct cpu_group *cg, cpuset_t mask, int *lowcpu, int *highcpu)
-{
- struct cpu_search high;
- struct cpu_search low;
-
- low.cs_cpu = -1;
- low.cs_prefer = -1;
- low.cs_pri = -1;
- low.cs_limit = INT_MAX;
- low.cs_mask = mask;
- high.cs_cpu = -1;
- high.cs_limit = -1;
- high.cs_mask = mask;
- cpu_search_both(cg, &low, &high);
- *lowcpu = low.cs_cpu;
- *highcpu = high.cs_cpu;
- return;
-}
-
static void
sched_balance_group(struct cpu_group *cg)
{
@@ -905,10 +923,8 @@
* reschedule with the new workload.
*/
cpu = TDQ_ID(low);
- sched_pin();
if (cpu != PCPU_GET(cpuid))
ipi_cpu(cpu, IPI_PREEMPT);
- sched_unpin();
}
tdq_unlock_pair(high, low);
return (moved);
@@ -1022,6 +1038,14 @@
ctd = pcpu_find(cpu)->pc_curthread;
if (!sched_shouldpreempt(pri, ctd->td_priority, 1))
return;
+
+ /*
+ * Make sure that the tdq_load update done before calling this
+ * function is globally visible before we read tdq_cpu_idle.  The
+ * idle thread accesses both without locks, and the order matters.
+ */
+ mb();
+
if (TD_IS_IDLETHREAD(ctd)) {
/*
* If the MD code has an idle wakeup routine try that before
@@ -1382,7 +1406,8 @@
int incr;
realstathz = stathz ? stathz : hz;
- sched_slice = realstathz / 10; /* ~100ms */
+ sched_slice = realstathz / SCHED_SLICE_DEFAULT_DIVISOR;
+ sched_slice_min = sched_slice / SCHED_SLICE_MIN_DIVISOR;
hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) /
realstathz);
@@ -1407,7 +1432,7 @@
affinity = SCHED_AFFINITY_DEFAULT;
#endif
if (sched_idlespinthresh < 0)
- sched_idlespinthresh = imax(16, 2 * hz / realstathz);
+ sched_idlespinthresh = 2 * max(10000, 6 * hz) / realstathz;
}
@@ -1491,7 +1516,7 @@
pri = SCHED_PRI_MIN;
if (td->td_sched->ts_ticks)
pri += min(SCHED_PRI_TICKS(td->td_sched),
- SCHED_PRI_RANGE);
+ SCHED_PRI_RANGE - 1);
pri += SCHED_PRI_NICE(td->td_proc->p_nice);
KASSERT(pri >= PRI_MIN_BATCH && pri <= PRI_MAX_BATCH,
("sched_priority: invalid priority %d: nice %d, "
@@ -1583,7 +1608,7 @@
thread0.td_sched = &td_sched0;
td_sched0.ts_ltick = ticks;
td_sched0.ts_ftick = ticks;
- td_sched0.ts_slice = sched_slice;
+ td_sched0.ts_slice = 0;
}
/*
@@ -1638,12 +1663,12 @@
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "prio",
"prio:%d", td->td_priority, "new prio:%d", prio,
KTR_ATTR_LINKED, sched_tdname(curthread));
- SDT_PROBE3(sched, , , change_pri, td, td->td_proc, prio);
+ SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio);
if (td != curthread && prio < td->td_priority) {
KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread),
"lend prio", "prio:%d", td->td_priority, "new prio:%d",
prio, KTR_ATTR_LINKED, sched_tdname(td));
- SDT_PROBE4(sched, , , lend_pri, td, td->td_proc, prio,
+ SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio,
curthread);
}
ts = td->td_sched;
@@ -1846,10 +1871,12 @@
ts->ts_rltick = ticks;
td->td_lastcpu = td->td_oncpu;
td->td_oncpu = NOCPU;
- preempted = !(td->td_flags & TDF_SLICEEND);
+ preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
+ (flags & SW_PREEMPT) != 0;
td->td_flags &= ~(TDF_NEEDRESCHED | TDF_SLICEEND);
td->td_owepreempt = 0;
- tdq->tdq_switchcnt++;
+ if (!TD_IS_IDLETHREAD(td))
+ tdq->tdq_switchcnt++;
/*
* The lock pointer in an idle thread should never change. Reset it
* to CAN_RUN as well.
@@ -1880,6 +1907,17 @@
mtx = thread_lock_block(td);
tdq_load_rem(tdq, td);
}
+
+#if (KTR_COMPILE & KTR_SCHED) != 0
+ if (TD_IS_IDLETHREAD(td))
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
+ "prio:%d", td->td_priority);
+ else
+ KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td),
+ "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg,
+ "lockname:\"%s\"", td->td_lockname);
+#endif
+
/*
* We enter here with the thread blocked and assigned to the
* appropriate cpu run-queue or sleep-queue and with the current
@@ -1895,7 +1933,7 @@
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
- SDT_PROBE2(sched, , , off_cpu, td, td->td_proc);
+ SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc);
lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
sched_pctcpu_update(newtd->td_sched, 0);
@@ -1921,7 +1959,7 @@
lock_profile_obtain_lock_success(
&TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
- SDT_PROBE0(sched, , , on_cpu);
+ SDT_PROBE0(sched, , , on__cpu);
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
@@ -1928,8 +1966,12 @@
#endif
} else {
thread_unblock_switch(td, mtx);
- SDT_PROBE0(sched, , , remain_cpu);
+ SDT_PROBE0(sched, , , remain__cpu);
}
+
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
+ "prio:%d", td->td_priority);
+
/*
* Assert that all went well and return.
*/
@@ -2001,8 +2043,10 @@
sched_interact_update(td);
sched_pctcpu_update(ts, 0);
}
- /* Reset the slice value after we sleep. */
- ts->ts_slice = sched_slice;
+ /*
+ * Reset the slice value since we slept and advanced the round-robin.
+ */
+ ts->ts_slice = 0;
sched_add(td, SRQ_BORING);
}
@@ -2034,7 +2078,9 @@
{
struct td_sched *ts;
struct td_sched *ts2;
+ struct tdq *tdq;
+ tdq = TDQ_SELF();
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
* Initialize child.
@@ -2041,7 +2087,9 @@
*/
ts = td->td_sched;
ts2 = child->td_sched;
- child->td_lock = TDQ_LOCKPTR(TDQ_SELF());
+ child->td_oncpu = NOCPU;
+ child->td_lastcpu = NOCPU;
+ child->td_lock = TDQ_LOCKPTR(tdq);
child->td_cpuset = cpuset_ref(td->td_cpuset);
ts2->ts_cpu = ts->ts_cpu;
ts2->ts_flags = 0;
@@ -2060,7 +2108,8 @@
*/
ts2->ts_slptime = ts->ts_slptime;
ts2->ts_runtime = ts->ts_runtime;
- ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. */
+ /* Attempt to quickly learn interactivity. */
+ ts2->ts_slice = tdq_slice(tdq) - sched_slice_min;
#ifdef KTR
bzero(ts2->ts_name, sizeof(ts2->ts_name));
#endif
@@ -2225,8 +2274,8 @@
* Force a context switch if the current thread has used up a full
* time slice (default is 100ms).
*/
- if (!TD_IS_IDLETHREAD(td) && --ts->ts_slice <= 0) {
- ts->ts_slice = sched_slice;
+ if (!TD_IS_IDLETHREAD(td) && ++ts->ts_slice >= tdq_slice(tdq)) {
+ ts->ts_slice = 0;
td->td_flags |= TDF_NEEDRESCHED | TDF_SLICEEND;
}
}
@@ -2575,18 +2624,31 @@
{
struct thread *td;
struct tdq *tdq;
- int switchcnt;
+ int oldswitchcnt, switchcnt;
int i;
mtx_assert(&Giant, MA_NOTOWNED);
td = curthread;
tdq = TDQ_SELF();
+ THREAD_NO_SLEEPING();
+ oldswitchcnt = -1;
for (;;) {
+ if (tdq->tdq_load) {
+ thread_lock(td);
+ mi_switch(SW_VOL | SWT_IDLE, NULL);
+ thread_unlock(td);
+ }
+ switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
#ifdef SMP
- if (tdq_idled(tdq) == 0)
- continue;
+ if (switchcnt != oldswitchcnt) {
+ oldswitchcnt = switchcnt;
+ if (tdq_idled(tdq) == 0)
+ continue;
+ }
+ switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+#else
+ oldswitchcnt = switchcnt;
#endif
- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
/*
* If we're switching very frequently, spin while checking
* for load rather than entering a low power state that
@@ -2601,20 +2663,32 @@
cpu_spinwait();
}
}
+
+ /* If there was a context switch during the spin, restart it. */
switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
- if (tdq->tdq_load == 0) {
- tdq->tdq_cpu_idle = 1;
- if (tdq->tdq_load == 0) {
- cpu_idle(switchcnt > sched_idlespinthresh * 4);
- tdq->tdq_switchcnt++;
- }
- tdq->tdq_cpu_idle = 0;
- }
- if (tdq->tdq_load) {
- thread_lock(td);
- mi_switch(SW_VOL | SWT_IDLE, NULL);
- thread_unlock(td);
- }
+ if (tdq->tdq_load != 0 || switchcnt != oldswitchcnt)
+ continue;
+
+ /* Run main MD idle handler. */
+ tdq->tdq_cpu_idle = 1;
+ /*
+ * Make sure that the tdq_cpu_idle update is globally visible
+ * before cpu_idle() reads tdq_load.  The order is important
+ * to avoid a race with tdq_notify().
+ */
+ mb();
+ cpu_idle(switchcnt * 4 > sched_idlespinthresh);
+ tdq->tdq_cpu_idle = 0;
+
+ /*
+ * Count thread-less hardware interrupts and other wakeup
+ * reasons the same as context switches.
+ */
+ switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+ if (switchcnt != oldswitchcnt)
+ continue;
+ tdq->tdq_switchcnt++;
+ oldswitchcnt++;
}
}
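The mb() added here pairs with the one added to tdq_notify() above: the idle thread publishes tdq_cpu_idle before re-checking tdq_load, while the waking CPU publishes its tdq_load increase before reading tdq_cpu_idle. A schematic of the ordering contract, simplified to two shared variables; the real code uses the tdq fields and full mb() barriers:

	/*
	 *   idle CPU (sched_idletd)        waking CPU (tdq_notify caller)
	 *   -----------------------        ------------------------------
	 *   cpu_idle_flag = 1;             load++;
	 *   mb();                          mb();
	 *   if (load == 0)                 if (cpu_idle_flag)
	 *           enter_low_power();             send_ipi();
	 *
	 * With both barriers in place, at least one side observes the other's
	 * store, so the CPU cannot go idle on a non-empty queue while the
	 * waker simultaneously skips the IPI.
	 */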
@@ -2638,6 +2712,8 @@
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
tdq_load_rem(tdq, td);
lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
+ td->td_lastcpu = td->td_oncpu;
+ td->td_oncpu = NOCPU;
}
KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
newtd = choosethread();
@@ -2652,7 +2728,6 @@
void
sched_fork_exit(struct thread *td)
{
- struct td_sched *ts;
struct tdq *tdq;
int cpuid;
@@ -2662,7 +2737,6 @@
*/
cpuid = PCPU_GET(cpuid);
tdq = TDQ_CPU(cpuid);
- ts = td->td_sched;
if (TD_IS_IDLETHREAD(td))
td->td_lock = TDQ_LOCKPTR(tdq);
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
@@ -2670,6 +2744,10 @@
TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED);
lock_profile_obtain_lock_success(
&TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
+
+ KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
+ "prio:%d", td->td_priority);
+ SDT_PROBE0(sched, , , on__cpu);
}
/*
@@ -2796,6 +2874,7 @@
if (new_val <= 0)
return (EINVAL);
sched_slice = imax(1, (new_val + period / 2) / period);
+ sched_slice_min = sched_slice / SCHED_SLICE_MIN_DIVISOR;
hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) /
realstathz);
return (0);
Modified: trunk/sys/kern/stack_protector.c
===================================================================
--- trunk/sys/kern/stack_protector.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/stack_protector.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/stack_protector.c 198295 2009-10-20 16:36:51Z ru $");
#include <sys/types.h>
#include <sys/param.h>
Modified: trunk/sys/kern/subr_acl_nfs4.c
===================================================================
--- trunk/sys/kern/subr_acl_nfs4.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_acl_nfs4.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008-2010 Edward Tomasz Napierała <trasz at FreeBSD.org>
* All rights reserved.
@@ -32,9 +33,11 @@
#ifdef _KERNEL
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_acl_nfs4.c 290893 2015-11-15 23:54:34Z ngie $");
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/priv.h>
@@ -1066,6 +1069,7 @@
child_aclp->acl_cnt++;
entry->ae_flags &= ~ACL_ENTRY_INHERIT_ONLY;
+ entry->ae_flags |= ACL_ENTRY_INHERITED;
/*
* If the type of the ACE is neither ALLOW nor DENY,
@@ -1370,3 +1374,46 @@
return (0);
}
+
+#ifdef _KERNEL
+static int
+acl_nfs4_modload(module_t module, int what, void *arg)
+{
+ int ret;
+
+ ret = 0;
+
+ switch (what) {
+ case MOD_LOAD:
+ case MOD_SHUTDOWN:
+ break;
+
+ case MOD_QUIESCE:
+ /* XXX TODO */
+ ret = 0;
+ break;
+
+ case MOD_UNLOAD:
+ /* XXX TODO */
+ ret = 0;
+ break;
+ default:
+ ret = EINVAL;
+ break;
+ }
+
+ return (ret);
+}
+
+static moduledata_t acl_nfs4_mod = {
+ "acl_nfs4",
+ acl_nfs4_modload,
+ NULL
+};
+
+/*
+ * XXX TODO: which subsystem, order?
+ */
+DECLARE_MODULE(acl_nfs4, acl_nfs4_mod, SI_SUB_VFS, SI_ORDER_FIRST);
+MODULE_VERSION(acl_nfs4, 1);
+#endif /* _KERNEL */
Modified: trunk/sys/kern/subr_blist.c
===================================================================
--- trunk/sys/kern/subr_blist.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_blist.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
* Redistribution and use in source and binary forms, with or without
@@ -57,8 +58,8 @@
* The non-blocking features of the blist code are used in the swap code
* (vm/swap_pager.c).
*
- * LAYOUT: The radix tree is layed out recursively using a
- * linear array. Each meta node is immediately followed (layed out
+ * LAYOUT: The radix tree is laid out recursively using a
+ * linear array. Each meta node is immediately followed (laid out
* sequentially in memory) by BLIST_META_RADIX lower level nodes. This
* is a recursive structure but one that can be easily scanned through
* a very simple 'skip' calculation. In order to support large radixes,
@@ -80,7 +81,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_blist.c 321459 2017-07-25 04:13:43Z alc $");
#ifdef _KERNEL
@@ -99,19 +100,18 @@
#define BLIST_DEBUG
#endif
-#define SWAPBLK_NONE ((daddr_t)-1)
-
#include <sys/types.h>
+#include <sys/malloc.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <stdbool.h>
+#define bitcount64(x) __bitcount64((uint64_t)(x))
#define malloc(a,b,c) calloc(a, 1)
#define free(a,b) free(a)
-typedef unsigned int u_daddr_t;
-
#include <sys/blist.h>
void panic(const char *ctl, ...);
@@ -122,22 +122,23 @@
* static support functions
*/
-static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
-static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
- daddr_t count, daddr_t radix, int skip);
+static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count,
+ daddr_t cursor);
+static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count,
+ daddr_t radix, daddr_t skip, daddr_t cursor);
static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
- daddr_t radix, int skip, daddr_t blk);
+ daddr_t radix, daddr_t skip, daddr_t blk);
static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
daddr_t skip, blist_t dest, daddr_t count);
-static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
-static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
- daddr_t radix, int skip, daddr_t blk);
-static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
- int skip, daddr_t count);
+static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
+static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
+ daddr_t radix, daddr_t skip, daddr_t blk);
+static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t skip,
+ daddr_t count);
#ifndef _KERNEL
-static void blst_radix_print(blmeta_t *scan, daddr_t blk,
- daddr_t radix, int skip, int tab);
+static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
+ daddr_t skip, int tab);
#endif
#ifdef _KERNEL
@@ -159,27 +160,33 @@
blist_create(daddr_t blocks, int flags)
{
blist_t bl;
- int radix;
- int skip = 0;
+ daddr_t nodes, radix, skip;
/*
* Calculate radix and skip field used for scanning.
*/
radix = BLIST_BMAP_RADIX;
-
+ skip = 0;
while (radix < blocks) {
radix *= BLIST_META_RADIX;
skip = (skip + 1) * BLIST_META_RADIX;
}
+ nodes = 1 + blst_radix_init(NULL, radix, skip, blocks);
- bl = malloc(sizeof(struct blist), M_SWAP, flags | M_ZERO);
+ bl = malloc(sizeof(struct blist), M_SWAP, flags);
+ if (bl == NULL)
+ return (NULL);
bl->bl_blocks = blocks;
bl->bl_radix = radix;
bl->bl_skip = skip;
- bl->bl_rootblks = 1 +
- blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
- bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, flags);
+ bl->bl_cursor = 0;
+ bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags);
+ if (bl->bl_root == NULL) {
+ free(bl, M_SWAP);
+ return (NULL);
+ }
+ blst_radix_init(bl->bl_root, radix, skip, blocks);
#if defined(BLIST_DEBUG)
printf(
@@ -187,14 +194,13 @@
", requiring %lldK of ram\n",
(long long)bl->bl_blocks,
(long long)bl->bl_blocks * 4 / 1024,
- (long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
+ (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
);
printf("BLIST raw radix tree contains %lld records\n",
- (long long)bl->bl_rootblks);
+ (long long)nodes);
#endif
- blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
- return(bl);
+ return (bl);
}
void
@@ -205,7 +211,7 @@
}
/*
- * blist_alloc() - reserve space in the block bitmap. Return the base
+ * blist_alloc() - reserve space in the block bitmap. Return the base
* of a contiguous region or SWAPBLK_NONE if space could
* not be allocated.
*/
@@ -213,20 +219,45 @@
daddr_t
blist_alloc(blist_t bl, daddr_t count)
{
- daddr_t blk = SWAPBLK_NONE;
+ daddr_t blk;
- if (bl) {
+ /*
+ * This loop iterates at most twice. An allocation failure in the
+ * first iteration leads to a second iteration only if the cursor was
+ * non-zero. When the cursor is zero, an allocation failure will
+ * reduce the hint, stopping further iterations.
+ */
+ while (count <= bl->bl_root->bm_bighint) {
if (bl->bl_radix == BLIST_BMAP_RADIX)
- blk = blst_leaf_alloc(bl->bl_root, 0, count);
+ blk = blst_leaf_alloc(bl->bl_root, 0, count,
+ bl->bl_cursor);
else
- blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip);
- if (blk != SWAPBLK_NONE)
- bl->bl_free -= count;
+ blk = blst_meta_alloc(bl->bl_root, 0, count,
+ bl->bl_radix, bl->bl_skip, bl->bl_cursor);
+ if (blk != SWAPBLK_NONE) {
+ bl->bl_cursor = blk + count;
+ return (blk);
+ } else if (bl->bl_cursor != 0)
+ bl->bl_cursor = 0;
}
- return(blk);
+ return (SWAPBLK_NONE);
}
/*
+ * blist_avail() - return the number of free blocks.
+ */
+
+daddr_t
+blist_avail(blist_t bl)
+{
+
+ if (bl->bl_radix == BLIST_BMAP_RADIX)
+ return (bitcount64(bl->bl_root->u.bmu_bitmap));
+ else
+ return (bl->bl_root->u.bmu_avail);
+}
+
+/*
* blist_free() - free up space in the block bitmap. Return the base
 * of a contiguous region. Panic if an inconsistency is
* found.
@@ -239,8 +270,8 @@
if (bl->bl_radix == BLIST_BMAP_RADIX)
blst_leaf_free(bl->bl_root, blkno, count);
else
- blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
- bl->bl_free += count;
+ blst_meta_free(bl->bl_root, blkno, count,
+ bl->bl_radix, bl->bl_skip, 0);
}
}
@@ -251,10 +282,10 @@
* actually filled that were free before the call.
*/
-int
+daddr_t
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
- int filled;
+ daddr_t filled;
if (bl) {
if (bl->bl_radix == BLIST_BMAP_RADIX)
@@ -262,10 +293,9 @@
else
filled = blst_meta_fill(bl->bl_root, blkno, count,
bl->bl_radix, bl->bl_skip, 0);
- bl->bl_free -= filled;
- return filled;
- } else
- return 0;
+ return (filled);
+ }
+ return (0);
}
/*
@@ -325,77 +355,92 @@
/*
* blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap).
*
- * This is the core of the allocator and is optimized for the 1 block
- * and the BLIST_BMAP_RADIX block allocation cases. Other cases are
- * somewhat slower. The 1 block allocation case is log2 and extremely
- * quick.
+ * This is the core of the allocator and is optimized for the
+ * BLIST_BMAP_RADIX block allocation case. Otherwise, execution
+ * time is proportional to log2(count) + log2(BLIST_BMAP_RADIX).
*/
static daddr_t
-blst_leaf_alloc(
- blmeta_t *scan,
- daddr_t blk,
- int count
-) {
- u_daddr_t orig = scan->u.bmu_bitmap;
+blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
+{
+ u_daddr_t mask;
+ int count1, hi, lo, mid, num_shifts, range1, range_ext;
- if (orig == 0) {
+ if (count == BLIST_BMAP_RADIX) {
/*
- * Optimize bitmap all-allocated case. Also, count = 1
- * case assumes at least 1 bit is free in the bitmap, so
- * we have to take care of this case here.
+ * Optimize allocation of BLIST_BMAP_RADIX bits. If this wasn't
+ * a special case, then forming the final value of 'mask' below
+ * would require special handling to avoid an invalid left shift
+ * when count equals the number of bits in mask.
*/
+ if (~scan->u.bmu_bitmap != 0) {
+ scan->bm_bighint = BLIST_BMAP_RADIX - 1;
+ return (SWAPBLK_NONE);
+ }
+ if (cursor != blk)
+ return (SWAPBLK_NONE);
+ scan->u.bmu_bitmap = 0;
scan->bm_bighint = 0;
- return(SWAPBLK_NONE);
+ return (blk);
}
- if (count == 1) {
+ range1 = 0;
+ count1 = count - 1;
+ num_shifts = fls(count1);
+ mask = scan->u.bmu_bitmap;
+ while (mask != 0 && num_shifts > 0) {
/*
- * Optimized code to allocate one bit out of the bitmap
+ * If bit i is set in mask, then bits in [i, i+range1] are set
+ * in scan->u.bmu_bitmap. The value of range1 is equal to
+ * count1 >> num_shifts. Grow range and reduce num_shifts to 0,
+ * while preserving these invariants. The updates to mask leave
+ * fewer bits set, but each bit that remains set represents a
+ * longer string of consecutive bits set in scan->u.bmu_bitmap.
*/
- u_daddr_t mask;
- int j = BLIST_BMAP_RADIX/2;
- int r = 0;
-
- mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);
-
- while (j) {
- if ((orig & mask) == 0) {
- r += j;
- orig >>= j;
- }
- j >>= 1;
- mask >>= j;
- }
- scan->u.bmu_bitmap &= ~(1 << r);
- return(blk + r);
+ num_shifts--;
+ range_ext = range1 + ((count1 >> num_shifts) & 1);
+ mask &= mask >> range_ext;
+ range1 += range_ext;
}
- if (count <= BLIST_BMAP_RADIX) {
+ if (mask == 0) {
/*
- * non-optimized code to allocate N bits out of the bitmap.
- * The more bits, the faster the code runs. It will run
- * the slowest allocating 2 bits, but since there aren't any
- * memory ops in the core loop (or shouldn't be, anyway),
- * you probably won't notice the difference.
+ * Update bighint. There is no allocation bigger than range1
+ * available in this leaf.
*/
- int j;
- int n = BLIST_BMAP_RADIX - count;
- u_daddr_t mask;
+ scan->bm_bighint = range1;
+ return (SWAPBLK_NONE);
+ }
- mask = (u_daddr_t)-1 >> n;
+ /*
+ * Discard any candidates that appear before the cursor.
+ */
+ lo = cursor - blk;
+ mask &= ~(u_daddr_t)0 << lo;
- for (j = 0; j <= n; ++j) {
- if ((orig & mask) == mask) {
- scan->u.bmu_bitmap &= ~mask;
- return(blk + j);
- }
- mask = (mask << 1);
- }
+ if (mask == 0)
+ return (SWAPBLK_NONE);
+
+ /*
+ * The least significant set bit in mask marks the start of the first
+ * available range of sufficient size. Clear all the bits but that one,
+ * and then perform a binary search to find its position.
+ */
+ mask &= -mask;
+ hi = BLIST_BMAP_RADIX - count1;
+ while (lo + 1 < hi) {
+ mid = (lo + hi) >> 1;
+ if ((mask >> mid) != 0)
+ lo = mid;
+ else
+ hi = mid;
}
+
/*
- * We couldn't allocate count in this subtree, update bighint.
+ * Set in mask exactly the bits being allocated, and clear them from
+ * the set of available bits.
*/
- scan->bm_bighint = count - 1;
- return(SWAPBLK_NONE);
+ mask = (mask << count) - mask;
+ scan->u.bmu_bitmap &= ~mask;
+ return (blk + lo);
}
/*
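The rewritten blst_leaf_alloc() finds a run of count set bits by repeatedly ANDing the bitmap with shifted copies of itself: after the loop each surviving bit marks the start of a sufficiently long run, and a binary search recovers its position. A small self-contained illustration of the same shift-and-AND trick (userland C; __builtin_ctzll is a GCC/Clang builtin used here in place of the kernel's binary search):

	#include <stdint.h>

	/*
	 * Return the lowest bit index at which `map' has `count' consecutive
	 * set bits, or -1 if no such run exists (1 <= count <= 64).
	 */
	static int
	find_run(uint64_t map, int count)
	{
		int have = 1;	/* each set bit now represents a run this long */

		while (map != 0 && have < count) {
			int step = count - have;

			if (step > have)
				step = have;	/* never shift past what is known */
			map &= map >> step;	/* bit i survives iff bits [i, i+have+step-1] were set */
			have += step;
		}
		if (map == 0)
			return (-1);
		return (__builtin_ctzll(map));
	}

For example, find_run(0x00f0ff00, 6) returns 8, the start of the eight-bit run, while find_run(0x00f0ff00, 9) returns -1.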
@@ -408,62 +453,72 @@
*/
static daddr_t
-blst_meta_alloc(
- blmeta_t *scan,
- daddr_t blk,
- daddr_t count,
- daddr_t radix,
- int skip
-) {
- int i;
- int next_skip = ((u_int)skip / BLIST_META_RADIX);
+blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count, daddr_t radix,
+ daddr_t skip, daddr_t cursor)
+{
+ daddr_t i, next_skip, r;
+ int child;
+ bool scan_from_start;
- if (scan->u.bmu_avail == 0) {
+ if (scan->u.bmu_avail < count) {
/*
- * ALL-ALLOCATED special case
+ * The meta node's hint must be too large if the allocation
+ * exceeds the number of free blocks. Reduce the hint, and
+ * return failure.
*/
- scan->bm_bighint = count;
- return(SWAPBLK_NONE);
+ scan->bm_bighint = scan->u.bmu_avail;
+ return (SWAPBLK_NONE);
}
+ next_skip = skip / BLIST_META_RADIX;
+ /*
+ * An ALL-FREE meta node requires special handling before allocating
+ * any of its blocks.
+ */
if (scan->u.bmu_avail == radix) {
radix /= BLIST_META_RADIX;
/*
- * ALL-FREE special case, initialize uninitialize
- * sublevel.
+ * Reinitialize each of the meta node's children. An ALL-FREE
+ * meta node cannot have a terminator in any subtree.
*/
for (i = 1; i <= skip; i += next_skip) {
- if (scan[i].bm_bighint == (daddr_t)-1)
- break;
- if (next_skip == 1) {
+ if (next_skip == 1)
scan[i].u.bmu_bitmap = (u_daddr_t)-1;
- scan[i].bm_bighint = BLIST_BMAP_RADIX;
- } else {
- scan[i].bm_bighint = radix;
+ else
scan[i].u.bmu_avail = radix;
- }
+ scan[i].bm_bighint = radix;
}
} else {
radix /= BLIST_META_RADIX;
}
- for (i = 1; i <= skip; i += next_skip) {
+ if (count > radix) {
+ /*
+ * The allocation exceeds the number of blocks that are
+ * managed by a subtree of this meta node.
+ */
+ panic("allocation too large");
+ }
+ scan_from_start = cursor == blk;
+ child = (cursor - blk) / radix;
+ blk += child * radix;
+ for (i = 1 + child * next_skip; i <= skip; i += next_skip) {
if (count <= scan[i].bm_bighint) {
/*
- * count fits in object
+ * The allocation might fit in the i'th subtree.
*/
- daddr_t r;
if (next_skip == 1) {
- r = blst_leaf_alloc(&scan[i], blk, count);
+ r = blst_leaf_alloc(&scan[i], blk, count,
+ cursor > blk ? cursor : blk);
} else {
- r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1);
+ r = blst_meta_alloc(&scan[i], blk, count,
+ radix, next_skip - 1, cursor > blk ?
+ cursor : blk);
}
if (r != SWAPBLK_NONE) {
scan->u.bmu_avail -= count;
- if (scan->bm_bighint > scan->u.bmu_avail)
- scan->bm_bighint = scan->u.bmu_avail;
- return(r);
+ return (r);
}
} else if (scan[i].bm_bighint == (daddr_t)-1) {
/*
@@ -470,12 +525,6 @@
* Terminator
*/
break;
- } else if (count > radix) {
- /*
- * count does not fit in object even if it were
- * complete free.
- */
- panic("blist_meta_alloc: allocation too large");
}
blk += radix;
}
@@ -483,9 +532,10 @@
/*
* We couldn't allocate count in this subtree, update bighint.
*/
- if (scan->bm_bighint >= count)
+ if (scan_from_start && scan->bm_bighint >= count)
scan->bm_bighint = count - 1;
- return(SWAPBLK_NONE);
+
+ return (SWAPBLK_NONE);
}
/*
@@ -538,16 +588,11 @@
*/
static void
-blst_meta_free(
- blmeta_t *scan,
- daddr_t freeBlk,
- daddr_t count,
- daddr_t radix,
- int skip,
- daddr_t blk
-) {
- int i;
- int next_skip = ((u_int)skip / BLIST_META_RADIX);
+blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, daddr_t radix,
+ daddr_t skip, daddr_t blk)
+{
+ daddr_t i, next_skip, v;
+ int child;
#if 0
printf("free (%llx,%lld) FROM (%llx,%lld)\n",
@@ -555,6 +600,7 @@
(long long)blk, (long long)radix
);
#endif
+ next_skip = skip / BLIST_META_RADIX;
if (scan->u.bmu_avail == 0) {
/*
@@ -599,13 +645,10 @@
radix /= BLIST_META_RADIX;
- i = (freeBlk - blk) / radix;
- blk += i * radix;
- i = i * next_skip + 1;
-
+ child = (freeBlk - blk) / radix;
+ blk += child * radix;
+ i = 1 + child * next_skip;
while (i <= skip && blk < freeBlk + count) {
- daddr_t v;
-
v = blk + radix - freeBlk;
if (v > count)
v = count;
@@ -642,8 +685,7 @@
blist_t dest,
daddr_t count
) {
- int next_skip;
- int i;
+ daddr_t i, next_skip;
/*
* Leaf node
@@ -658,7 +700,7 @@
int i;
for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
- if (v & (1 << i))
+ if (v & ((u_daddr_t)1 << i))
blist_free(dest, blk + i, 1);
}
}
@@ -688,7 +730,7 @@
radix /= BLIST_META_RADIX;
- next_skip = ((u_int)skip / BLIST_META_RADIX);
+ next_skip = skip / BLIST_META_RADIX;
for (i = 1; count && i <= skip; i += next_skip) {
if (scan[i].bm_bighint == (daddr_t)-1)
@@ -729,23 +771,21 @@
* the number of blocks allocated by the call.
*/
-static int
+static daddr_t
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
int n = blk & (BLIST_BMAP_RADIX - 1);
- int nblks;
- u_daddr_t mask, bitmap;
+ daddr_t nblks;
+ u_daddr_t mask;
mask = ((u_daddr_t)-1 << n) &
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
- /* Count the number of blocks we're about to allocate */
- bitmap = scan->u.bmu_bitmap & mask;
- for (nblks = 0; bitmap != 0; nblks++)
- bitmap &= bitmap - 1;
+ /* Count the number of blocks that we are allocating. */
+ nblks = bitcount64(scan->u.bmu_bitmap & mask);
scan->u.bmu_bitmap &= ~mask;
- return nblks;
+ return (nblks);
}
/*
@@ -756,19 +796,20 @@
* range must be within the extent of this node. Returns the
* number of blocks allocated by the call.
*/
-static int
-blst_meta_fill(
- blmeta_t *scan,
- daddr_t allocBlk,
- daddr_t count,
- daddr_t radix,
- int skip,
- daddr_t blk
-) {
- int i;
- int next_skip = ((u_int)skip / BLIST_META_RADIX);
- int nblks = 0;
+static daddr_t
+blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, daddr_t radix,
+ daddr_t skip, daddr_t blk)
+{
+ daddr_t i, nblks, next_skip, v;
+ int child;
+ if (count > radix) {
+ /*
+ * The allocation exceeds the number of blocks that are
+ * managed by this meta node.
+ */
+ panic("allocation too large");
+ }
if (count == radix || scan->u.bmu_avail == 0) {
/*
* ALL-ALLOCATED special case
@@ -775,19 +816,23 @@
*/
nblks = scan->u.bmu_avail;
scan->u.bmu_avail = 0;
- scan->bm_bighint = count;
+ scan->bm_bighint = 0;
return nblks;
}
+ next_skip = skip / BLIST_META_RADIX;
+ /*
+ * An ALL-FREE meta node requires special handling before allocating
+ * any of its blocks.
+ */
if (scan->u.bmu_avail == radix) {
radix /= BLIST_META_RADIX;
/*
- * ALL-FREE special case, initialize sublevel
+ * Reinitialize each of the meta node's children. An ALL-FREE
+ * meta node cannot have a terminator in any subtree.
*/
for (i = 1; i <= skip; i += next_skip) {
- if (scan[i].bm_bighint == (daddr_t)-1)
- break;
if (next_skip == 1) {
scan[i].u.bmu_bitmap = (u_daddr_t)-1;
scan[i].bm_bighint = BLIST_BMAP_RADIX;
@@ -800,16 +845,11 @@
radix /= BLIST_META_RADIX;
}
- if (count > radix)
- panic("blist_meta_fill: allocation too large");
-
- i = (allocBlk - blk) / radix;
- blk += i * radix;
- i = i * next_skip + 1;
-
+ nblks = 0;
+ child = (allocBlk - blk) / radix;
+ blk += child * radix;
+ i = 1 + child * next_skip;
while (i <= skip && blk < allocBlk + count) {
- daddr_t v;
-
v = blk + radix - allocBlk;
if (v > count)
v = count;
@@ -842,12 +882,12 @@
*/
static daddr_t
-blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
+blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t skip, daddr_t count)
{
- int i;
- int next_skip;
- daddr_t memindex = 0;
+ daddr_t i, memindex, next_skip;
+ memindex = 0;
+
/*
* Leaf node
*/
@@ -872,7 +912,7 @@
}
radix /= BLIST_META_RADIX;
- next_skip = ((u_int)skip / BLIST_META_RADIX);
+ next_skip = skip / BLIST_META_RADIX;
for (i = 1; i <= skip; i += next_skip) {
if (count >= radix) {
@@ -914,15 +954,14 @@
#ifdef BLIST_DEBUG
static void
-blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
+blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, daddr_t skip,
+ int tab)
{
- int i;
- int next_skip;
- int lastState = 0;
+ daddr_t i, next_skip;
if (radix == BLIST_BMAP_RADIX) {
printf(
- "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
+ "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
tab, tab, "",
(long long)blk, (long long)radix,
(long long)scan->u.bmu_bitmap,
@@ -960,7 +999,7 @@
);
radix /= BLIST_META_RADIX;
- next_skip = ((u_int)skip / BLIST_META_RADIX);
+ next_skip = skip / BLIST_META_RADIX;
tab += 4;
for (i = 1; i <= skip; i += next_skip) {
@@ -970,7 +1009,6 @@
tab, tab, "",
(long long)blk, (long long)radix
);
- lastState = 0;
break;
}
blst_radix_print(
@@ -1016,11 +1054,10 @@
for (;;) {
char buf[1024];
- daddr_t da = 0;
- daddr_t count = 0;
+ long long da = 0;
+ long long count = 0;
-
- printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
+ printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
(long long)size, (long long)bl->bl_radix);
fflush(stdout);
if (fgets(buf, sizeof(buf), stdin) == NULL)
@@ -1028,7 +1065,7 @@
switch(buf[0]) {
case 'r':
if (sscanf(buf + 1, "%lld", &count) == 1) {
- blist_resize(&bl, count, 1);
+ blist_resize(&bl, count, 1, M_WAITOK);
} else {
printf("?\n");
}
@@ -1044,8 +1081,7 @@
}
break;
case 'f':
- if (sscanf(buf + 1, "%llx %lld",
- (long long *)&da, (long long *)&count) == 2) {
+ if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
blist_free(bl, da, count);
} else {
printf("?\n");
@@ -1052,10 +1088,9 @@
}
break;
case 'l':
- if (sscanf(buf + 1, "%llx %lld",
- (long long *)&da, (long long *)&count) == 2) {
- printf(" n=%d\n",
- blist_fill(bl, da, count));
+ if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
+ printf(" n=%jd\n",
+ (intmax_t)blist_fill(bl, da, count));
} else {
printf("?\n");
}
Modified: trunk/sys/kern/subr_bufring.c
===================================================================
--- trunk/sys/kern/subr_bufring.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_bufring.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2007, 2008 Kip Macy <kmacy at freebsd.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_bufring.c 207673 2010-05-05 20:39:02Z joel $");
#include <sys/param.h>
Modified: trunk/sys/kern/subr_bus.c
===================================================================
--- trunk/sys/kern/subr_bus.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_bus.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997,1998,2003 Doug Rabson
* All rights reserved.
@@ -25,9 +26,10 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_bus.c 308402 2016-11-07 09:19:04Z hselasky $");
#include "opt_bus.h"
+#include "opt_random.h"
#include <sys/param.h>
#include <sys/conf.h>
@@ -40,10 +42,12 @@
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/poll.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/condvar.h>
#include <sys/queue.h>
#include <machine/bus.h>
+#include <sys/random.h>
#include <sys/rman.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
@@ -52,9 +56,11 @@
#include <sys/uio.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <sys/cpuset.h>
#include <net/vnet.h>
+#include <machine/cpu.h>
#include <machine/stdarg.h>
#include <vm/uma.h>
@@ -143,6 +149,8 @@
static MALLOC_DEFINE(M_BUS, "bus", "Bus data structures");
static MALLOC_DEFINE(M_BUS_SC, "bus-sc", "Bus data structures, softc");
+static void devctl2_init(void);
+
#ifdef BUS_DEBUG
static int bus_debug = 1;
@@ -281,6 +289,7 @@
device_sysctl_init(device_t dev)
{
devclass_t dc = dev->devclass;
+ int domain;
if (dev->sysctl_tree != NULL)
return;
@@ -310,6 +319,10 @@
OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD,
dev, DEVICE_SYSCTL_PARENT, device_sysctl_handler, "A",
"parent device");
+ if (bus_get_domain(dev, &domain) == 0)
+ SYSCTL_ADD_INT(&dev->sysctl_ctx,
+ SYSCTL_CHILDREN(dev->sysctl_tree), OID_AUTO, "%domain",
+ CTLFLAG_RD, NULL, domain, "NUMA domain");
}
static void
@@ -355,15 +368,16 @@
/* Deprecated way to adjust queue length */
static int sysctl_devctl_disable(SYSCTL_HANDLER_ARGS);
/* XXX Need to support old-style tunable hw.bus.devctl_disable" */
-SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_disable, CTLTYPE_INT | CTLFLAG_RW, NULL,
- 0, sysctl_devctl_disable, "I", "devctl disable -- deprecated");
+SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_disable, CTLTYPE_INT | CTLFLAG_RW |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_disable, "I",
+ "devctl disable -- deprecated");
#define DEVCTL_DEFAULT_QUEUE_LEN 1000
static int sysctl_devctl_queue(SYSCTL_HANDLER_ARGS);
static int devctl_queue_length = DEVCTL_DEFAULT_QUEUE_LEN;
TUNABLE_INT("hw.bus.devctl_queue", &devctl_queue_length);
-SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RW, NULL,
- 0, sysctl_devctl_queue, "I", "devctl queue length");
+SYSCTL_PROC(_hw_bus, OID_AUTO, devctl_queue, CTLTYPE_INT | CTLFLAG_RW |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_devctl_queue, "I", "devctl queue length");
static d_open_t devopen;
static d_close_t devclose;
@@ -370,15 +384,16 @@
static d_read_t devread;
static d_ioctl_t devioctl;
static d_poll_t devpoll;
+static d_kqfilter_t devkqfilter;
static struct cdevsw dev_cdevsw = {
.d_version = D_VERSION,
- .d_flags = D_NEEDGIANT,
.d_open = devopen,
.d_close = devclose,
.d_read = devread,
.d_ioctl = devioctl,
.d_poll = devpoll,
+ .d_kqfilter = devkqfilter,
.d_name = "devctl",
};
@@ -395,13 +410,23 @@
int inuse;
int nonblock;
int queued;
+ int async;
struct mtx mtx;
struct cv cv;
struct selinfo sel;
struct devq devq;
- struct proc *async_proc;
+ struct sigio *sigio;
} devsoftc;
+static void filt_devctl_detach(struct knote *kn);
+static int filt_devctl_read(struct knote *kn, long hint);
+
+struct filterops devctl_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_devctl_detach,
+ .f_event = filt_devctl_read,
+};
+
static struct cdev *devctl_dev;
static void
@@ -412,17 +437,22 @@
mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
cv_init(&devsoftc.cv, "dev cv");
TAILQ_INIT(&devsoftc.devq);
+ knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
+ devctl2_init();
}
static int
devopen(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
- if (devsoftc.inuse)
+
+ mtx_lock(&devsoftc.mtx);
+ if (devsoftc.inuse) {
+ mtx_unlock(&devsoftc.mtx);
return (EBUSY);
+ }
/* move to init */
devsoftc.inuse = 1;
- devsoftc.nonblock = 0;
- devsoftc.async_proc = NULL;
+ mtx_unlock(&devsoftc.mtx);
return (0);
}
@@ -429,11 +459,14 @@
static int
devclose(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
+
+ mtx_lock(&devsoftc.mtx);
devsoftc.inuse = 0;
- mtx_lock(&devsoftc.mtx);
+ devsoftc.nonblock = 0;
+ devsoftc.async = 0;
cv_broadcast(&devsoftc.cv);
+ funsetown(&devsoftc.sigio);
mtx_unlock(&devsoftc.mtx);
- devsoftc.async_proc = NULL;
return (0);
}
@@ -489,17 +522,20 @@
return (0);
case FIOASYNC:
if (*(int*)data)
- devsoftc.async_proc = td->td_proc;
+ devsoftc.async = 1;
else
- devsoftc.async_proc = NULL;
+ devsoftc.async = 0;
return (0);
+ case FIOSETOWN:
+ return fsetown(*(int *)data, &devsoftc.sigio);
+ case FIOGETOWN:
+ *(int *)data = fgetown(&devsoftc.sigio);
+ return (0);
/* (un)Support for other fcntl() calls. */
case FIOCLEX:
case FIONCLEX:
case FIONREAD:
- case FIOSETOWN:
- case FIOGETOWN:
default:
break;
}
@@ -523,6 +559,34 @@
return (revents);
}
+static int
+devkqfilter(struct cdev *dev, struct knote *kn)
+{
+ int error;
+
+ if (kn->kn_filter == EVFILT_READ) {
+ kn->kn_fop = &devctl_rfiltops;
+ knlist_add(&devsoftc.sel.si_note, kn, 0);
+ error = 0;
+ } else
+ error = EINVAL;
+ return (error);
+}
+
+static void
+filt_devctl_detach(struct knote *kn)
+{
+
+ knlist_remove(&devsoftc.sel.si_note, kn, 0);
+}
+
+static int
+filt_devctl_read(struct knote *kn, long hint)
+{
+ kn->kn_data = devsoftc.queued;
+ return (kn->kn_data != 0);
+}
+
/**
* @brief Return whether the userland process is running
*/
@@ -543,7 +607,6 @@
devctl_queue_data_f(char *data, int flags)
{
struct dev_event_info *n1 = NULL, *n2 = NULL;
- struct proc *p;
if (strlen(data) == 0)
goto out;
@@ -571,14 +634,11 @@
TAILQ_INSERT_TAIL(&devsoftc.devq, n1, dei_link);
devsoftc.queued++;
cv_broadcast(&devsoftc.cv);
+ KNOTE_LOCKED(&devsoftc.sel.si_note, 0);
mtx_unlock(&devsoftc.mtx);
selwakeup(&devsoftc.sel);
- p = devsoftc.async_proc;
- if (p != NULL) {
- PROC_LOCK(p);
- kern_psignal(p, SIGIO);
- PROC_UNLOCK(p);
- }
+ if (devsoftc.async && devsoftc.sigio != NULL)
+ pgsigio(&devsoftc.sigio, SIGIO, 0);
return;
out:
/*
@@ -1812,6 +1872,8 @@
PDEBUG(("%s at %s with order %u as unit %d",
name, DEVICENAME(dev), order, unit));
+ KASSERT(name != NULL || unit == -1,
+ ("child device with wildcard name and specific unit number"));
child = make_device(dev, name, unit);
if (child == NULL)
@@ -1862,7 +1924,11 @@
PDEBUG(("%s from %s", DEVICENAME(child), DEVICENAME(dev)));
- /* remove children first */
+ /* detach parent before deleting children, if any */
+ if ((error = device_detach(child)) != 0)
+ return (error);
+
+ /* remove children second */
while ((grandchild = TAILQ_FIRST(&child->children)) != NULL) {
error = device_delete_child(child, grandchild);
if (error)
@@ -1869,8 +1935,6 @@
return (error);
}
- if ((error = device_detach(child)) != 0)
- return (error);
if (child->devclass)
devclass_delete_device(child->devclass, child);
if (child->parent)
@@ -2023,9 +2087,15 @@
if (!hasclass) {
if (device_set_devclass(child,
dl->driver->name) != 0) {
+ char const * devname =
+ device_get_name(child);
+ if (devname == NULL)
+ devname = "(unknown)";
printf("driver bug: Unable to set "
- "devclass (devname: %s)\n",
- device_get_name(child));
+ "devclass (class: %s "
+ "devname: %s)\n",
+ dl->driver->name,
+ devname);
(void)device_set_driver(child, NULL);
continue;
}
@@ -2053,6 +2123,16 @@
}
/*
+ * Probes that return BUS_PROBE_NOWILDCARD or lower
+ * only match on devices whose driver was explicitly
+ * specified.
+ */
+ if (result <= BUS_PROBE_NOWILDCARD &&
+ !(child->flags & DF_FIXEDCLASS)) {
+ result = ENXIO;
+ }
+
+ /*
* The driver returned an error so it
* certainly doesn't match.
*/
@@ -2067,14 +2147,6 @@
* of pri for the first match.
*/
if (best == NULL || result > pri) {
- /*
- * Probes that return BUS_PROBE_NOWILDCARD
- * or lower only match when they are set
- * in stone by the parent bus.
- */
- if (result <= BUS_PROBE_NOWILDCARD &&
- child->flags & DF_WILDCARD)
- continue;
best = dl;
pri = result;
continue;
@@ -2758,6 +2830,7 @@
int
device_attach(device_t dev)
{
+ uint64_t attachtime;
int error;
if (resource_disabled(dev->driver->name, dev->unit)) {
@@ -2770,6 +2843,7 @@
device_sysctl_init(dev);
if (!device_is_quiet(dev))
device_print_child(dev->parent, dev);
+ attachtime = get_cyclecount();
dev->state = DS_ATTACHING;
if ((error = DEVICE_ATTACH(dev)) != 0) {
printf("device_attach: %s%d attach returned %d\n",
@@ -2782,6 +2856,17 @@
dev->state = DS_NOTPRESENT;
return (error);
}
+ attachtime = get_cyclecount() - attachtime;
+ /*
+ * 4 bits per device is a reasonable value for desktop and server
+ * hardware with good get_cyclecount() implementations, but may
+ * need to be adjusted on other platforms.
+ */
+#ifdef RANDOM_DEBUG
+ printf("%s(): feeding %d bit(s) of entropy from %s%d\n",
+ __func__, 4, dev->driver->name, dev->unit);
+#endif
+ random_harvest(&attachtime, sizeof(attachtime), 4, RANDOM_ATTACH);
device_sysctl_update(dev);
if (dev->busy)
dev->state = DS_BUSY;
@@ -3230,7 +3315,10 @@
rle->flags |= RLE_ALLOCATED;
return (rle->res);
}
- panic("resource_list_alloc: resource entry is busy");
+ device_printf(bus,
+ "resource entry %#x type %d for child %s is busy\n", *rid,
+ type, device_get_nameunit(child));
+ return (NULL);
}
if (isdefault) {
@@ -3314,9 +3402,51 @@
}
/**
+ * @brief Release all active resources of a given type
+ *
+ * Release all active resources of a specified type. This is intended
+ * to be used to cleanup resources leaked by a driver after detach or
+ * a failed attach.
+ *
+ * @param rl the resource list which was allocated from
+ * @param bus the parent device of @p child
+ * @param child the device whose active resources are being released
+ * @param type the type of resources to release
+ *
+ * @retval 0 success
+ * @retval EBUSY at least one resource was active
+ */
+int
+resource_list_release_active(struct resource_list *rl, device_t bus,
+ device_t child, int type)
+{
+ struct resource_list_entry *rle;
+ int error, retval;
+
+ retval = 0;
+ STAILQ_FOREACH(rle, rl, link) {
+ if (rle->type != type)
+ continue;
+ if (rle->res == NULL)
+ continue;
+ if ((rle->flags & (RLE_RESERVED | RLE_ALLOCATED)) ==
+ RLE_RESERVED)
+ continue;
+ retval = EBUSY;
+ error = resource_list_release(rl, bus, child, type,
+ rman_get_rid(rle->res), rle->res);
+ if (error != 0)
+ device_printf(bus,
+ "Failed to release active resource: %d\n", error);
+ }
+ return (retval);
+}
+
+
+/**
* @brief Fully release a reserved resource
*
- * Fully releases a resouce reserved via resource_list_reserve().
+ * Fully releases a resource reserved via resource_list_reserve().
*
* @param rl the resource list which was allocated from
* @param bus the parent device of @p child
@@ -3604,6 +3734,25 @@
/**
* @brief Helper function for implementing BUS_PRINT_CHILD().
*
+ * This function prints out the VM domain for the given device.
+ *
+ * @returns the number of characters printed
+ */
+int
+bus_print_child_domain(device_t dev, device_t child)
+{
+ int domain;
+
+ /* No domain? Don't print anything */
+ if (BUS_GET_DOMAIN(dev, child, &domain) != 0)
+ return (0);
+
+ return (printf(" numa-domain %d", domain));
+}
+
+/**
+ * @brief Helper function for implementing BUS_PRINT_CHILD().
+ *
* This function simply calls bus_print_child_header() followed by
* bus_print_child_footer().
*
@@ -3615,6 +3764,7 @@
int retval = 0;
retval += bus_print_child_header(dev, child);
+ retval += bus_print_child_domain(dev, child);
retval += bus_print_child_footer(dev, child);
return (retval);
@@ -4029,6 +4179,16 @@
return (BUS_CHILD_PRESENT(device_get_parent(dev), dev));
}
+int
+bus_generic_get_domain(device_t dev, device_t child, int *domain)
+{
+
+ if (dev->parent)
+ return (BUS_GET_DOMAIN(dev->parent, dev, domain));
+
+ return (ENOENT);
+}
+
/*
* Some convenience functions to make it easier for drivers to use the
* resource-management functions. All these really do is hide the
@@ -4361,6 +4521,18 @@
return (BUS_GET_DMA_TAG(parent, dev));
}
+/**
+ * @brief Wrapper function for BUS_GET_DOMAIN().
+ *
+ * This function simply calls the BUS_GET_DOMAIN() method of the
+ * parent of @p dev.
+ */
+int
+bus_get_domain(device_t dev, int *domain)
+{
+ return (BUS_GET_DOMAIN(device_get_parent(dev), dev, domain));
+}
+
/* Resume all devices and then notify userland that we're up again. */
static int
root_resume(device_t dev)
@@ -4395,7 +4567,7 @@
}
/*
- * If we get here, assume that the device is permanant and really is
+ * If we get here, assume that the device is permanent and really is
* present in the system. Removable bus drivers are expected to intercept
* this call long before it gets here. We return -1 so that drivers that
* really care can check vs -1 or some ERRNO returned higher in the food
@@ -4833,3 +5005,259 @@
return (0);
return (bus_release_resource(dev, type, rman_get_rid(r), r));
}
+
+device_t
+device_lookup_by_name(const char *name)
+{
+ device_t dev;
+
+ TAILQ_FOREACH(dev, &bus_data_devices, devlink) {
+ if (dev->nameunit != NULL && strcmp(dev->nameunit, name) == 0)
+ return (dev);
+ }
+ return (NULL);
+}
+
+/*
+ * /dev/devctl2 implementation. The existing /dev/devctl device has
+ * implicit semantics on open, so it could not be reused for this.
+ * Another option would be to call this /dev/bus?
+ */
+static int
+find_device(struct devreq *req, device_t *devp)
+{
+ device_t dev;
+
+ /*
+ * First, ensure that the name is nul terminated.
+ */
+ if (memchr(req->dr_name, '\0', sizeof(req->dr_name)) == NULL)
+ return (EINVAL);
+
+ /*
+ * Second, try to find an attached device whose name matches
+ * 'name'.
+ */
+ dev = device_lookup_by_name(req->dr_name);
+ if (dev != NULL) {
+ *devp = dev;
+ return (0);
+ }
+
+ /* Finally, give device enumerators a chance. */
+ dev = NULL;
+ EVENTHANDLER_INVOKE(dev_lookup, req->dr_name, &dev);
+ if (dev == NULL)
+ return (ENOENT);
+ *devp = dev;
+ return (0);
+}
+
+static bool
+driver_exists(struct device *bus, const char *driver)
+{
+ devclass_t dc;
+
+ for (dc = bus->devclass; dc != NULL; dc = dc->parent) {
+ if (devclass_find_driver_internal(dc, driver) != NULL)
+ return (true);
+ }
+ return (false);
+}
+
+static int
+devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ struct devreq *req;
+ device_t dev;
+ int error, old;
+
+ /* Locate the device to control. */
+ mtx_lock(&Giant);
+ req = (struct devreq *)data;
+ switch (cmd) {
+ case DEV_ATTACH:
+ case DEV_DETACH:
+ case DEV_ENABLE:
+ case DEV_DISABLE:
+ case DEV_SET_DRIVER:
+ case DEV_CLEAR_DRIVER:
+ error = priv_check(td, PRIV_DRIVER);
+ if (error == 0)
+ error = find_device(req, &dev);
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+ if (error) {
+ mtx_unlock(&Giant);
+ return (error);
+ }
+
+ /* Perform the requested operation. */
+ switch (cmd) {
+ case DEV_ATTACH:
+ if (device_is_attached(dev) && (dev->flags & DF_REBID) == 0)
+ error = EBUSY;
+ else if (!device_is_enabled(dev))
+ error = ENXIO;
+ else
+ error = device_probe_and_attach(dev);
+ break;
+ case DEV_DETACH:
+ if (!device_is_attached(dev)) {
+ error = ENXIO;
+ break;
+ }
+ if (!(req->dr_flags & DEVF_FORCE_DETACH)) {
+ error = device_quiesce(dev);
+ if (error)
+ break;
+ }
+ error = device_detach(dev);
+ break;
+ case DEV_ENABLE:
+ if (device_is_enabled(dev)) {
+ error = EBUSY;
+ break;
+ }
+
+ /*
+ * If the device has been probed but not attached (e.g.
+ * when it has been disabled by a loader hint), just
+ * attach the device rather than doing a full probe.
+ */
+ device_enable(dev);
+ if (device_is_alive(dev)) {
+ /*
+ * If the device was disabled via a hint, clear
+ * the hint.
+ */
+ if (resource_disabled(dev->driver->name, dev->unit))
+ resource_unset_value(dev->driver->name,
+ dev->unit, "disabled");
+ error = device_attach(dev);
+ } else
+ error = device_probe_and_attach(dev);
+ break;
+ case DEV_DISABLE:
+ if (!device_is_enabled(dev)) {
+ error = ENXIO;
+ break;
+ }
+
+ if (!(req->dr_flags & DEVF_FORCE_DETACH)) {
+ error = device_quiesce(dev);
+ if (error)
+ break;
+ }
+
+ /*
+ * Force DF_FIXEDCLASS on around detach to preserve
+ * the existing name.
+ */
+ old = dev->flags;
+ dev->flags |= DF_FIXEDCLASS;
+ error = device_detach(dev);
+ if (!(old & DF_FIXEDCLASS))
+ dev->flags &= ~DF_FIXEDCLASS;
+ if (error == 0)
+ device_disable(dev);
+ break;
+ case DEV_SET_DRIVER: {
+ devclass_t dc;
+ char driver[128];
+
+ error = copyinstr(req->dr_data, driver, sizeof(driver), NULL);
+ if (error)
+ break;
+ if (driver[0] == '\0') {
+ error = EINVAL;
+ break;
+ }
+ if (dev->devclass != NULL &&
+ strcmp(driver, dev->devclass->name) == 0)
+ /* XXX: Could possibly force DF_FIXEDCLASS on? */
+ break;
+
+ /*
+ * Scan drivers for this device's bus looking for at
+ * least one matching driver.
+ */
+ if (dev->parent == NULL) {
+ error = EINVAL;
+ break;
+ }
+ if (!driver_exists(dev->parent, driver)) {
+ error = ENOENT;
+ break;
+ }
+ dc = devclass_create(driver);
+ if (dc == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ /* Detach device if necessary. */
+ if (device_is_attached(dev)) {
+ if (req->dr_flags & DEVF_SET_DRIVER_DETACH)
+ error = device_detach(dev);
+ else
+ error = EBUSY;
+ if (error)
+ break;
+ }
+
+ /* Clear any previously-fixed device class and unit. */
+ if (dev->flags & DF_FIXEDCLASS)
+ devclass_delete_device(dev->devclass, dev);
+ dev->flags |= DF_WILDCARD;
+ dev->unit = -1;
+
+ /* Force the new device class. */
+ error = devclass_add_device(dc, dev);
+ if (error)
+ break;
+ dev->flags |= DF_FIXEDCLASS;
+ error = device_probe_and_attach(dev);
+ break;
+ }
+ case DEV_CLEAR_DRIVER:
+ if (!(dev->flags & DF_FIXEDCLASS)) {
+ error = 0;
+ break;
+ }
+ if (device_is_attached(dev)) {
+ if (req->dr_flags & DEVF_CLEAR_DRIVER_DETACH)
+ error = device_detach(dev);
+ else
+ error = EBUSY;
+ if (error)
+ break;
+ }
+
+ dev->flags &= ~DF_FIXEDCLASS;
+ dev->flags |= DF_WILDCARD;
+ devclass_delete_device(dev->devclass, dev);
+ error = device_probe_and_attach(dev);
+ break;
+ }
+ mtx_unlock(&Giant);
+ return (error);
+}
+
+static struct cdevsw devctl2_cdevsw = {
+ .d_version = D_VERSION,
+ .d_ioctl = devctl2_ioctl,
+ .d_name = "devctl2",
+};
+
+static void
+devctl2_init(void)
+{
+
+ make_dev_credf(MAKEDEV_ETERNAL, &devctl2_cdevsw, 0, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "devctl2");
+}
Added: trunk/sys/kern/subr_bus_dma.c
===================================================================
--- trunk/sys/kern/subr_bus_dma.c (rev 0)
+++ trunk/sys/kern/subr_bus_dma.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -0,0 +1,582 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 EMC Corp.
+ * All rights reserved.
+ *
+ * Copyright (c) 1997, 1998 Justin T. Gibbs.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_bus_dma.c 292348 2015-12-16 19:01:14Z ken $");
+
+#include "opt_bus.h"
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/callout.h>
+#include <sys/mbuf.h>
+#include <sys/memdesc.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/pmap.h>
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+
+#include <machine/bus.h>
+
+/*
+ * Load up data starting at offset within a region specified by a
+ * list of virtual address ranges until either length or the region
+ * are exhausted.
+ */
+static int
+_bus_dmamap_load_vlist(bus_dma_tag_t dmat, bus_dmamap_t map,
+ bus_dma_segment_t *list, int sglist_cnt, struct pmap *pmap, int *nsegs,
+ int flags, size_t offset, size_t length)
+{
+ int error;
+
+ error = 0;
+ for (; sglist_cnt > 0 && length != 0; sglist_cnt--, list++) {
+ char *addr;
+ size_t ds_len;
+
+ KASSERT((offset < list->ds_len),
+ ("Invalid mid-segment offset"));
+ addr = (char *)(uintptr_t)list->ds_addr + offset;
+ ds_len = list->ds_len - offset;
+ offset = 0;
+ if (ds_len > length)
+ ds_len = length;
+ length -= ds_len;
+ KASSERT((ds_len != 0), ("Segment length is zero"));
+ error = _bus_dmamap_load_buffer(dmat, map, addr, ds_len, pmap,
+ flags, NULL, nsegs);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+/*
+ * Load a list of physical addresses.
+ */
+static int
+_bus_dmamap_load_plist(bus_dma_tag_t dmat, bus_dmamap_t map,
+ bus_dma_segment_t *list, int sglist_cnt, int *nsegs, int flags)
+{
+ int error;
+
+ error = 0;
+ for (; sglist_cnt > 0; sglist_cnt--, list++) {
+ error = _bus_dmamap_load_phys(dmat, map,
+ (vm_paddr_t)list->ds_addr, list->ds_len, flags, NULL,
+ nsegs);
+ if (error)
+ break;
+ }
+ return (error);
+}
+
+/*
+ * Load an mbuf chain.
+ */
+static int
+_bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs, int flags)
+{
+ struct mbuf *m;
+ int error;
+
+ error = 0;
+ for (m = m0; m != NULL && error == 0; m = m->m_next) {
+ if (m->m_len > 0) {
+ error = _bus_dmamap_load_buffer(dmat, map, m->m_data,
+ m->m_len, kernel_pmap, flags | BUS_DMA_LOAD_MBUF,
+ segs, nsegs);
+ }
+ }
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, flags, error, *nsegs);
+ return (error);
+}
+
+/*
+ * Load tlen data starting at offset within a region specified by a list of
+ * physical pages.
+ */
+static int
+_bus_dmamap_load_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
+ vm_page_t *pages, bus_size_t tlen, int offset, int *nsegs, int flags)
+{
+ vm_paddr_t paddr;
+ bus_size_t len;
+ int error, i;
+
+ for (i = 0, error = 0; error == 0 && tlen > 0; i++, tlen -= len) {
+ len = min(PAGE_SIZE - offset, tlen);
+ paddr = VM_PAGE_TO_PHYS(pages[i]) + offset;
+ error = _bus_dmamap_load_phys(dmat, map, paddr, len,
+ flags, NULL, nsegs);
+ offset = 0;
+ }
+ return (error);
+}
+
+/*
+ * Load from block io.
+ */
+static int
+_bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
+ int *nsegs, int flags)
+{
+
+ if ((bio->bio_flags & BIO_VLIST) != 0) {
+ bus_dma_segment_t *segs = (bus_dma_segment_t *)bio->bio_data;
+ return (_bus_dmamap_load_vlist(dmat, map, segs, bio->bio_ma_n,
+ kernel_pmap, nsegs, flags, bio->bio_ma_offset,
+ bio->bio_bcount));
+ }
+
+ if ((bio->bio_flags & BIO_UNMAPPED) != 0)
+ return (_bus_dmamap_load_pages(dmat, map, bio->bio_ma,
+ bio->bio_bcount, bio->bio_ma_offset, nsegs, flags));
+
+ return (_bus_dmamap_load_buffer(dmat, map, bio->bio_data,
+ bio->bio_bcount, kernel_pmap, flags, NULL, nsegs));
+}
+
+int
+bus_dmamap_load_ma_triv(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
+ bus_dma_segment_t *segs, int *segp)
+{
+ vm_paddr_t paddr;
+ bus_size_t len;
+ int error, i;
+
+ error = 0;
+ for (i = 0; tlen > 0; i++, tlen -= len) {
+ len = min(PAGE_SIZE - ma_offs, tlen);
+ paddr = VM_PAGE_TO_PHYS(ma[i]) + ma_offs;
+ error = _bus_dmamap_load_phys(dmat, map, paddr, len,
+ flags, segs, segp);
+ if (error != 0)
+ break;
+ ma_offs = 0;
+ }
+ return (error);
+}
+
+/*
+ * Load a cam control block.
+ */
+static int
+_bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb,
+ int *nsegs, int flags)
+{
+ struct ccb_hdr *ccb_h;
+ void *data_ptr;
+ int error;
+ uint32_t dxfer_len;
+ uint16_t sglist_cnt;
+
+ error = 0;
+ ccb_h = &ccb->ccb_h;
+ switch (ccb_h->func_code) {
+ case XPT_SCSI_IO: {
+ struct ccb_scsiio *csio;
+
+ csio = &ccb->csio;
+ data_ptr = csio->data_ptr;
+ dxfer_len = csio->dxfer_len;
+ sglist_cnt = csio->sglist_cnt;
+ break;
+ }
+ case XPT_CONT_TARGET_IO: {
+ struct ccb_scsiio *ctio;
+
+ ctio = &ccb->ctio;
+ data_ptr = ctio->data_ptr;
+ dxfer_len = ctio->dxfer_len;
+ sglist_cnt = ctio->sglist_cnt;
+ break;
+ }
+ case XPT_ATA_IO: {
+ struct ccb_ataio *ataio;
+
+ ataio = &ccb->ataio;
+ data_ptr = ataio->data_ptr;
+ dxfer_len = ataio->dxfer_len;
+ sglist_cnt = 0;
+ break;
+ }
+ default:
+ panic("_bus_dmamap_load_ccb: Unsupported func code %d",
+ ccb_h->func_code);
+ }
+
+ switch ((ccb_h->flags & CAM_DATA_MASK)) {
+ case CAM_DATA_VADDR:
+ error = _bus_dmamap_load_buffer(dmat, map, data_ptr, dxfer_len,
+ kernel_pmap, flags, NULL, nsegs);
+ break;
+ case CAM_DATA_PADDR:
+ error = _bus_dmamap_load_phys(dmat, map,
+ (vm_paddr_t)(uintptr_t)data_ptr, dxfer_len, flags, NULL,
+ nsegs);
+ break;
+ case CAM_DATA_SG:
+ error = _bus_dmamap_load_vlist(dmat, map,
+ (bus_dma_segment_t *)data_ptr, sglist_cnt, kernel_pmap,
+ nsegs, flags, 0, dxfer_len);
+ break;
+ case CAM_DATA_SG_PADDR:
+ error = _bus_dmamap_load_plist(dmat, map,
+ (bus_dma_segment_t *)data_ptr, sglist_cnt, nsegs, flags);
+ break;
+ case CAM_DATA_BIO:
+ error = _bus_dmamap_load_bio(dmat, map, (struct bio *)data_ptr,
+ nsegs, flags);
+ break;
+ default:
+ panic("_bus_dmamap_load_ccb: flags 0x%X unimplemented",
+ ccb_h->flags);
+ }
+ return (error);
+}
+
+/*
+ * Load a uio.
+ */
+static int
+_bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio,
+ int *nsegs, int flags)
+{
+ bus_size_t resid;
+ bus_size_t minlen;
+ struct iovec *iov;
+ pmap_t pmap;
+ caddr_t addr;
+ int error, i;
+
+ if (uio->uio_segflg == UIO_USERSPACE) {
+ KASSERT(uio->uio_td != NULL,
+ ("bus_dmamap_load_uio: USERSPACE but no proc"));
+ pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace);
+ } else
+ pmap = kernel_pmap;
+ resid = uio->uio_resid;
+ iov = uio->uio_iov;
+ error = 0;
+
+ for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) {
+ /*
+ * Now at the first iovec to load. Load each iovec
+ * until we have exhausted the residual count.
+ */
+
+ addr = (caddr_t) iov[i].iov_base;
+ minlen = resid < iov[i].iov_len ? resid : iov[i].iov_len;
+ if (minlen > 0) {
+ error = _bus_dmamap_load_buffer(dmat, map, addr,
+ minlen, pmap, flags, NULL, nsegs);
+ resid -= minlen;
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * Map the buffer buf into bus space using the dmamap map.
+ */
+int
+bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
+ bus_size_t buflen, bus_dmamap_callback_t *callback,
+ void *callback_arg, int flags)
+{
+ bus_dma_segment_t *segs;
+ struct memdesc mem;
+ int error;
+ int nsegs;
+
+ if ((flags & BUS_DMA_NOWAIT) == 0) {
+ mem = memdesc_vaddr(buf, buflen);
+ _bus_dmamap_waitok(dmat, map, &mem, callback, callback_arg);
+ }
+
+ nsegs = -1;
+ error = _bus_dmamap_load_buffer(dmat, map, buf, buflen, kernel_pmap,
+ flags, NULL, &nsegs);
+ nsegs++;
+
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, flags, error, nsegs);
+
+ if (error == EINPROGRESS)
+ return (error);
+
+ segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error);
+ if (error)
+ (*callback)(callback_arg, segs, 0, error);
+ else
+ (*callback)(callback_arg, segs, nsegs, 0);
+
+ /*
+ * Return ENOMEM to the caller so that it can pass it up the stack.
+ * This error only happens when NOWAIT is set, so deferral is disabled.
+ */
+ if (error == ENOMEM)
+ return (error);
+
+ return (0);
+}
+
+int
+bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
+ bus_dmamap_callback2_t *callback, void *callback_arg, int flags)
+{
+ bus_dma_segment_t *segs;
+ int nsegs, error;
+
+ M_ASSERTPKTHDR(m0);
+
+ flags |= BUS_DMA_NOWAIT;
+ nsegs = -1;
+ error = _bus_dmamap_load_mbuf_sg(dmat, map, m0, NULL, &nsegs, flags);
+ ++nsegs;
+
+ segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error);
+ if (error)
+ (*callback)(callback_arg, segs, 0, 0, error);
+ else
+ (*callback)(callback_arg, segs, nsegs, m0->m_pkthdr.len, error);
+
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, flags, error, nsegs);
+ return (error);
+}
+
+int
+bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
+ bus_dma_segment_t *segs, int *nsegs, int flags)
+{
+ int error;
+
+ flags |= BUS_DMA_NOWAIT;
+ *nsegs = -1;
+ error = _bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags);
+ ++*nsegs;
+ _bus_dmamap_complete(dmat, map, segs, *nsegs, error);
+ return (error);
+}
+
+int
+bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio,
+ bus_dmamap_callback2_t *callback, void *callback_arg, int flags)
+{
+ bus_dma_segment_t *segs;
+ int nsegs, error;
+
+ flags |= BUS_DMA_NOWAIT;
+ nsegs = -1;
+ error = _bus_dmamap_load_uio(dmat, map, uio, &nsegs, flags);
+ nsegs++;
+
+ segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error);
+ if (error)
+ (*callback)(callback_arg, segs, 0, 0, error);
+ else
+ (*callback)(callback_arg, segs, nsegs, uio->uio_resid, error);
+
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, flags, error, nsegs);
+ return (error);
+}
+
+int
+bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb,
+ bus_dmamap_callback_t *callback, void *callback_arg,
+ int flags)
+{
+ bus_dma_segment_t *segs;
+ struct ccb_hdr *ccb_h;
+ struct memdesc mem;
+ int error;
+ int nsegs;
+
+ ccb_h = &ccb->ccb_h;
+ if ((ccb_h->flags & CAM_DIR_MASK) == CAM_DIR_NONE) {
+ callback(callback_arg, NULL, 0, 0);
+ return (0);
+ }
+ if ((flags & BUS_DMA_NOWAIT) == 0) {
+ mem = memdesc_ccb(ccb);
+ _bus_dmamap_waitok(dmat, map, &mem, callback, callback_arg);
+ }
+ nsegs = -1;
+ error = _bus_dmamap_load_ccb(dmat, map, ccb, &nsegs, flags);
+ nsegs++;
+
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, flags, error, nsegs);
+
+ if (error == EINPROGRESS)
+ return (error);
+
+ segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error);
+ if (error)
+ (*callback)(callback_arg, segs, 0, error);
+ else
+ (*callback)(callback_arg, segs, nsegs, error);
+ /*
+ * Return ENOMEM to the caller so that it can pass it up the stack.
+ * This error only happens when NOWAIT is set, so deferral is disabled.
+ */
+ if (error == ENOMEM)
+ return (error);
+
+ return (0);
+}
+
+int
+bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
+ bus_dmamap_callback_t *callback, void *callback_arg,
+ int flags)
+{
+ bus_dma_segment_t *segs;
+ struct memdesc mem;
+ int error;
+ int nsegs;
+
+ if ((flags & BUS_DMA_NOWAIT) == 0) {
+ mem = memdesc_bio(bio);
+ _bus_dmamap_waitok(dmat, map, &mem, callback, callback_arg);
+ }
+ nsegs = -1;
+ error = _bus_dmamap_load_bio(dmat, map, bio, &nsegs, flags);
+ nsegs++;
+
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, flags, error, nsegs);
+
+ if (error == EINPROGRESS)
+ return (error);
+
+ segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error);
+ if (error)
+ (*callback)(callback_arg, segs, 0, error);
+ else
+ (*callback)(callback_arg, segs, nsegs, error);
+ /*
+ * Return ENOMEM to the caller so that it can pass it up the stack.
+ * This error only happens when NOWAIT is set, so deferral is disabled.
+ */
+ if (error == ENOMEM)
+ return (error);
+
+ return (0);
+}
+
+int
+bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map,
+ struct memdesc *mem, bus_dmamap_callback_t *callback,
+ void *callback_arg, int flags)
+{
+ bus_dma_segment_t *segs;
+ int error;
+ int nsegs;
+
+ if ((flags & BUS_DMA_NOWAIT) == 0)
+ _bus_dmamap_waitok(dmat, map, mem, callback, callback_arg);
+
+ nsegs = -1;
+ error = 0;
+ switch (mem->md_type) {
+ case MEMDESC_VADDR:
+ error = _bus_dmamap_load_buffer(dmat, map, mem->u.md_vaddr,
+ mem->md_opaque, kernel_pmap, flags, NULL, &nsegs);
+ break;
+ case MEMDESC_PADDR:
+ error = _bus_dmamap_load_phys(dmat, map, mem->u.md_paddr,
+ mem->md_opaque, flags, NULL, &nsegs);
+ break;
+ case MEMDESC_VLIST:
+ error = _bus_dmamap_load_vlist(dmat, map, mem->u.md_list,
+ mem->md_opaque, kernel_pmap, &nsegs, flags, 0, SIZE_T_MAX);
+ break;
+ case MEMDESC_PLIST:
+ error = _bus_dmamap_load_plist(dmat, map, mem->u.md_list,
+ mem->md_opaque, &nsegs, flags);
+ break;
+ case MEMDESC_BIO:
+ error = _bus_dmamap_load_bio(dmat, map, mem->u.md_bio,
+ &nsegs, flags);
+ break;
+ case MEMDESC_UIO:
+ error = _bus_dmamap_load_uio(dmat, map, mem->u.md_uio,
+ &nsegs, flags);
+ break;
+ case MEMDESC_MBUF:
+ error = _bus_dmamap_load_mbuf_sg(dmat, map, mem->u.md_mbuf,
+ NULL, &nsegs, flags);
+ break;
+ case MEMDESC_CCB:
+ error = _bus_dmamap_load_ccb(dmat, map, mem->u.md_ccb, &nsegs,
+ flags);
+ break;
+ }
+ nsegs++;
+
+ CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d",
+ __func__, dmat, flags, error, nsegs);
+
+ if (error == EINPROGRESS)
+ return (error);
+
+ segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error);
+ if (error)
+ (*callback)(callback_arg, segs, 0, error);
+ else
+ (*callback)(callback_arg, segs, nsegs, 0);
+
+ /*
+ * Return ENOMEM to the caller so that it can pass it up the stack.
+ * This error only happens when NOWAIT is set, so deferral is disabled.
+ */
+ if (error == ENOMEM)
+ return (error);
+
+ return (0);
+}
Property changes on: trunk/sys/kern/subr_bus_dma.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/kern/subr_busdma_bufalloc.c
===================================================================
--- trunk/sys/kern/subr_busdma_bufalloc.c (rev 0)
+++ trunk/sys/kern/subr_busdma_bufalloc.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -0,0 +1,175 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Ian Lepore
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_busdma_bufalloc.c 294677 2016-01-24 19:21:53Z ian $");
+
+/*
+ * Buffer allocation support routines for bus_dmamem_alloc implementations.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/busdma_bufalloc.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/uma.h>
+
+/*
+ * We manage buffer zones up to a page in size. Buffers larger than a page can
+ * be managed by one of the kernel's page-oriented memory allocation routines as
+ * efficiently as what we can do here. Also, a page is the largest size for
+ * which we can g'tee contiguity when using uma, and contiguity is one of the
+ * requirements we have to fulfill.
+ */
+#define MIN_ZONE_BUFSIZE 32
+#define MAX_ZONE_BUFSIZE PAGE_SIZE
+
+/*
+ * The static array of 12 bufzones is big enough to handle all the zones for the
+ * smallest supported allocation size of 32 through the largest supported page
+ * size of 64K. If you up the biggest page size number, up the array size too.
+ * Basically the size of the array needs to be log2(maxsize)-log2(minsize)+1,
+ * but I don't know of an easy way to express that as a compile-time constant.
+ */
+#if PAGE_SIZE > 65536
+#error Unsupported page size
+#endif
+
+struct busdma_bufalloc {
+ bus_size_t min_size;
+ size_t num_zones;
+ struct busdma_bufzone buf_zones[12];
+};
+
+busdma_bufalloc_t
+busdma_bufalloc_create(const char *name, bus_size_t minimum_alignment,
+ uma_alloc alloc_func, uma_free free_func, u_int32_t zcreate_flags)
+{
+ struct busdma_bufalloc *ba;
+ struct busdma_bufzone *bz;
+ int i;
+ bus_size_t cursize;
+
+ ba = malloc(sizeof(struct busdma_bufalloc), M_DEVBUF,
+ M_ZERO | M_WAITOK);
+
+ ba->min_size = MAX(MIN_ZONE_BUFSIZE, minimum_alignment);
+
+ /*
+ * Each uma zone is created with an alignment of size-1, meaning that
+ * the alignment is equal to the size (I.E., 64 byte buffers are aligned
+ * to 64 byte boundaries, etc). This allows for a fast efficient test
+ * when deciding whether a pool buffer meets the constraints of a given
+ * tag used for allocation: the buffer is usable if tag->alignment <=
+ * bufzone->size.
+ */
+ for (i = 0, bz = ba->buf_zones, cursize = ba->min_size;
+ i < nitems(ba->buf_zones) && cursize <= MAX_ZONE_BUFSIZE;
+ ++i, ++bz, cursize <<= 1) {
+ snprintf(bz->name, sizeof(bz->name), "dma %.10s %ju",
+ name, (uintmax_t)cursize);
+ bz->size = cursize;
+ bz->umazone = uma_zcreate(bz->name, bz->size,
+ NULL, NULL, NULL, NULL, bz->size - 1, zcreate_flags);
+ if (bz->umazone == NULL) {
+ busdma_bufalloc_destroy(ba);
+ return (NULL);
+ }
+ if (alloc_func != NULL)
+ uma_zone_set_allocf(bz->umazone, alloc_func);
+ if (free_func != NULL)
+ uma_zone_set_freef(bz->umazone, free_func);
+ ++ba->num_zones;
+ }
+
+ return (ba);
+}
+
+void
+busdma_bufalloc_destroy(busdma_bufalloc_t ba)
+{
+ struct busdma_bufzone *bz;
+ int i;
+
+ if (ba == NULL)
+ return;
+
+ for (i = 0, bz = ba->buf_zones; i < ba->num_zones; ++i, ++bz) {
+ uma_zdestroy(bz->umazone);
+ }
+
+ free(ba, M_DEVBUF);
+}
+
+struct busdma_bufzone *
+busdma_bufalloc_findzone(busdma_bufalloc_t ba, bus_size_t size)
+{
+ struct busdma_bufzone *bz;
+ int i;
+
+ if (size > MAX_ZONE_BUFSIZE)
+ return (NULL);
+
+ for (i = 0, bz = ba->buf_zones; i < ba->num_zones; ++i, ++bz) {
+ if (bz->size >= size)
+ return (bz);
+ }
+
+ panic("Didn't find a buffer zone of the right size");
+}
+
+void *
+busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size,
+ uint8_t *pflag, int wait)
+{
+#ifdef VM_MEMATTR_UNCACHEABLE
+
+ /* Inform UMA that this allocator uses kernel_arena/object. */
+ *pflag = UMA_SLAB_KERNEL;
+
+ return ((void *)kmem_alloc_attr(kernel_arena, size, wait, 0,
+ BUS_SPACE_MAXADDR, VM_MEMATTR_UNCACHEABLE));
+
+#else
+
+ panic("VM_MEMATTR_UNCACHEABLE unavailable");
+
+#endif /* VM_MEMATTR_UNCACHEABLE */
+}
+
+void
+busdma_bufalloc_free_uncacheable(void *item, vm_size_t size, uint8_t pflag)
+{
+
+ kmem_free(kernel_arena, (vm_offset_t)item, size);
+}
+
Property changes on: trunk/sys/kern/subr_busdma_bufalloc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/kern/subr_clock.c
===================================================================
--- trunk/sys/kern/subr_clock.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_clock.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1988 University of Utah.
* Copyright (c) 1982, 1990, 1993
@@ -39,7 +40,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_clock.c 275932 2014-12-19 09:34:14Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -46,6 +47,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/clock.h>
+#include <sys/limits.h>
#include <sys/sysctl.h>
#include <sys/timetc.h>
@@ -132,7 +134,6 @@
int
clock_ct_to_ts(struct clocktime *ct, struct timespec *ts)
{
- time_t secs;
int i, year, days;
year = ct->year;
@@ -147,7 +148,7 @@
if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 ||
ct->day > days_in_month(year, ct->mon) ||
ct->hour > 23 || ct->min > 59 || ct->sec > 59 ||
- ct->year > 2037) { /* time_t overflow */
+ (sizeof(time_t) == 4 && year > 2037)) { /* time_t overflow */
if (ct_debug)
printf(" = EINVAL\n");
return (EINVAL);
@@ -166,11 +167,10 @@
days += days_in_month(year, i);
days += (ct->day - 1);
- /* Add hours, minutes, seconds. */
- secs = ((days * 24 + ct->hour) * 60 + ct->min) * 60 + ct->sec;
+ ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 +
+ ct->sec;
+ ts->tv_nsec = ct->nsec;
- ts->tv_sec = secs;
- ts->tv_nsec = ct->nsec;
if (ct_debug)
printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec);
return (0);
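The (time_t) cast matters because the old expression did the whole computation in int, which overflows once days * 86400 exceeds INT_MAX (dates past early 2038). A small userland illustration of the difference, assuming a 64-bit time_t:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

int
main(void)
{
	int days = 36525;	/* roughly 100 years worth of days */
	/* int arithmetic: overflows (undefined; wraps on common targets). */
	int bad = ((days * 24 + 0) * 60 + 0) * 60 + 0;
	/* time_t arithmetic, as in the new code: 3155760000 */
	time_t good = (((time_t)days * 24 + 0) * 60 + 0) * 60 + 0;

	printf("int:    %d\ntime_t: %jd\n", bad, (intmax_t)good);
	return (0);
}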
Added: trunk/sys/kern/subr_counter.c
===================================================================
--- trunk/sys/kern/subr_counter.c (rev 0)
+++ trunk/sys/kern/subr_counter.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -0,0 +1,97 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2012 Gleb Smirnoff <glebius at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_counter.c 262739 2014-03-04 14:46:30Z glebius $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <vm/uma.h>
+
+#define IN_SUBR_COUNTER_C
+#include <sys/counter.h>
+
+void
+counter_u64_zero(counter_u64_t c)
+{
+
+ counter_u64_zero_inline(c);
+}
+
+uint64_t
+counter_u64_fetch(counter_u64_t c)
+{
+
+ return (counter_u64_fetch_inline(c));
+}
+
+counter_u64_t
+counter_u64_alloc(int flags)
+{
+ counter_u64_t r;
+
+ r = uma_zalloc(pcpu_zone_64, flags);
+ if (r != NULL)
+ counter_u64_zero(r);
+
+ return (r);
+}
+
+void
+counter_u64_free(counter_u64_t c)
+{
+
+ uma_zfree(pcpu_zone_64, c);
+}
+
+int
+sysctl_handle_counter_u64(SYSCTL_HANDLER_ARGS)
+{
+ uint64_t out;
+ int error;
+
+ out = counter_u64_fetch(*(counter_u64_t *)arg1);
+
+ error = SYSCTL_OUT(req, &out, sizeof(uint64_t));
+
+ if (error || !req->newptr)
+ return (error);
+
+ /*
+ * Any write attempt to a counter zeroes it.
+ */
+ counter_u64_zero(*(counter_u64_t *)arg1);
+
+ return (0);
+}
Property changes on: trunk/sys/kern/subr_counter.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/kern/subr_devstat.c
===================================================================
--- trunk/sys/kern/subr_devstat.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_devstat.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1997, 1998, 1999 Kenneth D. Merry.
* All rights reserved.
@@ -27,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_devstat.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include "opt_kdtrace.h"
@@ -36,6 +37,7 @@
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/devicestat.h>
+#include <sys/sdt.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/lock.h>
@@ -46,58 +48,22 @@
#include <machine/atomic.h>
-#ifdef KDTRACE_HOOKS
-#include <sys/dtrace_bsd.h>
+SDT_PROVIDER_DEFINE(io);
-dtrace_io_start_probe_func_t dtrace_io_start_probe;
-dtrace_io_done_probe_func_t dtrace_io_done_probe;
-dtrace_io_wait_start_probe_func_t dtrace_io_wait_start_probe;
-dtrace_io_wait_done_probe_func_t dtrace_io_wait_done_probe;
+SDT_PROBE_DEFINE2(io, , , start, "struct bio *", "struct devstat *");
+SDT_PROBE_DEFINE2(io, , , done, "struct bio *", "struct devstat *");
+SDT_PROBE_DEFINE2(io, , , wait__start, "struct bio *",
+ "struct devstat *");
+SDT_PROBE_DEFINE2(io, , , wait__done, "struct bio *",
+ "struct devstat *");
-uint32_t dtio_start_id;
-uint32_t dtio_done_id;
-uint32_t dtio_wait_start_id;
-uint32_t dtio_wait_done_id;
+#define DTRACE_DEVSTAT_START() SDT_PROBE2(io, , , start, NULL, ds)
+#define DTRACE_DEVSTAT_BIO_START() SDT_PROBE2(io, , , start, bp, ds)
+#define DTRACE_DEVSTAT_DONE() SDT_PROBE2(io, , , done, NULL, ds)
+#define DTRACE_DEVSTAT_BIO_DONE() SDT_PROBE2(io, , , done, bp, ds)
+#define DTRACE_DEVSTAT_WAIT_START() SDT_PROBE2(io, , , wait__start, NULL, ds)
+#define DTRACE_DEVSTAT_WAIT_DONE() SDT_PROBE2(io, , , wait__done, NULL, ds)
-#define DTRACE_DEVSTAT_START() \
- if (dtrace_io_start_probe != NULL) \
- (*dtrace_io_start_probe)(dtio_start_id, NULL, ds);
-
-#define DTRACE_DEVSTAT_BIO_START() \
- if (dtrace_io_start_probe != NULL) \
- (*dtrace_io_start_probe)(dtio_start_id, bp, ds);
-
-#define DTRACE_DEVSTAT_DONE() \
- if (dtrace_io_done_probe != NULL) \
- (*dtrace_io_done_probe)(dtio_done_id, NULL, ds);
-
-#define DTRACE_DEVSTAT_BIO_DONE() \
- if (dtrace_io_done_probe != NULL) \
- (*dtrace_io_done_probe)(dtio_done_id, bp, ds);
-
-#define DTRACE_DEVSTAT_WAIT_START() \
- if (dtrace_io_wait_start_probe != NULL) \
- (*dtrace_io_wait_start_probe)(dtio_wait_start_id, NULL, ds);
-
-#define DTRACE_DEVSTAT_WAIT_DONE() \
- if (dtrace_io_wait_done_probe != NULL) \
- (*dtrace_io_wait_done_probe)(dtio_wait_done_id, NULL, ds);
-
-#else /* ! KDTRACE_HOOKS */
-
-#define DTRACE_DEVSTAT_START()
-
-#define DTRACE_DEVSTAT_BIO_START()
-
-#define DTRACE_DEVSTAT_DONE()
-
-#define DTRACE_DEVSTAT_BIO_DONE()
-
-#define DTRACE_DEVSTAT_WAIT_START()
-
-#define DTRACE_DEVSTAT_WAIT_DONE()
-#endif /* KDTRACE_HOOKS */
-
static int devstat_num_devs;
static long devstat_generation = 1;
static int devstat_version = DEVSTAT_VERSION;
@@ -131,6 +97,7 @@
ds = devstat_alloc();
mtx_lock(&devstat_mutex);
if (unit_number == -1) {
+ ds->unit_number = unit_number;
ds->id = dev_name;
binuptime(&ds->creation_time);
devstat_generation++;
@@ -242,7 +209,7 @@
/* Remove this entry from the devstat queue */
atomic_add_acq_int(&ds->sequence1, 1);
- if (ds->id == NULL) {
+ if (ds->unit_number != -1) {
devstat_num_devs--;
STAILQ_REMOVE(devstat_head, ds, devstat, dev_links);
}
@@ -374,6 +341,14 @@
void
devstat_end_transaction_bio(struct devstat *ds, struct bio *bp)
{
+
+ devstat_end_transaction_bio_bt(ds, bp, NULL);
+}
+
+void
+devstat_end_transaction_bio_bt(struct devstat *ds, struct bio *bp,
+ struct bintime *now)
+{
devstat_trans_flags flg;
/* sanity check */
@@ -390,7 +365,7 @@
flg = DEVSTAT_NO_DATA;
devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid,
- DEVSTAT_TAG_SIMPLE, flg, NULL, &bp->bio_t0);
+ DEVSTAT_TAG_SIMPLE, flg, now, &bp->bio_t0);
DTRACE_DEVSTAT_BIO_DONE();
}
@@ -417,7 +392,7 @@
* XXX devstat_generation should really be "volatile" but that
* XXX freaks out the sysctl macro below. The places where we
* XXX change it and inspect it are bracketed in the mutex which
- * XXX guarantees us proper write barriers. I don't belive the
+ * XXX guarantees us proper write barriers. I don't believe the
* XXX compiler is allowed to optimize mygen away across calls
* XXX to other functions, so the following is belived to be safe.
*/
@@ -533,7 +508,7 @@
mtx_assert(&devstat_mutex, MA_NOTOWNED);
if (!once) {
make_dev_credf(MAKEDEV_ETERNAL | MAKEDEV_CHECKNAME,
- &devstat_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0440,
+ &devstat_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0444,
DEVSTAT_DEVICE_NAME);
once = 1;
}
@@ -603,4 +578,4 @@
}
SYSCTL_INT(_debug_sizeof, OID_AUTO, devstat, CTLFLAG_RD,
- NULL, sizeof(struct devstat), "sizeof(struct devstat)");
+ SYSCTL_NULL_INT_PTR, sizeof(struct devstat), "sizeof(struct devstat)");
Modified: trunk/sys/kern/subr_disk.c
===================================================================
--- trunk/sys/kern/subr_disk.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_disk.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* ----------------------------------------------------------------------------
* "THE BEER-WARE LICENSE" (Revision 42):
@@ -12,7 +13,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_disk.c 212160 2010-09-02 19:40:28Z gibbs $");
#include "opt_geom.h"
Added: trunk/sys/kern/subr_dnvlist.c
===================================================================
--- trunk/sys/kern/subr_dnvlist.c (rev 0)
+++ trunk/sys/kern/subr_dnvlist.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -0,0 +1,129 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Pawel Jakub Dawidek under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_dnvlist.c 292973 2015-12-31 03:28:14Z ngie $");
+
+#ifdef _KERNEL
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <machine/stdarg.h>
+
+#else
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#endif
+
+#include <sys/nv.h>
+#include <sys/nv_impl.h>
+
+#include <sys/dnv.h>
+
+#define DNVLIST_GET(ftype, type) \
+ftype \
+dnvlist_get_##type(const nvlist_t *nvl, const char *name, ftype defval) \
+{ \
+ \
+ if (nvlist_exists_##type(nvl, name)) \
+ return (nvlist_get_##type(nvl, name)); \
+ else \
+ return (defval); \
+}
+
+DNVLIST_GET(bool, bool)
+DNVLIST_GET(uint64_t, number)
+DNVLIST_GET(const char *, string)
+DNVLIST_GET(const nvlist_t *, nvlist)
+#ifndef _KERNEL
+DNVLIST_GET(int, descriptor)
+#endif
+
+#undef DNVLIST_GET
+
+const void *
+dnvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep,
+ const void *defval, size_t defsize)
+{
+ const void *value;
+
+ if (nvlist_exists_binary(nvl, name))
+ value = nvlist_get_binary(nvl, name, sizep);
+ else {
+ if (sizep != NULL)
+ *sizep = defsize;
+ value = defval;
+ }
+ return (value);
+}
+
+#define DNVLIST_TAKE(ftype, type) \
+ftype \
+dnvlist_take_##type(nvlist_t *nvl, const char *name, ftype defval) \
+{ \
+ \
+ if (nvlist_exists_##type(nvl, name)) \
+ return (nvlist_take_##type(nvl, name)); \
+ else \
+ return (defval); \
+}
+
+DNVLIST_TAKE(bool, bool)
+DNVLIST_TAKE(uint64_t, number)
+DNVLIST_TAKE(char *, string)
+DNVLIST_TAKE(nvlist_t *, nvlist)
+#ifndef _KERNEL
+DNVLIST_TAKE(int, descriptor)
+#endif
+
+#undef DNVLIST_TAKE
+
+void *
+dnvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep,
+ void *defval, size_t defsize)
+{
+ void *value;
+
+ if (nvlist_exists_binary(nvl, name))
+ value = nvlist_take_binary(nvl, name, sizep);
+ else {
+ if (sizep != NULL)
+ *sizep = defsize;
+ value = defval;
+ }
+ return (value);
+}
+
Property changes on: trunk/sys/kern/subr_dnvlist.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/kern/subr_dummy_vdso_tc.c
===================================================================
--- trunk/sys/kern/subr_dummy_vdso_tc.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_dummy_vdso_tc.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright 2012 Konstantin Belousov <kib at FreeBSD.ORG>.
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_dummy_vdso_tc.c 237433 2012-06-22 07:06:40Z kib $");
#include "opt_compat.h"
Modified: trunk/sys/kern/subr_eventhandler.c
===================================================================
--- trunk/sys/kern/subr_eventhandler.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_eventhandler.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1999 Michael Smith <msmith at freebsd.org>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_eventhandler.c 205345 2010-03-19 19:51:03Z bz $");
#include <sys/param.h>
#include <sys/kernel.h>
Modified: trunk/sys/kern/subr_fattime.c
===================================================================
--- trunk/sys/kern/subr_fattime.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_fattime.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2006 Poul-Henning Kamp
* All rights reserved.
@@ -23,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/subr_fattime.c 266368 2014-05-17 22:03:44Z ian $
*
* Convert MS-DOS FAT format timestamps to and from unix timespecs
*
@@ -49,9 +50,9 @@
* "New Technology". Anyway...
*
* The 'utc' argument determines if the resulting FATTIME timestamp
- * should b on the UTC or local timezone calendar.
+ * should be on the UTC or local timezone calendar.
*
- * The conversion functions below cut time into four-year leap-second
+ * The conversion functions below cut time into four-year leap-year
* cycles rather than single years and uses table lookups inside those
* cycles to get the months and years sorted out.
*
Modified: trunk/sys/kern/subr_firmware.c
===================================================================
--- trunk/sys/kern/subr_firmware.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_firmware.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2005-2008, Sam Leffler <sam at errno.com>
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_firmware.c 237546 2012-06-25 05:41:16Z kevlo $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -175,7 +176,10 @@
unsigned int version, const struct firmware *parent)
{
struct priv_fw *match, *frp;
+ char *str;
+ str = strdup(imagename, M_TEMP);
+
mtx_lock(&firmware_mtx);
/*
* Do a lookup to make sure the name is unique or find a free slot.
@@ -185,6 +189,7 @@
mtx_unlock(&firmware_mtx);
printf("%s: image %s already registered!\n",
__func__, imagename);
+ free(str, M_TEMP);
return NULL;
}
if (frp == NULL) {
@@ -191,10 +196,11 @@
mtx_unlock(&firmware_mtx);
printf("%s: cannot register image %s, firmware table full!\n",
__func__, imagename);
+ free(str, M_TEMP);
return NULL;
}
bzero(frp, sizeof(*frp)); /* start from a clean record */
- frp->fw.name = imagename;
+ frp->fw.name = str;
frp->fw.data = data;
frp->fw.datasize = datasize;
frp->fw.version = version;
@@ -230,7 +236,7 @@
err = 0;
} else if (fp->refcnt != 0) { /* cannot unregister */
err = EBUSY;
- } else {
+ } else {
linker_file_t x = fp->file; /* save value */
/*
@@ -238,6 +244,7 @@
* do not forget anything. Then restore 'file' which is
* non-null for autoloaded images.
*/
+ free((void *) (uintptr_t) fp->fw.name, M_TEMP);
bzero(fp, sizeof(struct priv_fw));
fp->file = x;
err = 0;
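Because firmware_register() now keeps its own copy of the image name, callers may build the name in a transient buffer. A minimal sketch with illustrative names:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/firmware.h>

static const struct firmware *
example_register(const void *blob, size_t len, int unit)
{
	char name[32];

	snprintf(name, sizeof(name), "example_fw.%d", unit);
	/* Safe even though 'name' goes out of scope: the name is copied. */
	return (firmware_register(name, blob, len, 0, NULL));
}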
Modified: trunk/sys/kern/subr_hash.c
===================================================================
--- trunk/sys/kern/subr_hash.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_hash.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -35,7 +36,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_hash.c 230486 2012-01-23 16:31:46Z glebius $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/kern/subr_hints.c
===================================================================
--- trunk/sys/kern/subr_hints.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_hints.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2000,2001 Peter Wemm <peter at FreeBSD.org>
* All rights reserved.
@@ -25,11 +26,13 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_hints.c 295131 2016-02-01 23:07:31Z jhb $");
#include <sys/param.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/bus.h>
@@ -42,6 +45,85 @@
static char *hintp;
/*
+ * Define the kern.hintmode sysctl, which only accepts the value 2, causing a
+ * switch from static KENV mode to dynamic KENV.  Systems that have hints
+ * compiled into the kernel will then be able to see/modify the KENV (and hints too).
+ */
+
+static int
+sysctl_hintmode(SYSCTL_HANDLER_ARGS)
+{
+ const char *cp;
+ char *line, *eq;
+ int eqidx, error, from_kenv, i, value;
+
+ from_kenv = 0;
+ cp = kern_envp;
+ value = hintmode;
+
+ /* Fetch candidate for new hintmode value */
+ error = sysctl_handle_int(oidp, &value, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ if (value != 2)
+ /* Only accept switching to hintmode 2 */
+ return (EINVAL);
+
+ /* Migrate from static to dynamic hints */
+ switch (hintmode) {
+ case 0:
+ if (dynamic_kenv) {
+ /*
+ * Already here. But assign hintmode to 2, to not
+ * check it in the future.
+ */
+ hintmode = 2;
+ return (0);
+ }
+ from_kenv = 1;
+ cp = kern_envp;
+ break;
+ case 1:
+ cp = static_hints;
+ break;
+ case 2:
+ /* Nothing to do, hintmode already 2 */
+ return (0);
+ }
+
+ while (cp) {
+ i = strlen(cp);
+ if (i == 0)
+ break;
+ if (from_kenv) {
+ if (strncmp(cp, "hint.", 5) != 0)
+ /* kenv can have not only hints */
+ continue;
+ }
+ eq = strchr(cp, '=');
+ if (eq == NULL)
+ /* Bad hint value */
+ continue;
+ eqidx = eq - cp;
+
+ line = malloc(i+1, M_TEMP, M_WAITOK);
+ strcpy(line, cp);
+ line[eqidx] = '\0';
+ setenv(line, line + eqidx + 1);
+ free(line, M_TEMP);
+ cp += i + 1;
+ }
+
+ hintmode = value;
+ use_kenv = 1;
+ return (0);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, hintmode, CTLTYPE_INT|CTLFLAG_RW,
+ &hintmode, 0, sysctl_hintmode, "I", "Get/set current hintmode");
+
+/*
* Evil wildcarding resource string lookup.
* This walks the supplied env string table and returns a match.
* The start point can be remembered for incremental searches.
@@ -129,12 +211,11 @@
if (strncmp(cp, "hint.", 5) != 0)
hit = 0;
else
- n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%128s",
+ n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%127s",
r_name, &r_unit, r_resname, r_value);
if (hit && n != 4) {
printf("CONFIG: invalid hint '%s'\n", cp);
- /* XXX: abuse bogus index() declaration */
- p = index(cp, 'h');
+ p = strchr(cp, 'h');
*p = 'H';
hit = 0;
}
@@ -172,18 +253,18 @@
s = cp;
/* This is a bit of a hack, but at least is reentrant */
/* Note that it returns some !unterminated! strings. */
- s = index(s, '.') + 1; /* start of device */
+ s = strchr(s, '.') + 1; /* start of device */
if (ret_name)
*ret_name = s;
- s = index(s, '.') + 1; /* start of unit */
+ s = strchr(s, '.') + 1; /* start of unit */
if (ret_namelen && ret_name)
*ret_namelen = s - *ret_name - 1; /* device length */
if (ret_unit)
*ret_unit = r_unit;
- s = index(s, '.') + 1; /* start of resname */
+ s = strchr(s, '.') + 1; /* start of resname */
if (ret_resname)
*ret_resname = s;
- s = index(s, '=') + 1; /* start of value */
+ s = strchr(s, '=') + 1; /* start of value */
if (ret_resnamelen && ret_resname)
*ret_resnamelen = s - *ret_resname - 1; /* value len */
if (ret_value)
@@ -381,3 +462,31 @@
return (0);
return (value);
}
+
+/*
+ * Clear a value associated with a device by removing it from
+ * the kernel environment. This only removes a hint for an
+ * exact unit.
+ */
+int
+resource_unset_value(const char *name, int unit, const char *resname)
+{
+ char varname[128];
+ const char *retname, *retvalue;
+ int error, line;
+ size_t len;
+
+ line = 0;
+ error = resource_find(&line, NULL, name, &unit, resname, NULL,
+ &retname, NULL, NULL, NULL, NULL, &retvalue);
+ if (error)
+ return (error);
+
+ retname -= strlen("hint.");
+ len = retvalue - retname - 1;
+ if (len > sizeof(varname) - 1)
+ return (ENAMETOOLONG);
+ memcpy(varname, retname, len);
+ varname[len] = '\0';
+ return (unsetenv(varname));
+}
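The subr_hints.c change above adds a kern.hintmode sysctl that migrates statically compiled hints into the dynamic kernel environment (switched from userland with "sysctl kern.hintmode=2"), plus a resource_unset_value() helper. Below is a minimal sketch of how a driver might use the new helper; the device name "foo", the unit number, and the "irq" resource name are purely hypothetical.

	/*
	 * Illustrative only: drop a stale hint.foo.0.irq entry from the
	 * dynamic KENV once the driver no longer needs it.  The hint must
	 * exist for this exact unit; otherwise resource_unset_value() is
	 * expected to return the lookup error from resource_find().
	 */
	static void
	foo_clear_irq_hint(void)
	{
		int error;

		error = resource_unset_value("foo", 0, "irq");
		if (error != 0)
			printf("foo0: could not clear irq hint: %d\n", error);
	}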
Modified: trunk/sys/kern/subr_kdb.c
===================================================================
--- trunk/sys/kern/subr_kdb.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_kdb.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 The FreeBSD Project
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_kdb.c 325460 2017-11-05 22:34:27Z ngie $");
#include "opt_kdb.h"
#include "opt_stack.h"
@@ -91,25 +92,30 @@
SYSCTL_PROC(_debug_kdb, OID_AUTO, current, CTLTYPE_STRING | CTLFLAG_RW, NULL,
0, kdb_sysctl_current, "A", "currently selected KDB backend");
-SYSCTL_PROC(_debug_kdb, OID_AUTO, enter, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+SYSCTL_PROC(_debug_kdb, OID_AUTO, enter,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
kdb_sysctl_enter, "I", "set to enter the debugger");
-SYSCTL_PROC(_debug_kdb, OID_AUTO, panic, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+SYSCTL_PROC(_debug_kdb, OID_AUTO, panic,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
kdb_sysctl_panic, "I", "set to panic the kernel");
-SYSCTL_PROC(_debug_kdb, OID_AUTO, trap, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+SYSCTL_PROC(_debug_kdb, OID_AUTO, trap,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
kdb_sysctl_trap, "I", "set to cause a page fault via data access");
-SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code, CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
kdb_sysctl_trap_code, "I", "set to cause a page fault via code access");
-SYSCTL_INT(_debug_kdb, OID_AUTO, break_to_debugger, CTLTYPE_INT | CTLFLAG_RW |
- CTLFLAG_TUN, &kdb_break_to_debugger, 0, "Enable break to debugger");
+SYSCTL_INT(_debug_kdb, OID_AUTO, break_to_debugger,
+ CTLFLAG_RWTUN | CTLFLAG_SECURE,
+ &kdb_break_to_debugger, 0, "Enable break to debugger");
TUNABLE_INT("debug.kdb.break_to_debugger", &kdb_break_to_debugger);
-SYSCTL_INT(_debug_kdb, OID_AUTO, alt_break_to_debugger, CTLTYPE_INT |
- CTLFLAG_RW | CTLFLAG_TUN, &kdb_alt_break_to_debugger, 0,
- "Enable alternative break to debugger");
+SYSCTL_INT(_debug_kdb, OID_AUTO, alt_break_to_debugger,
+ CTLFLAG_RWTUN | CTLFLAG_SECURE,
+ &kdb_alt_break_to_debugger, 0, "Enable alternative break to debugger");
TUNABLE_INT("debug.kdb.alt_break_to_debugger", &kdb_alt_break_to_debugger);
/*
@@ -498,6 +504,8 @@
if (!kdb_active || kdb_jmpbufp == NULL)
return;
+ printf("KDB: reentering\n");
+ kdb_backtrace();
longjmp(kdb_jmpbufp, 1);
/* NOTREACHED */
}
@@ -508,12 +516,12 @@
struct pcb *
kdb_thr_ctx(struct thread *thr)
-{
+{
#if defined(SMP) && defined(KDB_STOPPEDPCB)
struct pcpu *pc;
#endif
-
- if (thr == curthread)
+
+ if (thr == curthread)
return (&kdb_pcb);
#if defined(SMP) && defined(KDB_STOPPEDPCB)
Modified: trunk/sys/kern/subr_kobj.c
===================================================================
--- trunk/sys/kern/subr_kobj.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_kobj.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2000,2003 Doug Rabson
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_kobj.c 318275 2017-05-14 14:21:11Z marius $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -83,7 +84,7 @@
* desc pointer is NULL, it is guaranteed never to match any read
* descriptors.
*/
-static struct kobj_method null_method = {
+static const struct kobj_method null_method = {
0, 0,
};
@@ -213,19 +214,11 @@
{
kobj_method_t *ce;
-#ifdef KOBJ_STATS
- /*
- * Correct for the 'hit' assumption in KOBJOPLOOKUP and record
- * a 'miss'.
- */
- kobj_lookup_hits--;
- kobj_lookup_misses++;
-#endif
-
ce = kobj_lookup_method_mi(cls, desc);
if (!ce)
- ce = desc->deflt;
- *cep = ce;
+ ce = &desc->deflt;
+ if (cep)
+ *cep = ce;
return ce;
}
Modified: trunk/sys/kern/subr_lock.c
===================================================================
--- trunk/sys/kern/subr_lock.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_lock.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2006 John Baldwin <jhb at FreeBSD.org>
* All rights reserved.
@@ -10,9 +11,6 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the author nor the names of any co-contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -33,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_lock.c 323870 2017-09-21 19:24:11Z marius $");
#include "opt_ddb.h"
#include "opt_mprof.h"
@@ -58,6 +56,7 @@
#endif
#include <machine/cpufunc.h>
+#include <machine/cpu.h>
CTASSERT(LOCK_CLASS_MAX == 15);
@@ -66,6 +65,7 @@
&lock_class_mtx_sleep,
&lock_class_sx,
&lock_class_rm,
+ &lock_class_rm_sleepable,
&lock_class_rw,
&lock_class_lockmgr,
};
@@ -77,8 +77,8 @@
int i;
/* Check for double-init and zero object. */
- KASSERT(!lock_initalized(lock), ("lock \"%s\" %p already initialized",
- name, lock));
+ KASSERT(flags & LO_NEW || !lock_initalized(lock),
+ ("lock \"%s\" %p already initialized", name, lock));
/* Look up lock class to find its index. */
for (i = 0; i < LOCK_CLASS_MAX; i++)
@@ -105,6 +105,34 @@
lock->lo_flags &= ~LO_INITIALIZED;
}
+void
+lock_delay(struct lock_delay_arg *la)
+{
+ u_int i, delay, backoff, min, max;
+ struct lock_delay_config *lc = la->config;
+
+ delay = la->delay;
+
+ if (delay == 0)
+ delay = lc->initial;
+ else {
+ delay += lc->step;
+ max = lc->max;
+ if (delay > max)
+ delay = max;
+ }
+
+ backoff = cpu_ticks() % delay;
+ min = lc->min;
+ if (backoff < min)
+ backoff = min;
+ for (i = 0; i < backoff; i++)
+ cpu_spinwait();
+
+ la->delay = delay;
+ la->spin_cnt += backoff;
+}
+
#ifdef DDB
DB_SHOW_COMMAND(lock, db_show_lock)
{
@@ -240,34 +268,13 @@
}
SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL);
-/*
- * To be certain that lock profiling has idled on all cpus before we
- * reset, we schedule the resetting thread on all active cpus. Since
- * all operations happen within critical sections we can be sure that
- * it is safe to zero the profiling structures.
- */
static void
-lock_prof_idle(void)
-{
- struct thread *td;
- int cpu;
-
- td = curthread;
- thread_lock(td);
- CPU_FOREACH(cpu) {
- sched_bind(td, cpu);
- }
- sched_unbind(td);
- thread_unlock(td);
-}
-
-static void
lock_prof_reset_wait(void)
{
/*
- * Spin relinquishing our cpu so that lock_prof_idle may
- * run on it.
+ * Spin relinquishing our cpu so that quiesce_all_cpus may
+ * complete.
*/
while (lock_prof_resetting)
sched_relinquish(curthread);
@@ -289,7 +296,7 @@
atomic_store_rel_int(&lock_prof_resetting, 1);
enabled = lock_prof_enable;
lock_prof_enable = 0;
- lock_prof_idle();
+ quiesce_all_cpus("profreset", 0);
/*
* Some objects may have migrated between CPUs. Clear all links
* before we zero the structures. Some items may still be linked
@@ -401,7 +408,7 @@
"max", "wait_max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name");
enabled = lock_prof_enable;
lock_prof_enable = 0;
- lock_prof_idle();
+ quiesce_all_cpus("profstat", 0);
t = ticks;
for (cpu = 0; cpu <= mp_maxid; cpu++) {
if (lp_cpu[cpu] == NULL)
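The new lock_delay() helper above gives spinning lock primitives a bounded, pseudo-randomized backoff: the per-attempt delay grows by lc->step up to lc->max, and the actual spin count is cpu_ticks() modulo that delay, clamped to at least lc->min. A minimal sketch of a spin loop driving it follows; the lock type, the foo_try_lock() primitive, and the tunable values are hypothetical.

	/*
	 * Illustrative only: spin until a hypothetical try-acquire succeeds,
	 * calling lock_delay() between attempts so contending CPUs back off
	 * from each other instead of hammering the lock word.
	 */
	static struct lock_delay_config foo_delay_config = {
		.initial = 1,
		.step = 1,
		.min = 1,
		.max = 1024,
	};

	static void
	foo_lock_spin(struct foo_lock *fl)
	{
		struct lock_delay_arg lda;

		lda.config = &foo_delay_config;
		lda.delay = 0;
		lda.spin_cnt = 0;
		while (!foo_try_lock(fl))
			lock_delay(&lda);
	}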
Modified: trunk/sys/kern/subr_log.c
===================================================================
--- trunk/sys/kern/subr_log.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_log.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
@@ -34,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_log.c 247798 2013-03-04 16:07:55Z davide $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -117,8 +118,8 @@
return (EBUSY);
}
log_open = 1;
- callout_reset(&logsoftc.sc_callout, hz / log_wakeups_per_second,
- logtimeout, NULL);
+ callout_reset_sbt(&logsoftc.sc_callout,
+ SBT_1S / log_wakeups_per_second, 0, logtimeout, NULL, C_PREL(1));
mtx_unlock(&msgbuf_lock);
fsetown(td->td_proc->p_pid, &logsoftc.sc_sigio); /* signal process only */
@@ -233,15 +234,8 @@
if (!log_open)
return;
- if (log_wakeups_per_second < 1) {
- printf("syslog wakeup is less than one. Adjusting to 1.\n");
- log_wakeups_per_second = 1;
- }
- if (msgbuftrigger == 0) {
- callout_schedule(&logsoftc.sc_callout,
- hz / log_wakeups_per_second);
- return;
- }
+ if (msgbuftrigger == 0)
+ goto done;
msgbuftrigger = 0;
selwakeuppri(&logsoftc.sc_selp, LOG_RDPRI);
KNOTE_LOCKED(&logsoftc.sc_selp.si_note, 0);
@@ -248,7 +242,13 @@
if ((logsoftc.sc_state & LOG_ASYNC) && logsoftc.sc_sigio != NULL)
pgsigio(&logsoftc.sc_sigio, SIGIO, 0);
cv_broadcastpri(&log_wakeup, LOG_RDPRI);
- callout_schedule(&logsoftc.sc_callout, hz / log_wakeups_per_second);
+done:
+ if (log_wakeups_per_second < 1) {
+ printf("syslog wakeup is less than one. Adjusting to 1.\n");
+ log_wakeups_per_second = 1;
+ }
+ callout_reset_sbt(&logsoftc.sc_callout,
+ SBT_1S / log_wakeups_per_second, 0, logtimeout, NULL, C_PREL(1));
}
/*ARGSUSED*/
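The subr_log.c change above replaces the tick-based callout_schedule() calls with callout_reset_sbt(), expressing the wakeup period as SBT_1S / log_wakeups_per_second and allowing the timer some precision slack via C_PREL(1). A minimal sketch of the same rate-to-sbintime conversion in isolation; the callout, handler, and argument are hypothetical.

	/*
	 * Illustrative only: re-arm a callout to fire roughly 'rate' times per
	 * second, clamping the rate to at least 1 as logtimeout() now does.
	 */
	static void
	example_rearm(struct callout *c, int rate, void (*fn)(void *), void *arg)
	{

		if (rate < 1)
			rate = 1;
		callout_reset_sbt(c, SBT_1S / rate, 0, fn, arg, C_PREL(1));
	}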
Modified: trunk/sys/kern/subr_mbpool.c
===================================================================
--- trunk/sys/kern/subr_mbpool.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_mbpool.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2003
* Fraunhofer Institute for Open Communication Systems (FhG Fokus).
@@ -28,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_mbpool.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include <sys/param.h>
#include <sys/lock.h>
@@ -40,6 +41,7 @@
#include <machine/bus.h>
+#include <sys/mbuf.h>
#include <sys/mbpool.h>
MODULE_VERSION(libmbpool, 1);
@@ -209,16 +211,13 @@
pg = &p->pages[p->npages];
error = bus_dmamem_alloc(p->dmat, &pg->va, BUS_DMA_NOWAIT, &pg->map);
- if (error != 0) {
- free(pg, M_MBPOOL);
+ if (error != 0)
return;
- }
error = bus_dmamap_load(p->dmat, pg->map, pg->va, p->page_size,
mbp_callback, &pg->phy, 0);
if (error != 0) {
bus_dmamem_free(p->dmat, pg->va, pg->map);
- free(pg, M_MBPOOL);
return;
}
@@ -282,14 +281,16 @@
/*
* Mbuf system external mbuf free routine
*/
-void
-mbp_ext_free(void *buf, void *arg)
+int
+mbp_ext_free(struct mbuf *m, void *buf, void *arg)
{
mbp_free(arg, buf);
+
+ return (EXT_FREE_OK);
}
/*
- * Free all buffers that are marked as beeing on the card
+ * Free all buffers that are marked as being on the card
*/
void
mbp_card_free(struct mbpool *p)
Modified: trunk/sys/kern/subr_mchain.c
===================================================================
--- trunk/sys/kern/subr_mchain.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_mchain.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2000, 2001 Boris Popov
* All rights reserved.
@@ -28,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_mchain.c 302234 2016-06-27 21:50:30Z bdrewery $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -59,10 +60,10 @@
{
struct mbuf *m;
- m = m_gethdr(M_WAIT, MT_DATA);
+ m = m_gethdr(M_WAITOK, MT_DATA);
m->m_len = 0;
mb_initm(mbp, m);
- return 0;
+ return (0);
}
void
@@ -89,19 +90,19 @@
m = mbp->mb_top;
mbp->mb_top = NULL;
- return m;
+ return (m);
}
int
mb_fixhdr(struct mbchain *mbp)
{
- return mbp->mb_top->m_pkthdr.len = m_fixhdr(mbp->mb_top);
+ return (mbp->mb_top->m_pkthdr.len = m_fixhdr(mbp->mb_top));
}
/*
* Check if object of size 'size' fit to the current position and
* allocate new mbuf if not. Advance pointers and increase length of mbuf(s).
- * Return pointer to the object placeholder or NULL if any error occured.
+ * Return pointer to the object placeholder or NULL if any error occurred.
* Note: size should be <= MLEN
*/
caddr_t
@@ -114,7 +115,7 @@
panic("mb_reserve: size = %d\n", size);
m = mbp->mb_cur;
if (mbp->mb_mleft < size) {
- mn = m_get(M_WAIT, MT_DATA);
+ mn = m_get(M_WAITOK, MT_DATA);
mbp->mb_cur = m->m_next = mn;
m = mn;
m->m_len = 0;
@@ -124,7 +125,7 @@
mbp->mb_count += size;
bpos = mtod(m, caddr_t) + m->m_len;
m->m_len += size;
- return bpos;
+ return (bpos);
}
int
@@ -131,21 +132,21 @@
mb_put_padbyte(struct mbchain *mbp)
{
caddr_t dst;
- char x = 0;
+ uint8_t x = 0;
dst = mtod(mbp->mb_cur, caddr_t) + mbp->mb_cur->m_len;
- /* only add padding if address is odd */
+ /* Only add padding if address is odd */
if ((unsigned long)dst & 1)
- return mb_put_mem(mbp, (caddr_t)&x, 1, MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
else
- return 0;
+ return (0);
}
int
mb_put_uint8(struct mbchain *mbp, uint8_t x)
{
- return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
}
int
@@ -152,7 +153,7 @@
mb_put_uint16be(struct mbchain *mbp, uint16_t x)
{
x = htobe16(x);
- return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
}
int
@@ -159,7 +160,7 @@
mb_put_uint16le(struct mbchain *mbp, uint16_t x)
{
x = htole16(x);
- return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
}
int
@@ -166,7 +167,7 @@
mb_put_uint32be(struct mbchain *mbp, uint32_t x)
{
x = htobe32(x);
- return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
}
int
@@ -173,7 +174,7 @@
mb_put_uint32le(struct mbchain *mbp, uint32_t x)
{
x = htole32(x);
- return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
}
int
@@ -180,7 +181,7 @@
mb_put_int64be(struct mbchain *mbp, int64_t x)
{
x = htobe64(x);
- return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
}
int
@@ -187,7 +188,7 @@
mb_put_int64le(struct mbchain *mbp, int64_t x)
{
x = htole64(x);
- return mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM);
+ return (mb_put_mem(mbp, (caddr_t)&x, sizeof(x), MB_MSYSTEM));
}
int
@@ -205,7 +206,7 @@
while (size > 0) {
if (mleft == 0) {
if (m->m_next == NULL)
- m = m_getm(m, size, M_WAIT, MT_DATA);
+ m = m_getm(m, size, M_WAITOK, MT_DATA);
else
m = m->m_next;
mleft = M_TRAILINGSPACE(m);
@@ -220,7 +221,7 @@
dstlen = mleft;
error = mbp->mb_copy(mbp, source, dst, &srclen, &dstlen);
if (error)
- return error;
+ return (error);
break;
case MB_MINLINE:
for (src = source, count = cplen; count; count--)
@@ -232,7 +233,7 @@
case MB_MUSER:
error = copyin(source, dst, cplen);
if (error)
- return error;
+ return (error);
break;
case MB_MZERO:
bzero(dst, cplen);
@@ -246,7 +247,7 @@
}
mbp->mb_cur = m;
mbp->mb_mleft = mleft;
- return 0;
+ return (0);
}
int
@@ -261,7 +262,7 @@
}
mbp->mb_mleft = M_TRAILINGSPACE(m);
mbp->mb_cur = m;
- return 0;
+ return (0);
}
/*
@@ -277,7 +278,7 @@
while (size > 0 && uiop->uio_resid) {
if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
- return EFBIG;
+ return (EFBIG);
left = uiop->uio_iov->iov_len;
if (left == 0) {
uiop->uio_iov++;
@@ -288,7 +289,7 @@
left = size;
error = mb_put_mem(mbp, uiop->uio_iov->iov_base, left, mtype);
if (error)
- return error;
+ return (error);
uiop->uio_offset += left;
uiop->uio_resid -= left;
uiop->uio_iov->iov_base =
@@ -296,7 +297,7 @@
uiop->uio_iov->iov_len -= left;
size -= left;
}
- return 0;
+ return (0);
}
/*
@@ -307,10 +308,10 @@
{
struct mbuf *m;
- m = m_gethdr(M_WAIT, MT_DATA);
+ m = m_gethdr(M_WAITOK, MT_DATA);
m->m_len = 0;
md_initm(mdp, m);
- return 0;
+ return (0);
}
void
@@ -360,25 +361,25 @@
struct mbuf *m;
if (mdp->md_top == NULL)
- return ENOENT;
+ return (ENOENT);
m = mdp->md_top->m_nextpkt;
md_done(mdp);
if (m == NULL)
- return ENOENT;
+ return (ENOENT);
md_initm(mdp, m);
- return 0;
+ return (0);
}
int
md_get_uint8(struct mdchain *mdp, uint8_t *x)
{
- return md_get_mem(mdp, x, 1, MB_MINLINE);
+ return (md_get_mem(mdp, x, 1, MB_MINLINE));
}
int
md_get_uint16(struct mdchain *mdp, uint16_t *x)
{
- return md_get_mem(mdp, (caddr_t)x, 2, MB_MINLINE);
+ return (md_get_mem(mdp, (caddr_t)x, 2, MB_MINLINE));
}
int
@@ -389,7 +390,7 @@
if (x != NULL)
*x = le16toh(v);
- return error;
+ return (error);
}
int
@@ -400,13 +401,13 @@
if (x != NULL)
*x = be16toh(v);
- return error;
+ return (error);
}
int
md_get_uint32(struct mdchain *mdp, uint32_t *x)
{
- return md_get_mem(mdp, (caddr_t)x, 4, MB_MINLINE);
+ return (md_get_mem(mdp, (caddr_t)x, 4, MB_MINLINE));
}
int
@@ -418,7 +419,7 @@
error = md_get_uint32(mdp, &v);
if (x != NULL)
*x = be32toh(v);
- return error;
+ return (error);
}
int
@@ -430,13 +431,13 @@
error = md_get_uint32(mdp, &v);
if (x != NULL)
*x = le32toh(v);
- return error;
+ return (error);
}
int
md_get_int64(struct mdchain *mdp, int64_t *x)
{
- return md_get_mem(mdp, (caddr_t)x, 8, MB_MINLINE);
+ return (md_get_mem(mdp, (caddr_t)x, 8, MB_MINLINE));
}
int
@@ -448,7 +449,7 @@
error = md_get_int64(mdp, &v);
if (x != NULL)
*x = be64toh(v);
- return error;
+ return (error);
}
int
@@ -460,7 +461,7 @@
error = md_get_int64(mdp, &v);
if (x != NULL)
*x = le64toh(v);
- return error;
+ return (error);
}
int
@@ -474,7 +475,7 @@
while (size > 0) {
if (m == NULL) {
MBERROR("incomplete copy\n");
- return EBADRPC;
+ return (EBADRPC);
}
s = mdp->md_pos;
count = mtod(m, u_char*) + m->m_len - s;
@@ -506,7 +507,7 @@
}
target += count;
}
- return 0;
+ return (0);
}
int
@@ -514,10 +515,10 @@
{
struct mbuf *m = mdp->md_cur, *rm;
- rm = m_copym(m, mdp->md_pos - mtod(m, u_char*), size, M_WAIT);
+ rm = m_copym(m, mdp->md_pos - mtod(m, u_char*), size, M_WAITOK);
md_get_mem(mdp, NULL, size, MB_MZERO);
*ret = rm;
- return 0;
+ return (0);
}
int
@@ -530,7 +531,7 @@
mtype = (uiop->uio_segflg == UIO_SYSSPACE) ? MB_MSYSTEM : MB_MUSER;
while (size > 0 && uiop->uio_resid) {
if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
- return EFBIG;
+ return (EFBIG);
left = uiop->uio_iov->iov_len;
if (left == 0) {
uiop->uio_iov++;
@@ -542,7 +543,7 @@
left = size;
error = md_get_mem(mdp, uiocp, left, mtype);
if (error)
- return error;
+ return (error);
uiop->uio_offset += left;
uiop->uio_resid -= left;
uiop->uio_iov->iov_base =
@@ -550,5 +551,5 @@
uiop->uio_iov->iov_len -= left;
size -= left;
}
- return 0;
+ return (0);
}
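The subr_mchain.c changes above switch the allocations to M_WAITOK and normalize the return style of the mb_put_*/md_get_* helpers, which marshal data into and out of mbuf chains one field at a time. A minimal usage sketch of the mbchain side follows; the record layout is made up and assumes a little-endian wire format.

	/*
	 * Illustrative only: marshal a small record into an mbuf chain and
	 * hand the finished chain to the caller with mb_detach().
	 */
	static struct mbuf *
	example_marshal(uint32_t id, uint16_t flags)
	{
		struct mbchain mb;

		mb_init(&mb);			/* allocates the first mbuf, M_WAITOK */
		mb_put_uint32le(&mb, id);
		mb_put_uint16le(&mb, flags);
		mb_put_padbyte(&mb);		/* pads only if the next byte is odd-aligned */
		return (mb_detach(&mb));
	}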
Modified: trunk/sys/kern/subr_module.c
===================================================================
--- trunk/sys/kern/subr_module.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_module.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998 Michael Smith
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_module.c 218494 2011-02-09 19:08:21Z marcel $");
#include <sys/param.h>
#include <sys/systm.h>
Modified: trunk/sys/kern/subr_msgbuf.c
===================================================================
--- trunk/sys/kern/subr_msgbuf.c 2018-05-25 20:59:46 UTC (rev 9949)
+++ trunk/sys/kern/subr_msgbuf.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2003 Ian Dowse. All rights reserved.
*
@@ -22,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/10/sys/kern/subr_msgbuf.c 302234 2016-06-27 21:50:30Z bdrewery $
*/
/*
@@ -50,7 +51,7 @@
/*
* Timestamps in msgbuf are useful when trying to diagnose when core dumps
- * or other actions occured.
+ * or other actions occurred.
*/
static int msgbuf_show_timestamp = 0;
SYSCTL_INT(_kern, OID_AUTO, msgbuf_show_timestamp, CTLFLAG_RW | CTLFLAG_TUN,
Added: trunk/sys/kern/subr_nvlist.c
===================================================================
--- trunk/sys/kern/subr_nvlist.c (rev 0)
+++ trunk/sys/kern/subr_nvlist.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -0,0 +1,1476 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Pawel Jakub Dawidek under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_nvlist.c 292973 2015-12-31 03:28:14Z ngie $");
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/queue.h>
+
+#ifdef _KERNEL
+
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+
+#include <machine/stdarg.h>
+
+#else
+#include <sys/socket.h>
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#define _WITH_DPRINTF
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "msgio.h"
+#endif
+
+#ifdef HAVE_PJDLOG
+#include <pjdlog.h>
+#endif
+
+#include <sys/nv.h>
+#include <sys/nv_impl.h>
+#include <sys/nvlist_impl.h>
+#include <sys/nvpair_impl.h>
+
+#ifndef HAVE_PJDLOG
+#ifdef _KERNEL
+#define PJDLOG_ASSERT(...) MPASS(__VA_ARGS__)
+#define PJDLOG_RASSERT(expr, ...) KASSERT(expr, (__VA_ARGS__))
+#define PJDLOG_ABORT(...) panic(__VA_ARGS__)
+#else
+#include <assert.h>
+#define PJDLOG_ASSERT(...) assert(__VA_ARGS__)
+#define PJDLOG_RASSERT(expr, ...) assert(expr)
+#define PJDLOG_ABORT(...) do { \
+ fprintf(stderr, "%s:%u: ", __FILE__, __LINE__); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ abort(); \
+} while (0)
+#endif
+#endif
+
+#define NV_FLAG_PRIVATE_MASK (NV_FLAG_BIG_ENDIAN)
+#define NV_FLAG_PUBLIC_MASK (NV_FLAG_IGNORE_CASE)
+#define NV_FLAG_ALL_MASK (NV_FLAG_PRIVATE_MASK | NV_FLAG_PUBLIC_MASK)
+
+#define NVLIST_MAGIC 0x6e766c /* "nvl" */
+struct nvlist {
+ int nvl_magic;
+ int nvl_error;
+ int nvl_flags;
+ nvpair_t *nvl_parent;
+ struct nvl_head nvl_head;
+};
+
+#define NVLIST_ASSERT(nvl) do { \
+ PJDLOG_ASSERT((nvl) != NULL); \
+ PJDLOG_ASSERT((nvl)->nvl_magic == NVLIST_MAGIC); \
+} while (0)
+
+#ifdef _KERNEL
+MALLOC_DEFINE(M_NVLIST, "nvlist", "kernel nvlist");
+#endif
+
+#define NVPAIR_ASSERT(nvp) nvpair_assert(nvp)
+
+#define NVLIST_HEADER_MAGIC 0x6c
+#define NVLIST_HEADER_VERSION 0x00
+struct nvlist_header {
+ uint8_t nvlh_magic;
+ uint8_t nvlh_version;
+ uint8_t nvlh_flags;
+ uint64_t nvlh_descriptors;
+ uint64_t nvlh_size;
+} __packed;
+
+nvlist_t *
+nvlist_create(int flags)
+{
+ nvlist_t *nvl;
+
+ PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0);
+
+ nvl = nv_malloc(sizeof(*nvl));
+ nvl->nvl_error = 0;
+ nvl->nvl_flags = flags;
+ nvl->nvl_parent = NULL;
+ TAILQ_INIT(&nvl->nvl_head);
+ nvl->nvl_magic = NVLIST_MAGIC;
+
+ return (nvl);
+}
+
+void
+nvlist_destroy(nvlist_t *nvl)
+{
+ nvpair_t *nvp;
+ int serrno;
+
+ if (nvl == NULL)
+ return;
+
+ SAVE_ERRNO(serrno);
+
+ NVLIST_ASSERT(nvl);
+
+ while ((nvp = nvlist_first_nvpair(nvl)) != NULL) {
+ nvlist_remove_nvpair(nvl, nvp);
+ nvpair_free(nvp);
+ }
+ nvl->nvl_magic = 0;
+ nv_free(nvl);
+
+ RESTORE_ERRNO(serrno);
+}
+
+void
+nvlist_set_error(nvlist_t *nvl, int error)
+{
+
+ PJDLOG_ASSERT(error != 0);
+
+ /*
+ * Check for error != 0 so that we don't do the wrong thing if somebody
+ * tries to abuse this API when asserts are disabled.
+ */
+ if (nvl != NULL && error != 0 && nvl->nvl_error == 0)
+ nvl->nvl_error = error;
+}
+
+int
+nvlist_error(const nvlist_t *nvl)
+{
+
+ if (nvl == NULL)
+ return (ENOMEM);
+
+ NVLIST_ASSERT(nvl);
+
+ return (nvl->nvl_error);
+}
+
+nvpair_t *
+nvlist_get_nvpair_parent(const nvlist_t *nvl)
+{
+
+ NVLIST_ASSERT(nvl);
+
+ return (nvl->nvl_parent);
+}
+
+const nvlist_t *
+nvlist_get_parent(const nvlist_t *nvl, void **cookiep)
+{
+ nvpair_t *nvp;
+
+ NVLIST_ASSERT(nvl);
+
+ nvp = nvl->nvl_parent;
+ if (cookiep != NULL)
+ *cookiep = nvp;
+ if (nvp == NULL)
+ return (NULL);
+
+ return (nvpair_nvlist(nvp));
+}
+
+void
+nvlist_set_parent(nvlist_t *nvl, nvpair_t *parent)
+{
+
+ NVLIST_ASSERT(nvl);
+
+ nvl->nvl_parent = parent;
+}
+
+bool
+nvlist_empty(const nvlist_t *nvl)
+{
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+
+ return (nvlist_first_nvpair(nvl) == NULL);
+}
+
+int
+nvlist_flags(const nvlist_t *nvl)
+{
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+ PJDLOG_ASSERT((nvl->nvl_flags & ~(NV_FLAG_PUBLIC_MASK)) == 0);
+
+ return (nvl->nvl_flags);
+}
+
+static void
+nvlist_report_missing(int type, const char *name)
+{
+
+ PJDLOG_ABORT("Element '%s' of type %s doesn't exist.",
+ name, nvpair_type_string(type));
+}
+
+static nvpair_t *
+nvlist_find(const nvlist_t *nvl, int type, const char *name)
+{
+ nvpair_t *nvp;
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+ PJDLOG_ASSERT(type == NV_TYPE_NONE ||
+ (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST));
+
+ for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+ if (type != NV_TYPE_NONE && nvpair_type(nvp) != type)
+ continue;
+ if ((nvl->nvl_flags & NV_FLAG_IGNORE_CASE) != 0) {
+ if (strcasecmp(nvpair_name(nvp), name) != 0)
+ continue;
+ } else {
+ if (strcmp(nvpair_name(nvp), name) != 0)
+ continue;
+ }
+ break;
+ }
+
+ if (nvp == NULL)
+ RESTORE_ERRNO(ENOENT);
+
+ return (nvp);
+}
+
+bool
+nvlist_exists_type(const nvlist_t *nvl, const char *name, int type)
+{
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+ PJDLOG_ASSERT(type == NV_TYPE_NONE ||
+ (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST));
+
+ return (nvlist_find(nvl, type, name) != NULL);
+}
+
+void
+nvlist_free_type(nvlist_t *nvl, const char *name, int type)
+{
+ nvpair_t *nvp;
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+ PJDLOG_ASSERT(type == NV_TYPE_NONE ||
+ (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST));
+
+ nvp = nvlist_find(nvl, type, name);
+ if (nvp != NULL)
+ nvlist_free_nvpair(nvl, nvp);
+ else
+ nvlist_report_missing(type, name);
+}
+
+nvlist_t *
+nvlist_clone(const nvlist_t *nvl)
+{
+ nvlist_t *newnvl;
+ nvpair_t *nvp, *newnvp;
+
+ NVLIST_ASSERT(nvl);
+
+ if (nvl->nvl_error != 0) {
+ RESTORE_ERRNO(nvl->nvl_error);
+ return (NULL);
+ }
+
+ newnvl = nvlist_create(nvl->nvl_flags & NV_FLAG_PUBLIC_MASK);
+ for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+ newnvp = nvpair_clone(nvp);
+ if (newnvp == NULL)
+ break;
+ nvlist_move_nvpair(newnvl, newnvp);
+ }
+ if (nvp != NULL) {
+ nvlist_destroy(newnvl);
+ return (NULL);
+ }
+ return (newnvl);
+}
+
+#ifndef _KERNEL
+static bool
+nvlist_dump_error_check(const nvlist_t *nvl, int fd, int level)
+{
+
+ if (nvlist_error(nvl) != 0) {
+ dprintf(fd, "%*serror: %d\n", level * 4, "",
+ nvlist_error(nvl));
+ return (true);
+ }
+
+ return (false);
+}
+
+/*
+ * Dump the contents of an nvlist.
+ */
+void
+nvlist_dump(const nvlist_t *nvl, int fd)
+{
+ const nvlist_t *tmpnvl;
+ nvpair_t *nvp, *tmpnvp;
+ void *cookie;
+ int level;
+
+ level = 0;
+ if (nvlist_dump_error_check(nvl, fd, level))
+ return;
+
+ nvp = nvlist_first_nvpair(nvl);
+ while (nvp != NULL) {
+ dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp),
+ nvpair_type_string(nvpair_type(nvp)));
+ switch (nvpair_type(nvp)) {
+ case NV_TYPE_NULL:
+ dprintf(fd, " null\n");
+ break;
+ case NV_TYPE_BOOL:
+ dprintf(fd, " %s\n", nvpair_get_bool(nvp) ?
+ "TRUE" : "FALSE");
+ break;
+ case NV_TYPE_NUMBER:
+ dprintf(fd, " %ju (%jd) (0x%jx)\n",
+ (uintmax_t)nvpair_get_number(nvp),
+ (intmax_t)nvpair_get_number(nvp),
+ (uintmax_t)nvpair_get_number(nvp));
+ break;
+ case NV_TYPE_STRING:
+ dprintf(fd, " [%s]\n", nvpair_get_string(nvp));
+ break;
+ case NV_TYPE_NVLIST:
+ dprintf(fd, "\n");
+ tmpnvl = nvpair_get_nvlist(nvp);
+ if (nvlist_dump_error_check(tmpnvl, fd, level + 1))
+ break;
+ tmpnvp = nvlist_first_nvpair(tmpnvl);
+ if (tmpnvp != NULL) {
+ nvl = tmpnvl;
+ nvp = tmpnvp;
+ level++;
+ continue;
+ }
+ break;
+ case NV_TYPE_DESCRIPTOR:
+ dprintf(fd, " %d\n", nvpair_get_descriptor(nvp));
+ break;
+ case NV_TYPE_BINARY:
+ {
+ const unsigned char *binary;
+ unsigned int ii;
+ size_t size;
+
+ binary = nvpair_get_binary(nvp, &size);
+ dprintf(fd, " %zu ", size);
+ for (ii = 0; ii < size; ii++)
+ dprintf(fd, "%02hhx", binary[ii]);
+ dprintf(fd, "\n");
+ break;
+ }
+ default:
+ PJDLOG_ABORT("Unknown type: %d.", nvpair_type(nvp));
+ }
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
+ cookie = NULL;
+ nvl = nvlist_get_parent(nvl, &cookie);
+ if (nvl == NULL)
+ return;
+ nvp = cookie;
+ level--;
+ }
+ }
+}
+
+void
+nvlist_fdump(const nvlist_t *nvl, FILE *fp)
+{
+
+ fflush(fp);
+ nvlist_dump(nvl, fileno(fp));
+}
+#endif
+
+/*
+ * Obtain the size that the nvlist will occupy after nvlist_pack().
+ */
+size_t
+nvlist_size(const nvlist_t *nvl)
+{
+ const nvlist_t *tmpnvl;
+ const nvpair_t *nvp, *tmpnvp;
+ void *cookie;
+ size_t size;
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+
+ size = sizeof(struct nvlist_header);
+ nvp = nvlist_first_nvpair(nvl);
+ while (nvp != NULL) {
+ size += nvpair_header_size();
+ size += strlen(nvpair_name(nvp)) + 1;
+ if (nvpair_type(nvp) == NV_TYPE_NVLIST) {
+ size += sizeof(struct nvlist_header);
+ size += nvpair_header_size() + 1;
+ tmpnvl = nvpair_get_nvlist(nvp);
+ PJDLOG_ASSERT(tmpnvl->nvl_error == 0);
+ tmpnvp = nvlist_first_nvpair(tmpnvl);
+ if (tmpnvp != NULL) {
+ nvl = tmpnvl;
+ nvp = tmpnvp;
+ continue;
+ }
+ } else {
+ size += nvpair_size(nvp);
+ }
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
+ cookie = NULL;
+ nvl = nvlist_get_parent(nvl, &cookie);
+ if (nvl == NULL)
+ goto out;
+ nvp = cookie;
+ }
+ }
+
+out:
+ return (size);
+}
+
+#ifndef _KERNEL
+static int *
+nvlist_xdescriptors(const nvlist_t *nvl, int *descs, int level)
+{
+ const nvpair_t *nvp;
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+ PJDLOG_ASSERT(level < 3);
+
+ for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+ switch (nvpair_type(nvp)) {
+ case NV_TYPE_DESCRIPTOR:
+ *descs = nvpair_get_descriptor(nvp);
+ descs++;
+ break;
+ case NV_TYPE_NVLIST:
+ descs = nvlist_xdescriptors(nvpair_get_nvlist(nvp),
+ descs, level + 1);
+ break;
+ }
+ }
+
+ return (descs);
+}
+#endif
+
+#ifndef _KERNEL
+int *
+nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp)
+{
+ size_t nitems;
+ int *fds;
+
+ nitems = nvlist_ndescriptors(nvl);
+ fds = nv_malloc(sizeof(fds[0]) * (nitems + 1));
+ if (fds == NULL)
+ return (NULL);
+ if (nitems > 0)
+ nvlist_xdescriptors(nvl, fds, 0);
+ fds[nitems] = -1;
+ if (nitemsp != NULL)
+ *nitemsp = nitems;
+ return (fds);
+}
+#endif
+
+static size_t
+nvlist_xndescriptors(const nvlist_t *nvl, int level)
+{
+#ifndef _KERNEL
+ const nvpair_t *nvp;
+ size_t ndescs;
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(nvl->nvl_error == 0);
+ PJDLOG_ASSERT(level < 3);
+
+ ndescs = 0;
+ for (nvp = nvlist_first_nvpair(nvl); nvp != NULL;
+ nvp = nvlist_next_nvpair(nvl, nvp)) {
+ switch (nvpair_type(nvp)) {
+ case NV_TYPE_DESCRIPTOR:
+ ndescs++;
+ break;
+ case NV_TYPE_NVLIST:
+ ndescs += nvlist_xndescriptors(nvpair_get_nvlist(nvp),
+ level + 1);
+ break;
+ }
+ }
+
+ return (ndescs);
+#else
+ return (0);
+#endif
+}
+
+size_t
+nvlist_ndescriptors(const nvlist_t *nvl)
+{
+
+ return (nvlist_xndescriptors(nvl, 0));
+}
+
+static unsigned char *
+nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp)
+{
+ struct nvlist_header nvlhdr;
+
+ NVLIST_ASSERT(nvl);
+
+ nvlhdr.nvlh_magic = NVLIST_HEADER_MAGIC;
+ nvlhdr.nvlh_version = NVLIST_HEADER_VERSION;
+ nvlhdr.nvlh_flags = nvl->nvl_flags;
+#if BYTE_ORDER == BIG_ENDIAN
+ nvlhdr.nvlh_flags |= NV_FLAG_BIG_ENDIAN;
+#endif
+ nvlhdr.nvlh_descriptors = nvlist_ndescriptors(nvl);
+ nvlhdr.nvlh_size = *leftp - sizeof(nvlhdr);
+ PJDLOG_ASSERT(*leftp >= sizeof(nvlhdr));
+ memcpy(ptr, &nvlhdr, sizeof(nvlhdr));
+ ptr += sizeof(nvlhdr);
+ *leftp -= sizeof(nvlhdr);
+
+ return (ptr);
+}
+
+void *
+nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep)
+{
+ unsigned char *buf, *ptr;
+ size_t left, size;
+ const nvlist_t *tmpnvl;
+ nvpair_t *nvp, *tmpnvp;
+ void *cookie;
+
+ NVLIST_ASSERT(nvl);
+
+ if (nvl->nvl_error != 0) {
+ RESTORE_ERRNO(nvl->nvl_error);
+ return (NULL);
+ }
+
+ size = nvlist_size(nvl);
+ buf = nv_malloc(size);
+ if (buf == NULL)
+ return (NULL);
+
+ ptr = buf;
+ left = size;
+
+ ptr = nvlist_pack_header(nvl, ptr, &left);
+
+ nvp = nvlist_first_nvpair(nvl);
+ while (nvp != NULL) {
+ NVPAIR_ASSERT(nvp);
+
+ nvpair_init_datasize(nvp);
+ ptr = nvpair_pack_header(nvp, ptr, &left);
+ if (ptr == NULL) {
+ nv_free(buf);
+ return (NULL);
+ }
+ switch (nvpair_type(nvp)) {
+ case NV_TYPE_NULL:
+ ptr = nvpair_pack_null(nvp, ptr, &left);
+ break;
+ case NV_TYPE_BOOL:
+ ptr = nvpair_pack_bool(nvp, ptr, &left);
+ break;
+ case NV_TYPE_NUMBER:
+ ptr = nvpair_pack_number(nvp, ptr, &left);
+ break;
+ case NV_TYPE_STRING:
+ ptr = nvpair_pack_string(nvp, ptr, &left);
+ break;
+ case NV_TYPE_NVLIST:
+ tmpnvl = nvpair_get_nvlist(nvp);
+ ptr = nvlist_pack_header(tmpnvl, ptr, &left);
+ if (ptr == NULL)
+ goto out;
+ tmpnvp = nvlist_first_nvpair(tmpnvl);
+ if (tmpnvp != NULL) {
+ nvl = tmpnvl;
+ nvp = tmpnvp;
+ continue;
+ }
+ ptr = nvpair_pack_nvlist_up(ptr, &left);
+ break;
+#ifndef _KERNEL
+ case NV_TYPE_DESCRIPTOR:
+ ptr = nvpair_pack_descriptor(nvp, ptr, fdidxp, &left);
+ break;
+#endif
+ case NV_TYPE_BINARY:
+ ptr = nvpair_pack_binary(nvp, ptr, &left);
+ break;
+ default:
+ PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp));
+ }
+ if (ptr == NULL) {
+ nv_free(buf);
+ return (NULL);
+ }
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) {
+ cookie = NULL;
+ nvl = nvlist_get_parent(nvl, &cookie);
+ if (nvl == NULL)
+ goto out;
+ nvp = cookie;
+ ptr = nvpair_pack_nvlist_up(ptr, &left);
+ if (ptr == NULL)
+ goto out;
+ }
+ }
+
+out:
+ if (sizep != NULL)
+ *sizep = size;
+ return (buf);
+}
+
+void *
+nvlist_pack(const nvlist_t *nvl, size_t *sizep)
+{
+
+ NVLIST_ASSERT(nvl);
+
+ if (nvl->nvl_error != 0) {
+ RESTORE_ERRNO(nvl->nvl_error);
+ return (NULL);
+ }
+
+ if (nvlist_ndescriptors(nvl) > 0) {
+ RESTORE_ERRNO(EOPNOTSUPP);
+ return (NULL);
+ }
+
+ return (nvlist_xpack(nvl, NULL, sizep));
+}
+
+static bool
+nvlist_check_header(struct nvlist_header *nvlhdrp)
+{
+
+ if (nvlhdrp->nvlh_magic != NVLIST_HEADER_MAGIC) {
+ RESTORE_ERRNO(EINVAL);
+ return (false);
+ }
+ if ((nvlhdrp->nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) {
+ RESTORE_ERRNO(EINVAL);
+ return (false);
+ }
+#if BYTE_ORDER == BIG_ENDIAN
+ if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) == 0) {
+ nvlhdrp->nvlh_size = le64toh(nvlhdrp->nvlh_size);
+ nvlhdrp->nvlh_descriptors = le64toh(nvlhdrp->nvlh_descriptors);
+ }
+#else
+ if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0) {
+ nvlhdrp->nvlh_size = be64toh(nvlhdrp->nvlh_size);
+ nvlhdrp->nvlh_descriptors = be64toh(nvlhdrp->nvlh_descriptors);
+ }
+#endif
+ return (true);
+}
+
+const unsigned char *
+nvlist_unpack_header(nvlist_t *nvl, const unsigned char *ptr, size_t nfds,
+ bool *isbep, size_t *leftp)
+{
+ struct nvlist_header nvlhdr;
+
+ if (*leftp < sizeof(nvlhdr))
+ goto failed;
+
+ memcpy(&nvlhdr, ptr, sizeof(nvlhdr));
+
+ if (!nvlist_check_header(&nvlhdr))
+ goto failed;
+
+ if (nvlhdr.nvlh_size != *leftp - sizeof(nvlhdr))
+ goto failed;
+
+ /*
+ * nvlh_descriptors might be smaller than nfds in embedded nvlists.
+ */
+ if (nvlhdr.nvlh_descriptors > nfds)
+ goto failed;
+
+ if ((nvlhdr.nvlh_flags & ~NV_FLAG_ALL_MASK) != 0)
+ goto failed;
+
+ nvl->nvl_flags = (nvlhdr.nvlh_flags & NV_FLAG_PUBLIC_MASK);
+
+ ptr += sizeof(nvlhdr);
+ if (isbep != NULL)
+ *isbep = (((int)nvlhdr.nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0);
+ *leftp -= sizeof(nvlhdr);
+
+ return (ptr);
+failed:
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+}
+
+nvlist_t *
+nvlist_xunpack(const void *buf, size_t size, const int *fds, size_t nfds)
+{
+ const unsigned char *ptr;
+ nvlist_t *nvl, *retnvl, *tmpnvl;
+ nvpair_t *nvp;
+ size_t left;
+ bool isbe;
+
+ left = size;
+ ptr = buf;
+
+ tmpnvl = NULL;
+ nvl = retnvl = nvlist_create(0);
+ if (nvl == NULL)
+ goto failed;
+
+ ptr = nvlist_unpack_header(nvl, ptr, nfds, &isbe, &left);
+ if (ptr == NULL)
+ goto failed;
+
+ while (left > 0) {
+ ptr = nvpair_unpack(isbe, ptr, &left, &nvp);
+ if (ptr == NULL)
+ goto failed;
+ switch (nvpair_type(nvp)) {
+ case NV_TYPE_NULL:
+ ptr = nvpair_unpack_null(isbe, nvp, ptr, &left);
+ break;
+ case NV_TYPE_BOOL:
+ ptr = nvpair_unpack_bool(isbe, nvp, ptr, &left);
+ break;
+ case NV_TYPE_NUMBER:
+ ptr = nvpair_unpack_number(isbe, nvp, ptr, &left);
+ break;
+ case NV_TYPE_STRING:
+ ptr = nvpair_unpack_string(isbe, nvp, ptr, &left);
+ break;
+ case NV_TYPE_NVLIST:
+ ptr = nvpair_unpack_nvlist(isbe, nvp, ptr, &left, nfds,
+ &tmpnvl);
+ nvlist_set_parent(tmpnvl, nvp);
+ break;
+#ifndef _KERNEL
+ case NV_TYPE_DESCRIPTOR:
+ ptr = nvpair_unpack_descriptor(isbe, nvp, ptr, &left,
+ fds, nfds);
+ break;
+#endif
+ case NV_TYPE_BINARY:
+ ptr = nvpair_unpack_binary(isbe, nvp, ptr, &left);
+ break;
+ case NV_TYPE_NVLIST_UP:
+ if (nvl->nvl_parent == NULL)
+ goto failed;
+ nvl = nvpair_nvlist(nvl->nvl_parent);
+ continue;
+ default:
+ PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp));
+ }
+ if (ptr == NULL)
+ goto failed;
+ nvlist_move_nvpair(nvl, nvp);
+ if (tmpnvl != NULL) {
+ nvl = tmpnvl;
+ tmpnvl = NULL;
+ }
+ }
+
+ return (retnvl);
+failed:
+ nvlist_destroy(retnvl);
+ return (NULL);
+}
+
+nvlist_t *
+nvlist_unpack(const void *buf, size_t size)
+{
+
+ return (nvlist_xunpack(buf, size, NULL, 0));
+}
+
+#ifndef _KERNEL
+int
+nvlist_send(int sock, const nvlist_t *nvl)
+{
+ size_t datasize, nfds;
+ int *fds;
+ void *data;
+ int64_t fdidx;
+ int serrno, ret;
+
+ if (nvlist_error(nvl) != 0) {
+ errno = nvlist_error(nvl);
+ return (-1);
+ }
+
+ fds = nvlist_descriptors(nvl, &nfds);
+ if (fds == NULL)
+ return (-1);
+
+ ret = -1;
+ data = NULL;
+ fdidx = 0;
+
+ data = nvlist_xpack(nvl, &fdidx, &datasize);
+ if (data == NULL)
+ goto out;
+
+ if (buf_send(sock, data, datasize) == -1)
+ goto out;
+
+ if (nfds > 0) {
+ if (fd_send(sock, fds, nfds) == -1)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ serrno = errno;
+ free(fds);
+ free(data);
+ errno = serrno;
+ return (ret);
+}
+
+nvlist_t *
+nvlist_recv(int sock)
+{
+ struct nvlist_header nvlhdr;
+ nvlist_t *nvl, *ret;
+ unsigned char *buf;
+ size_t nfds, size, i;
+ int serrno, *fds;
+
+ if (buf_recv(sock, &nvlhdr, sizeof(nvlhdr)) == -1)
+ return (NULL);
+
+ if (!nvlist_check_header(&nvlhdr))
+ return (NULL);
+
+ nfds = (size_t)nvlhdr.nvlh_descriptors;
+ size = sizeof(nvlhdr) + (size_t)nvlhdr.nvlh_size;
+
+ buf = malloc(size);
+ if (buf == NULL)
+ return (NULL);
+
+ memcpy(buf, &nvlhdr, sizeof(nvlhdr));
+
+ ret = NULL;
+ fds = NULL;
+
+ if (buf_recv(sock, buf + sizeof(nvlhdr), size - sizeof(nvlhdr)) == -1)
+ goto out;
+
+ if (nfds > 0) {
+ fds = malloc(nfds * sizeof(fds[0]));
+ if (fds == NULL)
+ goto out;
+ if (fd_recv(sock, fds, nfds) == -1)
+ goto out;
+ }
+
+ nvl = nvlist_xunpack(buf, size, fds, nfds);
+ if (nvl == NULL) {
+ for (i = 0; i < nfds; i++)
+ close(fds[i]);
+ goto out;
+ }
+
+ ret = nvl;
+out:
+ serrno = errno;
+ free(buf);
+ free(fds);
+ errno = serrno;
+
+ return (ret);
+}
+
+nvlist_t *
+nvlist_xfer(int sock, nvlist_t *nvl)
+{
+
+ if (nvlist_send(sock, nvl) < 0) {
+ nvlist_destroy(nvl);
+ return (NULL);
+ }
+ nvlist_destroy(nvl);
+ return (nvlist_recv(sock));
+}
+#endif
+
+nvpair_t *
+nvlist_first_nvpair(const nvlist_t *nvl)
+{
+
+ NVLIST_ASSERT(nvl);
+
+ return (TAILQ_FIRST(&nvl->nvl_head));
+}
+
+nvpair_t *
+nvlist_next_nvpair(const nvlist_t *nvl, const nvpair_t *nvp)
+{
+ nvpair_t *retnvp;
+
+ NVLIST_ASSERT(nvl);
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl);
+
+ retnvp = nvpair_next(nvp);
+ PJDLOG_ASSERT(retnvp == NULL || nvpair_nvlist(retnvp) == nvl);
+
+ return (retnvp);
+
+}
+
+nvpair_t *
+nvlist_prev_nvpair(const nvlist_t *nvl, const nvpair_t *nvp)
+{
+ nvpair_t *retnvp;
+
+ NVLIST_ASSERT(nvl);
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl);
+
+ retnvp = nvpair_prev(nvp);
+ PJDLOG_ASSERT(nvpair_nvlist(retnvp) == nvl);
+
+ return (retnvp);
+}
+
+const char *
+nvlist_next(const nvlist_t *nvl, int *typep, void **cookiep)
+{
+ nvpair_t *nvp;
+
+ NVLIST_ASSERT(nvl);
+ PJDLOG_ASSERT(cookiep != NULL);
+
+ if (*cookiep == NULL)
+ nvp = nvlist_first_nvpair(nvl);
+ else
+ nvp = nvlist_next_nvpair(nvl, *cookiep);
+ if (nvp == NULL)
+ return (NULL);
+ if (typep != NULL)
+ *typep = nvpair_type(nvp);
+ *cookiep = nvp;
+ return (nvpair_name(nvp));
+}
+
+bool
+nvlist_exists(const nvlist_t *nvl, const char *name)
+{
+
+ return (nvlist_find(nvl, NV_TYPE_NONE, name) != NULL);
+}
+
+#define NVLIST_EXISTS(type, TYPE) \
+bool \
+nvlist_exists_##type(const nvlist_t *nvl, const char *name) \
+{ \
+ \
+ return (nvlist_find(nvl, NV_TYPE_##TYPE, name) != NULL); \
+}
+
+NVLIST_EXISTS(null, NULL)
+NVLIST_EXISTS(bool, BOOL)
+NVLIST_EXISTS(number, NUMBER)
+NVLIST_EXISTS(string, STRING)
+NVLIST_EXISTS(nvlist, NVLIST)
+#ifndef _KERNEL
+NVLIST_EXISTS(descriptor, DESCRIPTOR)
+#endif
+NVLIST_EXISTS(binary, BINARY)
+
+#undef NVLIST_EXISTS
+
+void
+nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp)
+{
+ nvpair_t *newnvp;
+
+ NVPAIR_ASSERT(nvp);
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+ if (nvlist_exists(nvl, nvpair_name(nvp))) {
+ nvl->nvl_error = EEXIST;
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ newnvp = nvpair_clone(nvp);
+ if (newnvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvpair_insert(&nvl->nvl_head, newnvp, nvl);
+}
+
+void
+nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...)
+{
+ va_list valueap;
+
+ va_start(valueap, valuefmt);
+ nvlist_add_stringv(nvl, name, valuefmt, valueap);
+ va_end(valueap);
+}
+
+void
+nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt,
+ va_list valueap)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_create_stringv(name, valuefmt, valueap);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+void
+nvlist_add_null(nvlist_t *nvl, const char *name)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_create_null(name);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+void
+nvlist_add_bool(nvlist_t *nvl, const char *name, bool value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_create_bool(name, value);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+void
+nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_create_number(name, value);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+void
+nvlist_add_string(nvlist_t *nvl, const char *name, const char *value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_create_string(name, value);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+void
+nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_create_nvlist(name, value);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+#ifndef _KERNEL
+void
+nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ errno = nvlist_error(nvl);
+ return;
+ }
+
+ nvp = nvpair_create_descriptor(name, value);
+ if (nvp == NULL)
+ nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM);
+ else
+ nvlist_move_nvpair(nvl, nvp);
+}
+#endif
+
+void
+nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value,
+ size_t size)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_create_binary(name, value, size);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+void
+nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvpair_nvlist(nvp) == NULL);
+
+ if (nvlist_error(nvl) != 0) {
+ nvpair_free(nvp);
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+ if (nvlist_exists(nvl, nvpair_name(nvp))) {
+ nvpair_free(nvp);
+ nvl->nvl_error = EEXIST;
+ RESTORE_ERRNO(nvl->nvl_error);
+ return;
+ }
+
+ nvpair_insert(&nvl->nvl_head, nvp, nvl);
+}
+
+void
+nvlist_move_string(nvlist_t *nvl, const char *name, char *value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ nv_free(value);
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_move_string(name, value);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+void
+nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ if (value != NULL && nvlist_get_nvpair_parent(value) != NULL)
+ nvlist_destroy(value);
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_move_nvlist(name, value);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+#ifndef _KERNEL
+void
+nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ close(value);
+ errno = nvlist_error(nvl);
+ return;
+ }
+
+ nvp = nvpair_move_descriptor(name, value);
+ if (nvp == NULL)
+ nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM);
+ else
+ nvlist_move_nvpair(nvl, nvp);
+}
+#endif
+
+void
+nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size)
+{
+ nvpair_t *nvp;
+
+ if (nvlist_error(nvl) != 0) {
+ nv_free(value);
+ RESTORE_ERRNO(nvlist_error(nvl));
+ return;
+ }
+
+ nvp = nvpair_move_binary(name, value, size);
+ if (nvp == NULL) {
+ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM);
+ RESTORE_ERRNO(nvl->nvl_error);
+ } else
+ nvlist_move_nvpair(nvl, nvp);
+}
+
+const nvpair_t *
+nvlist_get_nvpair(const nvlist_t *nvl, const char *name)
+{
+
+ return (nvlist_find(nvl, NV_TYPE_NONE, name));
+}
+
+#define NVLIST_GET(ftype, type, TYPE) \
+ftype \
+nvlist_get_##type(const nvlist_t *nvl, const char *name) \
+{ \
+ const nvpair_t *nvp; \
+ \
+ nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \
+ if (nvp == NULL) \
+ nvlist_report_missing(NV_TYPE_##TYPE, name); \
+ return (nvpair_get_##type(nvp)); \
+}
+
+NVLIST_GET(bool, bool, BOOL)
+NVLIST_GET(uint64_t, number, NUMBER)
+NVLIST_GET(const char *, string, STRING)
+NVLIST_GET(const nvlist_t *, nvlist, NVLIST)
+#ifndef _KERNEL
+NVLIST_GET(int, descriptor, DESCRIPTOR)
+#endif
+
+#undef NVLIST_GET
+
+const void *
+nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep)
+{
+ nvpair_t *nvp;
+
+ nvp = nvlist_find(nvl, NV_TYPE_BINARY, name);
+ if (nvp == NULL)
+ nvlist_report_missing(NV_TYPE_BINARY, name);
+
+ return (nvpair_get_binary(nvp, sizep));
+}
+
+#define NVLIST_TAKE(ftype, type, TYPE) \
+ftype \
+nvlist_take_##type(nvlist_t *nvl, const char *name) \
+{ \
+ nvpair_t *nvp; \
+ ftype value; \
+ \
+ nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \
+ if (nvp == NULL) \
+ nvlist_report_missing(NV_TYPE_##TYPE, name); \
+ value = (ftype)(intptr_t)nvpair_get_##type(nvp); \
+ nvlist_remove_nvpair(nvl, nvp); \
+ nvpair_free_structure(nvp); \
+ return (value); \
+}
+
+NVLIST_TAKE(bool, bool, BOOL)
+NVLIST_TAKE(uint64_t, number, NUMBER)
+NVLIST_TAKE(char *, string, STRING)
+NVLIST_TAKE(nvlist_t *, nvlist, NVLIST)
+#ifndef _KERNEL
+NVLIST_TAKE(int, descriptor, DESCRIPTOR)
+#endif
+
+#undef NVLIST_TAKE
+
+void *
+nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep)
+{
+ nvpair_t *nvp;
+ void *value;
+
+ nvp = nvlist_find(nvl, NV_TYPE_BINARY, name);
+ if (nvp == NULL)
+ nvlist_report_missing(NV_TYPE_BINARY, name);
+
+ value = (void *)(intptr_t)nvpair_get_binary(nvp, sizep);
+ nvlist_remove_nvpair(nvl, nvp);
+ nvpair_free_structure(nvp);
+ return (value);
+}
+
+void
+nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+
+ NVLIST_ASSERT(nvl);
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl);
+
+ nvpair_remove(&nvl->nvl_head, nvp, nvl);
+}
+
+void
+nvlist_free(nvlist_t *nvl, const char *name)
+{
+
+ nvlist_free_type(nvl, name, NV_TYPE_NONE);
+}
+
+#define NVLIST_FREE(type, TYPE) \
+void \
+nvlist_free_##type(nvlist_t *nvl, const char *name) \
+{ \
+ \
+ nvlist_free_type(nvl, name, NV_TYPE_##TYPE); \
+}
+
+NVLIST_FREE(null, NULL)
+NVLIST_FREE(bool, BOOL)
+NVLIST_FREE(number, NUMBER)
+NVLIST_FREE(string, STRING)
+NVLIST_FREE(nvlist, NVLIST)
+#ifndef _KERNEL
+NVLIST_FREE(descriptor, DESCRIPTOR)
+#endif
+NVLIST_FREE(binary, BINARY)
+
+#undef NVLIST_FREE
+
+void
+nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+
+ NVLIST_ASSERT(nvl);
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl);
+
+ nvlist_remove_nvpair(nvl, nvp);
+ nvpair_free(nvp);
+}
+
Property changes on: trunk/sys/kern/subr_nvlist.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
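The new subr_nvlist.c above brings the name/value pair list API into the kernel. A minimal sketch of a pack/unpack round trip using the functions defined in this file; error handling leans on the sticky nvlist_error() as the API intends, and freeing the packed buffer with nv_free() is an assumption based on nvlist_xpack() allocating it with nv_malloc().

	/*
	 * Illustrative only: build an nvlist, serialize it with nvlist_pack(),
	 * then reconstruct it with nvlist_unpack().
	 */
	static int
	example_nvlist_roundtrip(void)
	{
		nvlist_t *nvl, *copy;
		void *buf;
		size_t size;
		int error;

		nvl = nvlist_create(0);
		nvlist_add_string(nvl, "name", "example");
		nvlist_add_number(nvl, "value", 42);
		error = nvlist_error(nvl);	/* sticky error from the adds */
		if (error != 0) {
			nvlist_destroy(nvl);
			return (error);
		}

		buf = nvlist_pack(nvl, &size);
		nvlist_destroy(nvl);
		if (buf == NULL)
			return (ENOMEM);

		copy = nvlist_unpack(buf, size);
		nv_free(buf);
		if (copy == NULL)
			return (EINVAL);
		nvlist_destroy(copy);
		return (0);
	}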
Added: trunk/sys/kern/subr_nvpair.c
===================================================================
--- trunk/sys/kern/subr_nvpair.c (rev 0)
+++ trunk/sys/kern/subr_nvpair.c 2018-05-25 21:07:09 UTC (rev 9950)
@@ -0,0 +1,1112 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (c) 2009-2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Pawel Jakub Dawidek under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: stable/10/sys/kern/subr_nvpair.c 292973 2015-12-31 03:28:14Z ngie $");
+
+#include <sys/param.h>
+#include <sys/endian.h>
+#include <sys/queue.h>
+
+#ifdef _KERNEL
+
+#include <sys/errno.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+
+#include <machine/stdarg.h>
+
+#else
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common_impl.h"
+#endif
+
+#ifdef HAVE_PJDLOG
+#include <pjdlog.h>
+#endif
+
+#include <sys/nv.h>
+#include <sys/nv_impl.h>
+#include <sys/nvlist_impl.h>
+#include <sys/nvpair_impl.h>
+
+#ifndef HAVE_PJDLOG
+#ifdef _KERNEL
+#define PJDLOG_ASSERT(...) MPASS(__VA_ARGS__)
+#define PJDLOG_RASSERT(expr, ...) KASSERT(expr, (__VA_ARGS__))
+#define PJDLOG_ABORT(...) panic(__VA_ARGS__)
+#else
+#include <assert.h>
+#define PJDLOG_ASSERT(...) assert(__VA_ARGS__)
+#define PJDLOG_RASSERT(expr, ...) assert(expr)
+#define PJDLOG_ABORT(...) abort()
+#endif
+#endif
+
+#define NVPAIR_MAGIC 0x6e7670 /* "nvp" */
+struct nvpair {
+ int nvp_magic;
+ char *nvp_name;
+ int nvp_type;
+ uint64_t nvp_data;
+ size_t nvp_datasize;
+ nvlist_t *nvp_list;
+ TAILQ_ENTRY(nvpair) nvp_next;
+};
+
+#define NVPAIR_ASSERT(nvp) do { \
+ PJDLOG_ASSERT((nvp) != NULL); \
+ PJDLOG_ASSERT((nvp)->nvp_magic == NVPAIR_MAGIC); \
+} while (0)
+
+struct nvpair_header {
+ uint8_t nvph_type;
+ uint16_t nvph_namesize;
+ uint64_t nvph_datasize;
+} __packed;
+
+
+void
+nvpair_assert(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+}
+
+nvlist_t *
+nvpair_nvlist(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+
+ return (nvp->nvp_list);
+}
+
+nvpair_t *
+nvpair_next(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_list != NULL);
+
+ return (TAILQ_NEXT(nvp, nvp_next));
+}
+
+nvpair_t *
+nvpair_prev(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_list != NULL);
+
+ return (TAILQ_PREV(nvp, nvl_head, nvp_next));
+}
+
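+/*
+ * Link a free-standing pair into an nvlist's tail queue.  The caller
+ * guarantees that no pair with the same name is already present.
+ */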
+void
+nvpair_insert(struct nvl_head *head, nvpair_t *nvp, nvlist_t *nvl)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_list == NULL);
+ PJDLOG_ASSERT(!nvlist_exists(nvl, nvpair_name(nvp)));
+
+ TAILQ_INSERT_TAIL(head, nvp, nvp_next);
+ nvp->nvp_list = nvl;
+}
+
+static void
+nvpair_remove_nvlist(nvpair_t *nvp)
+{
+ nvlist_t *nvl;
+
+ /* XXX: DECONST is bad, mkay? */
+ nvl = __DECONST(nvlist_t *, nvpair_get_nvlist(nvp));
+ PJDLOG_ASSERT(nvl != NULL);
+ nvlist_set_parent(nvl, NULL);
+}
+
+void
+nvpair_remove(struct nvl_head *head, nvpair_t *nvp, const nvlist_t *nvl)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_list == nvl);
+
+ if (nvpair_type(nvp) == NV_TYPE_NVLIST)
+ nvpair_remove_nvlist(nvp);
+
+ TAILQ_REMOVE(head, nvp, nvp_next);
+ nvp->nvp_list = NULL;
+}
+
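+/*
+ * Deep-copy a pair: scalar values are copied as-is, while strings,
+ * binary buffers, child nvlists and descriptors get their own freshly
+ * allocated (or dup'ed) backing storage.
+ */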
+nvpair_t *
+nvpair_clone(const nvpair_t *nvp)
+{
+ nvpair_t *newnvp;
+ const char *name;
+ const void *data;
+ size_t datasize;
+
+ NVPAIR_ASSERT(nvp);
+
+ name = nvpair_name(nvp);
+
+ switch (nvpair_type(nvp)) {
+ case NV_TYPE_NULL:
+ newnvp = nvpair_create_null(name);
+ break;
+ case NV_TYPE_BOOL:
+ newnvp = nvpair_create_bool(name, nvpair_get_bool(nvp));
+ break;
+ case NV_TYPE_NUMBER:
+ newnvp = nvpair_create_number(name, nvpair_get_number(nvp));
+ break;
+ case NV_TYPE_STRING:
+ newnvp = nvpair_create_string(name, nvpair_get_string(nvp));
+ break;
+ case NV_TYPE_NVLIST:
+ newnvp = nvpair_create_nvlist(name, nvpair_get_nvlist(nvp));
+ break;
+#ifndef _KERNEL
+ case NV_TYPE_DESCRIPTOR:
+ newnvp = nvpair_create_descriptor(name,
+ nvpair_get_descriptor(nvp));
+ break;
+#endif
+ case NV_TYPE_BINARY:
+ data = nvpair_get_binary(nvp, &datasize);
+ newnvp = nvpair_create_binary(name, data, datasize);
+ break;
+ default:
+ PJDLOG_ABORT("Unknown type: %d.", nvpair_type(nvp));
+ }
+
+ return (newnvp);
+}
+
+size_t
+nvpair_header_size(void)
+{
+
+ return (sizeof(struct nvpair_header));
+}
+
+size_t
+nvpair_size(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+
+ return (nvp->nvp_datasize);
+}
+
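+/*
+ * Serialize the fixed-size header and the NUL-terminated name into the
+ * buffer, advancing the cursor and decrementing the remaining space.
+ */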
+unsigned char *
+nvpair_pack_header(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp)
+{
+ struct nvpair_header nvphdr;
+ size_t namesize;
+
+ NVPAIR_ASSERT(nvp);
+
+ nvphdr.nvph_type = nvp->nvp_type;
+ namesize = strlen(nvp->nvp_name) + 1;
+ PJDLOG_ASSERT(namesize > 0 && namesize <= UINT16_MAX);
+ nvphdr.nvph_namesize = namesize;
+ nvphdr.nvph_datasize = nvp->nvp_datasize;
+ PJDLOG_ASSERT(*leftp >= sizeof(nvphdr));
+ memcpy(ptr, &nvphdr, sizeof(nvphdr));
+ ptr += sizeof(nvphdr);
+ *leftp -= sizeof(nvphdr);
+
+ PJDLOG_ASSERT(*leftp >= namesize);
+ memcpy(ptr, nvp->nvp_name, namesize);
+ ptr += namesize;
+ *leftp -= namesize;
+
+ return (ptr);
+}
+
+unsigned char *
+nvpair_pack_null(const nvpair_t *nvp, unsigned char *ptr,
+ size_t *leftp __unused)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NULL);
+
+ return (ptr);
+}
+
+unsigned char *
+nvpair_pack_bool(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp)
+{
+ uint8_t value;
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BOOL);
+
+ value = (uint8_t)nvp->nvp_data;
+
+ PJDLOG_ASSERT(*leftp >= sizeof(value));
+ memcpy(ptr, &value, sizeof(value));
+ ptr += sizeof(value);
+ *leftp -= sizeof(value);
+
+ return (ptr);
+}
+
+unsigned char *
+nvpair_pack_number(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp)
+{
+ uint64_t value;
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NUMBER);
+
+ value = (uint64_t)nvp->nvp_data;
+
+ PJDLOG_ASSERT(*leftp >= sizeof(value));
+ memcpy(ptr, &value, sizeof(value));
+ ptr += sizeof(value);
+ *leftp -= sizeof(value);
+
+ return (ptr);
+}
+
+unsigned char *
+nvpair_pack_string(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING);
+
+ PJDLOG_ASSERT(*leftp >= nvp->nvp_datasize);
+ memcpy(ptr, (const void *)(intptr_t)nvp->nvp_data, nvp->nvp_datasize);
+ ptr += nvp->nvp_datasize;
+ *leftp -= nvp->nvp_datasize;
+
+ return (ptr);
+}
+
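+/*
+ * Emit the special NVLIST_UP marker (empty name, no data) that tells
+ * the unpacking side a nested nvlist has ended and to return to its
+ * parent.
+ */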
+unsigned char *
+nvpair_pack_nvlist_up(unsigned char *ptr, size_t *leftp)
+{
+ struct nvpair_header nvphdr;
+ size_t namesize;
+ const char *name = "";
+
+ namesize = 1;
+ nvphdr.nvph_type = NV_TYPE_NVLIST_UP;
+ nvphdr.nvph_namesize = namesize;
+ nvphdr.nvph_datasize = 0;
+ PJDLOG_ASSERT(*leftp >= sizeof(nvphdr));
+ memcpy(ptr, &nvphdr, sizeof(nvphdr));
+ ptr += sizeof(nvphdr);
+ *leftp -= sizeof(nvphdr);
+
+ PJDLOG_ASSERT(*leftp >= namesize);
+ memcpy(ptr, name, namesize);
+ ptr += namesize;
+ *leftp -= namesize;
+
+ return (ptr);
+}
+
+#ifndef _KERNEL
+unsigned char *
+nvpair_pack_descriptor(const nvpair_t *nvp, unsigned char *ptr, int64_t *fdidxp,
+ size_t *leftp)
+{
+ int64_t value;
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR);
+
+ value = (int64_t)nvp->nvp_data;
+ if (value != -1) {
+ /*
+ * If there is a real descriptor here, we change its number
+ * to its position in the array of descriptors sent via the
+ * control message.
+ */
+ PJDLOG_ASSERT(fdidxp != NULL);
+
+ value = *fdidxp;
+ (*fdidxp)++;
+ }
+
+ PJDLOG_ASSERT(*leftp >= sizeof(value));
+ memcpy(ptr, &value, sizeof(value));
+ ptr += sizeof(value);
+ *leftp -= sizeof(value);
+
+ return (ptr);
+}
+#endif
+
+unsigned char *
+nvpair_pack_binary(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY);
+
+ PJDLOG_ASSERT(*leftp >= nvp->nvp_datasize);
+ memcpy(ptr, (const void *)(intptr_t)nvp->nvp_data, nvp->nvp_datasize);
+ ptr += nvp->nvp_datasize;
+ *leftp -= nvp->nvp_datasize;
+
+ return (ptr);
+}
+
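+/*
+ * For nvlist-typed pairs the data size is only known at pack time;
+ * recompute it from the current size of the child nvlist.
+ */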
+void
+nvpair_init_datasize(nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+
+ if (nvp->nvp_type == NV_TYPE_NVLIST) {
+ if (nvp->nvp_data == 0) {
+ nvp->nvp_datasize = 0;
+ } else {
+ nvp->nvp_datasize =
+ nvlist_size((const nvlist_t *)(intptr_t)nvp->nvp_data);
+ }
+ }
+}
+
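+/*
+ * Unpack and validate a pair header: check the type range, the name
+ * length (limited to NV_NAME_MAX and required to be NUL-terminated
+ * within that length) and that the declared data fits in the remaining
+ * buffer, byte-swapping the sizes when the sender's endianness differs
+ * from ours.
+ */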
+const unsigned char *
+nvpair_unpack_header(bool isbe, nvpair_t *nvp, const unsigned char *ptr,
+ size_t *leftp)
+{
+ struct nvpair_header nvphdr;
+
+ if (*leftp < sizeof(nvphdr))
+ goto failed;
+
+ memcpy(&nvphdr, ptr, sizeof(nvphdr));
+ ptr += sizeof(nvphdr);
+ *leftp -= sizeof(nvphdr);
+
+#if NV_TYPE_FIRST > 0
+ if (nvphdr.nvph_type < NV_TYPE_FIRST)
+ goto failed;
+#endif
+ if (nvphdr.nvph_type > NV_TYPE_LAST &&
+ nvphdr.nvph_type != NV_TYPE_NVLIST_UP) {
+ goto failed;
+ }
+
+#if BYTE_ORDER == BIG_ENDIAN
+ if (!isbe) {
+ nvphdr.nvph_namesize = le16toh(nvphdr.nvph_namesize);
+ nvphdr.nvph_datasize = le64toh(nvphdr.nvph_datasize);
+ }
+#else
+ if (isbe) {
+ nvphdr.nvph_namesize = be16toh(nvphdr.nvph_namesize);
+ nvphdr.nvph_datasize = be64toh(nvphdr.nvph_datasize);
+ }
+#endif
+
+ if (nvphdr.nvph_namesize > NV_NAME_MAX)
+ goto failed;
+ if (*leftp < nvphdr.nvph_namesize)
+ goto failed;
+ if (nvphdr.nvph_namesize < 1)
+ goto failed;
+ if (strnlen((const char *)ptr, nvphdr.nvph_namesize) !=
+ (size_t)(nvphdr.nvph_namesize - 1)) {
+ goto failed;
+ }
+
+ memcpy(nvp->nvp_name, ptr, nvphdr.nvph_namesize);
+ ptr += nvphdr.nvph_namesize;
+ *leftp -= nvphdr.nvph_namesize;
+
+ if (*leftp < nvphdr.nvph_datasize)
+ goto failed;
+
+ nvp->nvp_type = nvphdr.nvph_type;
+ nvp->nvp_data = 0;
+ nvp->nvp_datasize = nvphdr.nvph_datasize;
+
+ return (ptr);
+failed:
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+}
+
+const unsigned char *
+nvpair_unpack_null(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr,
+ size_t *leftp __unused)
+{
+
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NULL);
+
+ if (nvp->nvp_datasize != 0) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ return (ptr);
+}
+
+const unsigned char *
+nvpair_unpack_bool(bool isbe __unused, nvpair_t *nvp, const unsigned char *ptr,
+ size_t *leftp)
+{
+ uint8_t value;
+
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BOOL);
+
+ if (nvp->nvp_datasize != sizeof(value)) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+ if (*leftp < sizeof(value)) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ memcpy(&value, ptr, sizeof(value));
+ ptr += sizeof(value);
+ *leftp -= sizeof(value);
+
+ if (value != 0 && value != 1) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ nvp->nvp_data = (uint64_t)value;
+
+ return (ptr);
+}
+
+const unsigned char *
+nvpair_unpack_number(bool isbe, nvpair_t *nvp, const unsigned char *ptr,
+ size_t *leftp)
+{
+
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NUMBER);
+
+ if (nvp->nvp_datasize != sizeof(uint64_t)) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+ if (*leftp < sizeof(uint64_t)) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ if (isbe)
+ nvp->nvp_data = be64dec(ptr);
+ else
+ nvp->nvp_data = le64dec(ptr);
+ ptr += sizeof(uint64_t);
+ *leftp -= sizeof(uint64_t);
+
+ return (ptr);
+}
+
+const unsigned char *
+nvpair_unpack_string(bool isbe __unused, nvpair_t *nvp,
+ const unsigned char *ptr, size_t *leftp)
+{
+
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING);
+
+ if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ if (strnlen((const char *)ptr, nvp->nvp_datasize) !=
+ nvp->nvp_datasize - 1) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ nvp->nvp_data = (uint64_t)(uintptr_t)nv_strdup((const char *)ptr);
+ if (nvp->nvp_data == 0)
+ return (NULL);
+
+ ptr += nvp->nvp_datasize;
+ *leftp -= nvp->nvp_datasize;
+
+ return (ptr);
+}
+
+const unsigned char *
+nvpair_unpack_nvlist(bool isbe __unused, nvpair_t *nvp,
+ const unsigned char *ptr, size_t *leftp, size_t nfds, nvlist_t **child)
+{
+ nvlist_t *value;
+
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NVLIST);
+
+ if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ value = nvlist_create(0);
+ if (value == NULL)
+ return (NULL);
+
+ ptr = nvlist_unpack_header(value, ptr, nfds, NULL, leftp);
+ if (ptr == NULL) {
+ nvlist_destroy(value);
+ return (NULL);
+ }
+
+ nvp->nvp_data = (uint64_t)(uintptr_t)value;
+ *child = value;
+
+ return (ptr);
+}
+
+#ifndef _KERNEL
+const unsigned char *
+nvpair_unpack_descriptor(bool isbe, nvpair_t *nvp, const unsigned char *ptr,
+ size_t *leftp, const int *fds, size_t nfds)
+{
+ int64_t idx;
+
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR);
+
+ if (nvp->nvp_datasize != sizeof(idx)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ if (*leftp < sizeof(idx)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ if (isbe)
+ idx = be64dec(ptr);
+ else
+ idx = le64dec(ptr);
+
+ if (idx < 0) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ if ((size_t)idx >= nfds) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ nvp->nvp_data = (uint64_t)fds[idx];
+
+ ptr += sizeof(idx);
+ *leftp -= sizeof(idx);
+
+ return (ptr);
+}
+#endif
+
+const unsigned char *
+nvpair_unpack_binary(bool isbe __unused, nvpair_t *nvp,
+ const unsigned char *ptr, size_t *leftp)
+{
+ void *value;
+
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY);
+
+ if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ value = nv_malloc(nvp->nvp_datasize);
+ if (value == NULL)
+ return (NULL);
+
+ memcpy(value, ptr, nvp->nvp_datasize);
+ ptr += nvp->nvp_datasize;
+ *leftp -= nvp->nvp_datasize;
+
+ nvp->nvp_data = (uint64_t)(uintptr_t)value;
+
+ return (ptr);
+}
+
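+/*
+ * Unpack a pair header into a freshly allocated pair.  The pair is
+ * first sized for the longest allowed name and then shrunk to fit the
+ * actual one; the caller unpacks the value separately based on the
+ * type stored in the header.
+ */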
+const unsigned char *
+nvpair_unpack(bool isbe, const unsigned char *ptr, size_t *leftp,
+ nvpair_t **nvpp)
+{
+ nvpair_t *nvp, *tmp;
+
+ nvp = nv_calloc(1, sizeof(*nvp) + NV_NAME_MAX);
+ if (nvp == NULL)
+ return (NULL);
+ nvp->nvp_name = (char *)(nvp + 1);
+
+ ptr = nvpair_unpack_header(isbe, nvp, ptr, leftp);
+ if (ptr == NULL)
+ goto failed;
+ tmp = nv_realloc(nvp, sizeof(*nvp) + strlen(nvp->nvp_name) + 1);
+ if (tmp == NULL)
+ goto failed;
+ nvp = tmp;
+
+ /* Update nvp_name after realloc(). */
+ nvp->nvp_name = (char *)(nvp + 1);
+ nvp->nvp_data = 0x00;
+ nvp->nvp_magic = NVPAIR_MAGIC;
+ *nvpp = nvp;
+ return (ptr);
+failed:
+ nv_free(nvp);
+ return (NULL);
+}
+
+int
+nvpair_type(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+
+ return (nvp->nvp_type);
+}
+
+const char *
+nvpair_name(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+
+ return (nvp->nvp_name);
+}
+
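+/*
+ * Common constructor used by the nvpair_create_*() and nvpair_move_*()
+ * functions: allocate the pair and its name in a single allocation and
+ * record type, value and data size.
+ */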
+static nvpair_t *
+nvpair_allocv(const char *name, int type, uint64_t data, size_t datasize)
+{
+ nvpair_t *nvp;
+ size_t namelen;
+
+ PJDLOG_ASSERT(type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST);
+
+ namelen = strlen(name);
+ if (namelen >= NV_NAME_MAX) {
+ RESTORE_ERRNO(ENAMETOOLONG);
+ return (NULL);
+ }
+
+ nvp = nv_calloc(1, sizeof(*nvp) + namelen + 1);
+ if (nvp != NULL) {
+ nvp->nvp_name = (char *)(nvp + 1);
+ memcpy(nvp->nvp_name, name, namelen);
+ nvp->nvp_name[namelen] = '\0';
+ nvp->nvp_type = type;
+ nvp->nvp_data = data;
+ nvp->nvp_datasize = datasize;
+ nvp->nvp_magic = NVPAIR_MAGIC;
+ }
+
+ return (nvp);
+}
+
+nvpair_t *
+nvpair_create_stringf(const char *name, const char *valuefmt, ...)
+{
+ va_list valueap;
+ nvpair_t *nvp;
+
+ va_start(valueap, valuefmt);
+ nvp = nvpair_create_stringv(name, valuefmt, valueap);
+ va_end(valueap);
+
+ return (nvp);
+}
+
+nvpair_t *
+nvpair_create_stringv(const char *name, const char *valuefmt, va_list valueap)
+{
+ nvpair_t *nvp;
+ char *str;
+ int len;
+
+ len = nv_vasprintf(&str, valuefmt, valueap);
+ if (len < 0)
+ return (NULL);
+ nvp = nvpair_create_string(name, str);
+ if (nvp == NULL)
+ nv_free(str);
+ return (nvp);
+}
+
+nvpair_t *
+nvpair_create_null(const char *name)
+{
+
+ return (nvpair_allocv(name, NV_TYPE_NULL, 0, 0));
+}
+
+nvpair_t *
+nvpair_create_bool(const char *name, bool value)
+{
+
+ return (nvpair_allocv(name, NV_TYPE_BOOL, value ? 1 : 0,
+ sizeof(uint8_t)));
+}
+
+nvpair_t *
+nvpair_create_number(const char *name, uint64_t value)
+{
+
+ return (nvpair_allocv(name, NV_TYPE_NUMBER, value, sizeof(value)));
+}
+
+nvpair_t *
+nvpair_create_string(const char *name, const char *value)
+{
+ nvpair_t *nvp;
+ size_t size;
+ char *data;
+
+ if (value == NULL) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ data = nv_strdup(value);
+ if (data == NULL)
+ return (NULL);
+ size = strlen(value) + 1;
+
+ nvp = nvpair_allocv(name, NV_TYPE_STRING, (uint64_t)(uintptr_t)data,
+ size);
+ if (nvp == NULL)
+ nv_free(data);
+
+ return (nvp);
+}
+
+nvpair_t *
+nvpair_create_nvlist(const char *name, const nvlist_t *value)
+{
+ nvlist_t *nvl;
+ nvpair_t *nvp;
+
+ if (value == NULL) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ nvl = nvlist_clone(value);
+ if (nvl == NULL)
+ return (NULL);
+
+ nvp = nvpair_allocv(name, NV_TYPE_NVLIST, (uint64_t)(uintptr_t)nvl, 0);
+ if (nvp == NULL)
+ nvlist_destroy(nvl);
+ else
+ nvlist_set_parent(nvl, nvp);
+
+ return (nvp);
+}
+
+#ifndef _KERNEL
+nvpair_t *
+nvpair_create_descriptor(const char *name, int value)
+{
+ nvpair_t *nvp;
+
+ if (value < 0 || !fd_is_valid(value)) {
+ errno = EBADF;
+ return (NULL);
+ }
+
+ value = fcntl(value, F_DUPFD_CLOEXEC, 0);
+ if (value < 0)
+ return (NULL);
+
+ nvp = nvpair_allocv(name, NV_TYPE_DESCRIPTOR, (uint64_t)value,
+ sizeof(int64_t));
+ if (nvp == NULL)
+ close(value);
+
+ return (nvp);
+}
+#endif
+
+nvpair_t *
+nvpair_create_binary(const char *name, const void *value, size_t size)
+{
+ nvpair_t *nvp;
+ void *data;
+
+ if (value == NULL || size == 0) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ data = nv_malloc(size);
+ if (data == NULL)
+ return (NULL);
+ memcpy(data, value, size);
+
+ nvp = nvpair_allocv(name, NV_TYPE_BINARY, (uint64_t)(uintptr_t)data,
+ size);
+ if (nvp == NULL)
+ nv_free(data);
+
+ return (nvp);
+}
+
+nvpair_t *
+nvpair_move_string(const char *name, char *value)
+{
+ nvpair_t *nvp;
+ int serrno;
+
+ if (value == NULL) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ nvp = nvpair_allocv(name, NV_TYPE_STRING, (uint64_t)(uintptr_t)value,
+ strlen(value) + 1);
+ if (nvp == NULL) {
+ SAVE_ERRNO(serrno);
+ nv_free(value);
+ RESTORE_ERRNO(serrno);
+ }
+
+ return (nvp);
+}
+
+nvpair_t *
+nvpair_move_nvlist(const char *name, nvlist_t *value)
+{
+ nvpair_t *nvp;
+
+ if (value == NULL || nvlist_get_nvpair_parent(value) != NULL) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ if (nvlist_error(value) != 0) {
+ RESTORE_ERRNO(nvlist_error(value));
+ nvlist_destroy(value);
+ return (NULL);
+ }
+
+ nvp = nvpair_allocv(name, NV_TYPE_NVLIST, (uint64_t)(uintptr_t)value,
+ 0);
+ if (nvp == NULL)
+ nvlist_destroy(value);
+ else
+ nvlist_set_parent(value, nvp);
+
+ return (nvp);
+}
+
+#ifndef _KERNEL
+nvpair_t *
+nvpair_move_descriptor(const char *name, int value)
+{
+ nvpair_t *nvp;
+ int serrno;
+
+ if (value < 0 || !fd_is_valid(value)) {
+ errno = EBADF;
+ return (NULL);
+ }
+
+ nvp = nvpair_allocv(name, NV_TYPE_DESCRIPTOR, (uint64_t)value,
+ sizeof(int64_t));
+ if (nvp == NULL) {
+ serrno = errno;
+ close(value);
+ errno = serrno;
+ }
+
+ return (nvp);
+}
+#endif
+
+nvpair_t *
+nvpair_move_binary(const char *name, void *value, size_t size)
+{
+ nvpair_t *nvp;
+ int serrno;
+
+ if (value == NULL || size == 0) {
+ RESTORE_ERRNO(EINVAL);
+ return (NULL);
+ }
+
+ nvp = nvpair_allocv(name, NV_TYPE_BINARY, (uint64_t)(uintptr_t)value,
+ size);
+ if (nvp == NULL) {
+ SAVE_ERRNO(serrno);
+ nv_free(value);
+ RESTORE_ERRNO(serrno);
+ }
+
+ return (nvp);
+}
+
+bool
+nvpair_get_bool(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+
+ return (nvp->nvp_data == 1);
+}
+
+uint64_t
+nvpair_get_number(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+
+ return (nvp->nvp_data);
+}
+
+const char *
+nvpair_get_string(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING);
+
+ return ((const char *)(intptr_t)nvp->nvp_data);
+}
+
+const nvlist_t *
+nvpair_get_nvlist(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NVLIST);
+
+ return ((const nvlist_t *)(intptr_t)nvp->nvp_data);
+}
+
+#ifndef _KERNEL
+int
+nvpair_get_descriptor(const nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR);
+
+ return ((int)nvp->nvp_data);
+}
+#endif
+
+const void *
+nvpair_get_binary(const nvpair_t *nvp, size_t *sizep)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY);
+
+ if (sizep != NULL)
+ *sizep = nvp->nvp_datasize;
+ return ((const void *)(intptr_t)nvp->nvp_data);
+}
+
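+/*
+ * Destroy a pair that is not linked into any nvlist, releasing whatever
+ * its value owns: close descriptors, destroy child nvlists and free
+ * string or binary buffers.
+ */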
+void
+nvpair_free(nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_list == NULL);
+
+ nvp->nvp_magic = 0;
+ switch (nvp->nvp_type) {
+#ifndef _KERNEL
+ case NV_TYPE_DESCRIPTOR:
+ close((int)nvp->nvp_data);
+ break;
+#endif
+ case NV_TYPE_NVLIST:
+ nvlist_destroy((nvlist_t *)(intptr_t)nvp->nvp_data);
+ break;
+ case NV_TYPE_STRING:
+ nv_free((char *)(intptr_t)nvp->nvp_data);
+ break;
+ case NV_TYPE_BINARY:
+ nv_free((void *)(intptr_t)nvp->nvp_data);
+ break;
+ }
+ nv_free(nvp);
+}
+
+void
+nvpair_free_structure(nvpair_t *nvp)
+{
+
+ NVPAIR_ASSERT(nvp);
+ PJDLOG_ASSERT(nvp->nvp_list == NULL);
+
+ nvp->nvp_magic = 0;
+ nv_free(nvp);
+}
+
+const char *
+nvpair_type_string(int type)
+{
+
+ switch (type) {
+ case NV_TYPE_NULL:
+ return ("NULL");
+ case NV_TYPE_BOOL:
+ return ("BOOL");
+ case NV_TYPE_NUMBER:
+ return ("NUMBER");
+ case NV_TYPE_STRING:
+ return ("STRING");
+ case NV_TYPE_NVLIST:
+ return ("NVLIST");
+ case NV_TYPE_DESCRIPTOR:
+ return ("DESCRIPTOR");
+ case NV_TYPE_BINARY:
+ return ("BINARY");
+ default:
+ return ("<UNKNOWN>");
+ }
+}
+
Property changes on: trunk/sys/kern/subr_nvpair.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
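The nvpair routines above are normally driven through the public nvlist
interface rather than called directly.  A minimal userland sketch of that
usage (an illustration only: it assumes the two-argument nvlist_unpack()
of this vintage of libnv, abbreviates error handling, and is compiled
with "cc example.c -lnv"):

#include <sys/nv.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	nvlist_t *nvl, *copy;
	void *buf;
	size_t size;

	/* Build an nvlist; each add creates an nvpair via nvpair_create_*(). */
	nvl = nvlist_create(0);
	nvlist_add_string(nvl, "hostname", "example");
	nvlist_add_number(nvl, "port", 8080);
	if (nvlist_error(nvl) != 0)
		errx(1, "nvlist_error: %d", nvlist_error(nvl));

	/* Serialize: walks the pairs using nvpair_pack_header() and friends. */
	buf = nvlist_pack(nvl, &size);
	if (buf == NULL)
		err(1, "nvlist_pack");

	/* Deserialize into a new list; nvpair_unpack*() validate each header. */
	copy = nvlist_unpack(buf, size);
	if (copy == NULL)
		err(1, "nvlist_unpack");

	printf("%s:%ju\n", nvlist_get_string(copy, "hostname"),
	    (uintmax_t)nvlist_get_number(copy, "port"));

	free(buf);
	nvlist_destroy(nvl);
	nvlist_destroy(copy);
	return (0);
}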