[Midnightbsd-cvs] src [9963] trunk/sys/kern: sync with freebsd 10-stable

laffer1 at midnightbsd.org
Sat May 26 10:34:14 EDT 2018


Revision: 9963
          http://svnweb.midnightbsd.org/src/?rev=9963
Author:   laffer1
Date:     2018-05-26 10:34:14 -0400 (Sat, 26 May 2018)
Log Message:
-----------
sync with freebsd 10-stable

Modified Paths:
--------------
    trunk/sys/kern/kern_rangelock.c
    trunk/sys/kern/kern_resource.c
    trunk/sys/kern/kern_shutdown.c

Modified: trunk/sys/kern/kern_rangelock.c
===================================================================
--- trunk/sys/kern/kern_rangelock.c	2018-05-26 14:33:11 UTC (rev 9962)
+++ trunk/sys/kern/kern_rangelock.c	2018-05-26 14:34:14 UTC (rev 9963)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2009 Konstantin Belousov <kib at FreeBSD.org>
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_rangelock.c 254380 2013-08-15 20:19:17Z cperciva $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -84,20 +85,14 @@
 }
 
 /*
- * Verifies the supplied rl_q_entries for compatibility.  Returns true
- * if the rangelock queue entries are not compatible, false if they are.
- *
  * Two entries are compatible if their ranges do not overlap, or both
  * entries are for read.
  */
 static int
-rangelock_incompatible(const struct rl_q_entry *e1,
+ranges_overlap(const struct rl_q_entry *e1,
     const struct rl_q_entry *e2)
 {
 
-	if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
-	    (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
-		return (0);
 	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
 		return (1);
 	return (0);
@@ -109,30 +104,38 @@
 static void
 rangelock_calc_block(struct rangelock *lock)
 {
-	struct rl_q_entry *entry, *entry1, *whead;
+	struct rl_q_entry *entry, *nextentry, *entry1;
 
-	if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
-	    lock->rl_currdep != NULL)
-		lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
-	for (entry = lock->rl_currdep; entry != NULL;
-	     entry = TAILQ_NEXT(entry, rl_q_link)) {
-		TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
-			if (rangelock_incompatible(entry, entry1))
-				goto out;
-			if (entry1 == entry)
-				break;
+	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
+		nextentry = TAILQ_NEXT(entry, rl_q_link);
+		if (entry->rl_q_flags & RL_LOCK_READ) {
+			/* Reads must not overlap with granted writes. */
+			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
+			    !(entry1->rl_q_flags & RL_LOCK_READ);
+			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
+				if (ranges_overlap(entry, entry1))
+					goto out;
+			}
+		} else {
+			/* Write must not overlap with any granted locks. */
+			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
+			    entry1 != entry;
+			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
+				if (ranges_overlap(entry, entry1))
+					goto out;
+			}
+
+			/* Move grantable write locks to the front. */
+			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
+			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
 		}
+
+		/* Grant this lock. */
+		entry->rl_q_flags |= RL_LOCK_GRANTED;
+		wakeup(entry);
 	}
 out:
 	lock->rl_currdep = entry;
-	TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
-		if (whead == lock->rl_currdep)
-			break;
-		if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
-			whead->rl_q_flags |= RL_LOCK_GRANTED;
-			wakeup(whead);
-		}
-	}
 }
 
 static void

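A note on the rangelock changes above: the old rangelock_incompatible() folded the read-vs-read special case into the predicate itself, while the rewritten rangelock_calc_block() handles that policy in the caller (a read request is only checked against granted writes), so the helper shrinks to a pure half-open interval overlap test and is renamed ranges_overlap(). Below is a minimal standalone sketch of that test; the struct and the driver are illustrative stand-ins, not the kernel's struct rl_q_entry.

    #include <stdio.h>

    /* A half-open byte range [start, end), as in the rangelock code. */
    struct range {
            unsigned long start;
            unsigned long end;
    };

    /*
     * Two half-open ranges overlap iff each one starts before the other
     * ends.  Adjacent ranges such as [0,4096) and [4096,8192) do not.
     */
    static int
    ranges_overlap(const struct range *a, const struct range *b)
    {

            return (a->start < b->end && a->end > b->start);
    }

    int
    main(void)
    {
            struct range a = { 0, 4096 }, b = { 4096, 8192 }, c = { 100, 200 };

            printf("a,b: %d\n", ranges_overlap(&a, &b));    /* 0 */
            printf("a,c: %d\n", ranges_overlap(&a, &c));    /* 1 */
            return (0);
    }

With read/read compatibility decided by the caller, the retained comment still holds: two entries are compatible if their ranges do not overlap, or both entries are for read.
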
Modified: trunk/sys/kern/kern_resource.c
===================================================================
--- trunk/sys/kern/kern_resource.c	2018-05-26 14:33:11 UTC (rev 9962)
+++ trunk/sys/kern/kern_resource.c	2018-05-26 14:34:14 UTC (rev 9963)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -35,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_resource.c 293473 2016-01-09 14:08:10Z dchagin $");
 
 #include "opt_compat.h"
 
@@ -80,6 +81,8 @@
 static struct uidinfo *uilookup(uid_t uid);
 static void	ruxagg_locked(struct rusage_ext *rux, struct thread *td);
 
+static __inline int	lim_shared(struct plimit *limp);
+
 /*
  * Resource controls and accounting.
  */
@@ -469,8 +472,7 @@
 int
 rtp_to_pri(struct rtprio *rtp, struct thread *td)
 {
-	u_char	newpri;
-	u_char	oldpri;
+	u_char  newpri, oldclass, oldpri;
 
 	switch (RTP_PRIO_BASE(rtp->type)) {
 	case RTP_PRIO_REALTIME:
@@ -493,11 +495,12 @@
 	}
 
 	thread_lock(td);
+	oldclass = td->td_pri_class;
 	sched_class(td, rtp->type);	/* XXX fix */
 	oldpri = td->td_user_pri;
 	sched_user_prio(td, newpri);
-	if (td->td_user_pri != oldpri && (td == curthread ||
-	    td->td_priority == oldpri || td->td_user_pri <= PRI_MAX_REALTIME))
+	if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL ||
+	    td->td_pri_class != RTP_PRIO_NORMAL))
 		sched_prio(td, td->td_user_pri);
 	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
 		critical_enter();
@@ -629,11 +632,11 @@
 	 */
 	if (p->p_cpulimit == RLIM_INFINITY)
 		return;
-	PROC_SLOCK(p);
+	PROC_STATLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td) {
 		ruxagg(p, td);
 	}
-	PROC_SUNLOCK(p);
+	PROC_STATUNLOCK(p);
 	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
 		lim_rlimit(p, RLIMIT_CPU, &rlim);
 		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
@@ -645,7 +648,8 @@
 		}
 	}
 	if ((p->p_flag & P_WEXIT) == 0)
-		callout_reset(&p->p_limco, hz, lim_cb, p);
+		callout_reset_sbt(&p->p_limco, SBT_1S, 0,
+		    lim_cb, p, C_PREL(1));
 }
 
 int
@@ -676,8 +680,13 @@
 		limp->rlim_max = RLIM_INFINITY;
 
 	oldssiz.rlim_cur = 0;
-	newlim = lim_alloc();
+	newlim = NULL;
 	PROC_LOCK(p);
+	if (lim_shared(p->p_limit)) {
+		PROC_UNLOCK(p);
+		newlim = lim_alloc();
+		PROC_LOCK(p);
+	}
 	oldlim = p->p_limit;
 	alimp = &oldlim->pl_rlimit[which];
 	if (limp->rlim_cur > alimp->rlim_max ||
@@ -684,13 +693,16 @@
 	    limp->rlim_max > alimp->rlim_max)
 		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
 			PROC_UNLOCK(p);
-			lim_free(newlim);
+			if (newlim != NULL)
+				lim_free(newlim);
 			return (error);
 		}
 	if (limp->rlim_cur > limp->rlim_max)
 		limp->rlim_cur = limp->rlim_max;
-	lim_copy(newlim, oldlim);
-	alimp = &newlim->pl_rlimit[which];
+	if (newlim != NULL) {
+		lim_copy(newlim, oldlim);
+		alimp = &newlim->pl_rlimit[which];
+	}
 
 	switch (which) {
 
@@ -697,7 +709,8 @@
 	case RLIMIT_CPU:
 		if (limp->rlim_cur != RLIM_INFINITY &&
 		    p->p_cpulimit == RLIM_INFINITY)
-			callout_reset(&p->p_limco, hz, lim_cb, p);
+			callout_reset_sbt(&p->p_limco, SBT_1S, 0,
+			    lim_cb, p, C_PREL(1));
 		p->p_cpulimit = limp->rlim_cur;
 		break;
 	case RLIMIT_DATA:
@@ -739,11 +752,18 @@
 	if (p->p_sysent->sv_fixlimit != NULL)
 		p->p_sysent->sv_fixlimit(limp, which);
 	*alimp = *limp;
-	p->p_limit = newlim;
+	if (newlim != NULL)
+		p->p_limit = newlim;
 	PROC_UNLOCK(p);
-	lim_free(oldlim);
+	if (newlim != NULL)
+		lim_free(oldlim);
 
-	if (which == RLIMIT_STACK) {
+	if (which == RLIMIT_STACK &&
+	    /*
+	     * Skip calls from exec_new_vmspace(), done when stack is
+	     * not mapped yet.
+	     */
+	    (td != curthread || (p->p_flag & P_INEXEC) == 0)) {
 		/*
 		 * Stack is allocated to the max at exec time with only
 		 * "rlim_cur" bytes accessible.  If stack limit is going
@@ -828,7 +848,7 @@
 	uint64_t runtime, u;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	PROC_SLOCK_ASSERT(p, MA_OWNED);
+	PROC_STATLOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * If we are getting stats for the current process, then add in the
 	 * stats that this thread has accumulated in its current time slice.
@@ -860,7 +880,7 @@
 	uint64_t runtime, u;
 
 	p = td->td_proc;
-	PROC_SLOCK_ASSERT(p, MA_OWNED);
+	PROC_STATLOCK_ASSERT(p, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	/*
 	 * If we are getting stats for the current thread, then add in the
@@ -996,11 +1016,11 @@
 		break;
 
 	case RUSAGE_THREAD:
-		PROC_SLOCK(p);
+		PROC_STATLOCK(p);
 		thread_lock(td);
 		rufetchtd(td, rup);
 		thread_unlock(td);
-		PROC_SUNLOCK(p);
+		PROC_STATUNLOCK(p);
 		break;
 
 	default:
@@ -1047,7 +1067,7 @@
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED);
+	PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
 	rux->rux_runtime += td->td_incruntime;
 	rux->rux_uticks += td->td_uticks;
 	rux->rux_sticks += td->td_sticks;
@@ -1077,7 +1097,7 @@
 {
 	struct thread *td;
 
-	PROC_SLOCK_ASSERT(p, MA_OWNED);
+	PROC_STATLOCK_ASSERT(p, MA_OWNED);
 
 	*ru = p->p_ru;
 	if (p->p_numthreads > 0)  {
@@ -1098,10 +1118,10 @@
     struct timeval *sp)
 {
 
-	PROC_SLOCK(p);
+	PROC_STATLOCK(p);
 	rufetch(p, ru);
 	calcru(p, up, sp);
-	PROC_SUNLOCK(p);
+	PROC_STATUNLOCK(p);
 }
 
 /*
@@ -1127,13 +1147,26 @@
 	return (limp);
 }
 
+static __inline int
+lim_shared(limp)
+	struct plimit *limp;
+{
+
+	return (limp->pl_refcnt > 1);
+}
+
 void
 lim_fork(struct proc *p1, struct proc *p2)
 {
+
+	PROC_LOCK_ASSERT(p1, MA_OWNED);
+	PROC_LOCK_ASSERT(p2, MA_OWNED);
+
 	p2->p_limit = lim_hold(p1->p_limit);
 	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
 	if (p1->p_cpulimit != RLIM_INFINITY)
-		callout_reset(&p2->p_limco, hz, lim_cb, p2);
+		callout_reset_sbt(&p2->p_limco, SBT_1S, 0,
+		    lim_cb, p2, C_PREL(1));
 }
 
 void
@@ -1155,7 +1188,7 @@
 	struct plimit *dst, *src;
 {
 
-	KASSERT(dst->pl_refcnt == 1, ("lim_copy to shared limit"));
+	KASSERT(!lim_shared(dst), ("lim_copy to shared limit"));
 	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
 }
 
@@ -1425,21 +1458,3 @@
 	}
 	return (1);
 }
-
-int
-chgkqcnt(struct uidinfo *uip, int diff, rlim_t max)
-{
-
-	if (diff > 0 && max != 0) {
-		if (atomic_fetchadd_long(&uip->ui_kqcnt, (long)diff) +
-		    diff > max) {
-			atomic_subtract_long(&uip->ui_kqcnt, (long)diff);
-			return (0);
-		}
-	} else {
-		atomic_add_long(&uip->ui_kqcnt, (long)diff);
-		if (uip->ui_kqcnt < 0)
-			printf("negative kqcnt for uid = %d\n", uip->ui_uid);
-	}
-	return (1);
-}

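The kern_setrlimit() hunks above add a copy-on-write shortcut for process limits: a new plimit is allocated and copied only when the current one is shared between processes (lim_shared(), i.e. pl_refcnt > 1); a sole owner now edits its limits in place. The lock-drop around lim_alloc() is there because the allocation can sleep, and the proc mutex may not be held across a sleep. Here is a minimal userspace sketch of the same pattern with locking elided; every name in it is invented for illustration.

    #include <stdlib.h>
    #include <string.h>

    struct limits {
            int  refcnt;            /* number of processes sharing this */
            long rlim[15];
    };

    static int
    lim_shared(const struct limits *lp)
    {

            return (lp->refcnt > 1);
    }

    /*
     * Return a limits structure the caller may write to: the original if
     * it is privately owned, otherwise a detached copy.
     */
    static struct limits *
    lim_cow(struct limits **lpp)
    {
            struct limits *lp = *lpp, *copy;

            if (!lim_shared(lp))
                    return (lp);
            copy = malloc(sizeof(*copy));
            if (copy == NULL)
                    return (NULL);
            memcpy(copy->rlim, lp->rlim, sizeof(lp->rlim));
            copy->refcnt = 1;
            lp->refcnt--;           /* still >= 1, so nothing to free */
            *lpp = copy;
            return (copy);
    }

    int
    main(void)
    {
            struct limits shared = { .refcnt = 2 };
            struct limits *lp = &shared, *wp;

            wp = lim_cow(&lp);      /* detaches: lp no longer == &shared */
            if (wp != NULL)
                    wp->rlim[0] = 123;
            return (0);
    }

kern_setrlimit() keeps newlim NULL when no copy was needed, which is why the later copy, assignment, and free sites in the diff all become conditional.
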
Modified: trunk/sys/kern/kern_shutdown.c
===================================================================
--- trunk/sys/kern/kern_shutdown.c	2018-05-26 14:33:11 UTC (rev 9962)
+++ trunk/sys/kern/kern_shutdown.c	2018-05-26 14:34:14 UTC (rev 9963)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1986, 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -35,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/kern/kern_shutdown.c 305853 2016-09-16 00:14:26Z hiren $");
 
 #include "opt_ddb.h"
 #include "opt_kdb.h"
@@ -50,11 +51,13 @@
 #include <sys/conf.h>
 #include <sys/cons.h>
 #include <sys/eventhandler.h>
+#include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kerneldump.h>
 #include <sys/kthread.h>
+#include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/priv.h>
@@ -61,6 +64,7 @@
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/resourcevar.h>
+#include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -87,6 +91,11 @@
 #ifndef PANIC_REBOOT_WAIT_TIME
 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
 #endif
+static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
+SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RW | CTLFLAG_TUN,
+    &panic_reboot_wait_time, 0,
+    "Seconds to wait before rebooting after a panic");
+TUNABLE_INT("kern.panic_reboot_wait_time", &panic_reboot_wait_time);
 
 /*
  * Note that stdarg.h and the ANSI style va_start macro is used for both
@@ -121,11 +130,6 @@
 	&sync_on_panic, 0, "Do a sync before rebooting from a panic");
 TUNABLE_INT("kern.sync_on_panic", &sync_on_panic);
 
-static int stop_scheduler_on_panic = 1;
-SYSCTL_INT(_kern, OID_AUTO, stop_scheduler_on_panic, CTLFLAG_RW | CTLFLAG_TUN,
-    &stop_scheduler_on_panic, 0, "stop scheduler upon entering panic");
-TUNABLE_INT("kern.stop_scheduler_on_panic", &stop_scheduler_on_panic);
-
 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
     "Shutdown environment");
 
@@ -137,6 +141,10 @@
 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
 	&show_busybufs, 0, "");
 
+int suspend_blocked = 0;
+SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
+	&suspend_blocked, 0, "Block suspend due to a pending shutdown");
+
 /*
  * Variable panicstr contains argument to first call to panic; used as flag
  * to indicate that the kernel has already called panic.
@@ -151,10 +159,16 @@
 static struct pcb dumppcb;		/* Registers. */
 lwpid_t dumptid;			/* Thread ID. */
 
+static struct cdevsw reroot_cdevsw = {
+     .d_version = D_VERSION,
+     .d_name    = "reroot",
+};
+
 static void poweroff_wait(void *, int);
 static void shutdown_halt(void *junk, int howto);
 static void shutdown_panic(void *junk, int howto);
 static void shutdown_reset(void *junk, int howto);
+static int kern_reroot(void);
 
 /* register various local shutdown events */
 static void
@@ -174,6 +188,26 @@
 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);
 
 /*
+ * The only reason this exists is to create the /dev/reroot/ directory,
+ * used by reroot code in init(8) as a mountpoint for tmpfs.
+ */
+static void
+reroot_conf(void *unused)
+{
+	int error;
+	struct cdev *cdev;
+
+	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev,
+	    &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot");
+	if (error != 0) {
+		printf("%s: failed to create device node, error %d",
+		    __func__, error);
+	}
+}
+
+SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL);
+
+/*
  * The system call that results in a reboot.
  */
 /* ARGSUSED */
@@ -189,9 +223,13 @@
 	if (error == 0)
 		error = priv_check(td, PRIV_REBOOT);
 	if (error == 0) {
-		mtx_lock(&Giant);
-		kern_reboot(uap->opt);
-		mtx_unlock(&Giant);
+		if (uap->opt & RB_REROOT) {
+			error = kern_reroot();
+		} else {
+			mtx_lock(&Giant);
+			kern_reboot(uap->opt);
+			mtx_unlock(&Giant);
+		}
 	}
 	return (error);
 }
@@ -456,6 +494,102 @@
 }
 
 /*
+ * The system call that results in changing the rootfs.
+ */
+static int
+kern_reroot(void)
+{
+	struct vnode *oldrootvnode, *vp;
+	struct mount *mp, *devmp;
+	int error;
+
+	if (curproc != initproc)
+		return (EPERM);
+
+	/*
+	 * Mark the filesystem containing currently-running executable
+	 * (the temporary copy of init(8)) busy.
+	 */
+	vp = curproc->p_textvp;
+	error = vn_lock(vp, LK_SHARED);
+	if (error != 0)
+		return (error);
+	mp = vp->v_mount;
+	error = vfs_busy(mp, MBF_NOWAIT);
+	if (error != 0) {
+		vfs_ref(mp);
+		VOP_UNLOCK(vp, 0);
+		error = vfs_busy(mp, 0);
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		vfs_rel(mp);
+		if (error != 0) {
+			VOP_UNLOCK(vp, 0);
+			return (ENOENT);
+		}
+		if (vp->v_iflag & VI_DOOMED) {
+			VOP_UNLOCK(vp, 0);
+			vfs_unbusy(mp);
+			return (ENOENT);
+		}
+	}
+	VOP_UNLOCK(vp, 0);
+
+	/*
+	 * Remove the filesystem containing currently-running executable
+	 * from the mount list, to prevent it from being unmounted
+	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
+	 *
+	 * Also preserve /dev - forcibly unmounting it could cause driver
+	 * reinitialization.
+	 */
+
+	vfs_ref(rootdevmp);
+	devmp = rootdevmp;
+	rootdevmp = NULL;
+
+	mtx_lock(&mountlist_mtx);
+	TAILQ_REMOVE(&mountlist, mp, mnt_list);
+	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+
+	oldrootvnode = rootvnode;
+
+	/*
+	 * Unmount everything except for the two filesystems preserved above.
+	 */
+	vfs_unmountall();
+
+	/*
+	 * Add /dev back; vfs_mountroot() will move it into its new place.
+	 */
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+	rootdevmp = devmp;
+	vfs_rel(rootdevmp);
+
+	/*
+	 * Mount the new rootfs.
+	 */
+	vfs_mountroot();
+
+	/*
+	 * Update all references to the old rootvnode.
+	 */
+	mountcheckdirs(oldrootvnode, rootvnode);
+
+	/*
+	 * Add the temporary filesystem back and unbusy it.
+	 */
+	mtx_lock(&mountlist_mtx);
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mtx_unlock(&mountlist_mtx);
+	vfs_unbusy(mp);
+
+	return (0);
+}
+
+/*
  * If the shutdown was a clean halt, behave accordingly.
  */
 static void
@@ -487,12 +621,12 @@
 	int loop;
 
 	if (howto & RB_DUMP) {
-		if (PANIC_REBOOT_WAIT_TIME != 0) {
-			if (PANIC_REBOOT_WAIT_TIME != -1) {
+		if (panic_reboot_wait_time != 0) {
+			if (panic_reboot_wait_time != -1) {
 				printf("Automatic reboot in %d seconds - "
 				       "press a key on the console to abort\n",
-					PANIC_REBOOT_WAIT_TIME);
-				for (loop = PANIC_REBOOT_WAIT_TIME * 10;
+					panic_reboot_wait_time);
+				for (loop = panic_reboot_wait_time * 10;
 				     loop > 0; --loop) {
 					DELAY(1000 * 100); /* 1/10th second */
 					/* Did user type a key? */
@@ -543,7 +677,135 @@
 	/* NOTREACHED */ /* assuming reset worked */
 }
 
+#if defined(WITNESS) || defined(INVARIANTS)
+static int kassert_warn_only = 0;
+#ifdef KDB
+static int kassert_do_kdb = 0;
+#endif
+#ifdef KTR
+static int kassert_do_ktr = 0;
+#endif
+static int kassert_do_log = 1;
+static int kassert_log_pps_limit = 4;
+static int kassert_log_mute_at = 0;
+static int kassert_log_panic_at = 0;
+static int kassert_warnings = 0;
+
+SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options");
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_warn_only, 0,
+    "KASSERT triggers a panic (1) or just a warning (0)");
+TUNABLE_INT("debug.kassert.warn_only", &kassert_warn_only);
+
+#ifdef KDB
+SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_do_kdb, 0, "KASSERT will enter the debugger");
+TUNABLE_INT("debug.kassert.do_kdb", &kassert_do_kdb);
+#endif
+
+#ifdef KTR
+SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_do_ktr, 0,
+    "KASSERT does a KTR, set this to the KTRMASK you want");
+TUNABLE_INT("debug.kassert.do_ktr", &kassert_do_ktr);
+#endif
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)");
+TUNABLE_INT("debug.kassert.do_log", &kassert_do_log);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_warnings, 0, "number of KASSERTs that have been triggered");
+TUNABLE_INT("debug.kassert.warnings", &kassert_warnings);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");
+TUNABLE_INT("debug.kassert.log_panic_at", &kassert_log_panic_at);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_log_pps_limit, 0, "limit number of log messages per second");
+TUNABLE_INT("debug.kassert.log_pps_limit", &kassert_log_pps_limit);
+
+SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RW | CTLFLAG_TUN,
+    &kassert_log_mute_at, 0, "max number of KASSERTS to log");
+TUNABLE_INT("debug.kassert.log_mute_at", &kassert_log_mute_at);
+
+static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);
+
+SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
+    kassert_sysctl_kassert, "I", "set to trigger a test kassert");
+
+static int
+kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
+{
+	int error, i;
+
+	error = sysctl_wire_old_buffer(req, sizeof(int));
+	if (error == 0) {
+		i = 0;
+		error = sysctl_handle_int(oidp, &i, 0, req);
+	}
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
+	return (0);
+}
+
 /*
+ * Called by KASSERT, this decides if we will panic
+ * or if we will log via printf and/or ktr.
+ */
+void
+kassert_panic(const char *fmt, ...)
+{
+	static char buf[256];
+	va_list ap;
+
+	va_start(ap, fmt);
+	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+
+	/*
+	 * panic if we're not just warning, or if we've exceeded
+	 * kassert_log_panic_at warnings.
+	 */
+	if (!kassert_warn_only ||
+	    (kassert_log_panic_at > 0 &&
+	     kassert_warnings >= kassert_log_panic_at)) {
+		va_start(ap, fmt);
+		vpanic(fmt, ap);
+		/* NORETURN */
+	}
+#ifdef KTR
+	if (kassert_do_ktr)
+		CTR0(ktr_mask, buf);
+#endif /* KTR */
+	/*
+	 * log if we've not yet met the mute limit.
+	 */
+	if (kassert_do_log &&
+	    (kassert_log_mute_at == 0 ||
+	     kassert_warnings < kassert_log_mute_at)) {
+		static  struct timeval lasterr;
+		static  int curerr;
+
+		if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
+			printf("KASSERT failed: %s\n", buf);
+			kdb_backtrace();
+		}
+	}
+#ifdef KDB
+	if (kassert_do_kdb) {
+		kdb_enter(KDB_WHY_KASSERT, buf);
+	}
+#endif
+	atomic_add_int(&kassert_warnings, 1);
+}
+#endif
+
+/*
  * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
  * and then reboots.  If we are called twice, then we avoid trying to sync
  * the disks as this often leads to recursive panics.
@@ -551,48 +813,43 @@
 void
 panic(const char *fmt, ...)
 {
+	va_list ap;
+
+	va_start(ap, fmt);
+	vpanic(fmt, ap);
+}
+
+void
+vpanic(const char *fmt, va_list ap)
+{
 #ifdef SMP
-	static volatile u_int panic_cpu = NOCPU;
 	cpuset_t other_cpus;
 #endif
 	struct thread *td = curthread;
 	int bootopt, newpanic;
-	va_list ap;
 	static char buf[256];
 
-	if (stop_scheduler_on_panic)
-		spinlock_enter();
-	else
-		critical_enter();
+	spinlock_enter();
 
 #ifdef SMP
 	/*
-	 * We don't want multiple CPU's to panic at the same time, so we
-	 * use panic_cpu as a simple spinlock.  We have to keep checking
-	 * panic_cpu if we are spinning in case the panic on the first
-	 * CPU is canceled.
+	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
+	 * concurrently entering panic.  Only the winner will proceed
+	 * further.
 	 */
-	if (panic_cpu != PCPU_GET(cpuid))
-		while (atomic_cmpset_int(&panic_cpu, NOCPU,
-		    PCPU_GET(cpuid)) == 0)
-			while (panic_cpu != NOCPU)
-				; /* nothing */
+	if (panicstr == NULL && !kdb_active) {
+		other_cpus = all_cpus;
+		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
+		stop_cpus_hard(other_cpus);
+	}
 
-	if (stop_scheduler_on_panic) {
-		if (panicstr == NULL && !kdb_active) {
-			other_cpus = all_cpus;
-			CPU_CLR(PCPU_GET(cpuid), &other_cpus);
-			stop_cpus_hard(other_cpus);
-		}
-
-		/*
-		 * We set stop_scheduler here and not in the block above,
-		 * because we want to ensure that if panic has been called and
-		 * stop_scheduler_on_panic is true, then stop_scheduler will
-		 * always be set.  Even if panic has been entered from kdb.
-		 */
-		td->td_stopsched = 1;
-	}
+	/*
+	 * We set stop_scheduler here and not in the block above,
+	 * because we want to ensure that if panic has been called and
+	 * stop_scheduler_on_panic is true, then stop_scheduler will
+	 * always be set.  Even if panic has been entered from kdb.
+	 */
+	td->td_stopsched = 1;
 #endif
 
 	bootopt = RB_AUTOBOOT;
@@ -605,7 +862,6 @@
 		newpanic = 1;
 	}
 
-	va_start(ap, fmt);
 	if (newpanic) {
 		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 		panicstr = buf;
@@ -616,7 +872,6 @@
 		vprintf(fmt, ap);
 		printf("\n");
 	}
-	va_end(ap);
 #ifdef SMP
 	printf("cpuid = %d\n", PCPU_GET(cpuid));
 #endif
@@ -632,8 +887,6 @@
 	/* thread_unlock(td); */
 	if (!sync_on_panic)
 		bootopt |= RB_NOSYNC;
-	if (!stop_scheduler_on_panic)
-		critical_exit();
 	kern_reboot(bootopt);
 }
 
@@ -650,7 +903,7 @@
 static int poweroff_delay = POWEROFF_DELAY;
 
 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
-	&poweroff_delay, 0, "");
+    &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");
 
 static void
 poweroff_wait(void *junk, int howto)
@@ -670,7 +923,7 @@
  */
 static int kproc_shutdown_wait = 60;
 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
-    &kproc_shutdown_wait, 0, "");
+    &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");
 
 void
 kproc_shutdown(void *arg, int howto)
@@ -712,18 +965,34 @@
 		printf("done\n");
 }
 
+static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)];
+SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD,
+    dumpdevname, 0, "Device for kernel dumps");
+
 /* Registration of dumpers */
 int
-set_dumper(struct dumperinfo *di)
+set_dumper(struct dumperinfo *di, const char *devname, struct thread *td)
 {
+	size_t wantcopy;
+	int error;
 
+	error = priv_check(td, PRIV_SETDUMPER);
+	if (error != 0)
+		return (error);
+
 	if (di == NULL) {
 		bzero(&dumper, sizeof dumper);
+		dumpdevname[0] = '\0';
 		return (0);
 	}
 	if (dumper.dumper != NULL)
 		return (EBUSY);
 	dumper = *di;
+	wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname));
+	if (wantcopy >= sizeof(dumpdevname)) {
+		printf("set_dumper: device name truncated from '%s' -> '%s'\n",
+			devname, dumpdevname);
+	}
 	return (0);
 }
 
@@ -750,16 +1019,27 @@
 {
 
 	bzero(kdh, sizeof(*kdh));
-	strncpy(kdh->magic, magic, sizeof(kdh->magic));
-	strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
+	strlcpy(kdh->magic, magic, sizeof(kdh->magic));
+	strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
 	kdh->version = htod32(KERNELDUMPVERSION);
 	kdh->architectureversion = htod32(archver);
 	kdh->dumplength = htod64(dumplen);
 	kdh->dumptime = htod64(time_second);
 	kdh->blocksize = htod32(blksz);
-	strncpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
-	strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
+	strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
+	strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring));
 	if (panicstr != NULL)
-		strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
+		strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
 	kdh->parity = kerneldump_parity(kdh);
 }
+
+#ifdef DDB
+DB_SHOW_COMMAND(panic, db_show_panic)
+{
+
+	if (panicstr == NULL)
+		db_printf("panicstr not set\n");
+	else
+		db_printf("panic: %s\n", panicstr);
+}
+#endif
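
Two patterns in the kern_shutdown.c changes are worth calling out. First, the panic() rework is the standard variadic-forwarding split: vpanic() takes a va_list and does the real work, and panic() becomes a thin wrapper, which is what lets the new kassert_panic() reuse the same code path. A generic sketch of the pattern, with invented names (the kernel versions obviously do far more, such as stopping the other CPUs):

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>

    static void
    my_vpanic(const char *fmt, va_list ap)
    {

            vfprintf(stderr, fmt, ap);
            fputc('\n', stderr);
            abort();
    }

    static void
    my_panic(const char *fmt, ...)
    {
            va_list ap;

            va_start(ap, fmt);
            my_vpanic(fmt, ap);
            /* NOTREACHED: my_vpanic() never returns, so no va_end(). */
    }

    int
    main(void)
    {

            my_panic("fatal: code %d", 42);
    }

The new panic() in the diff likewise omits va_end(), since vpanic() does not return.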

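The second pattern is the strncpy()-to-strlcpy() switch in set_dumper() and mkdumpheader(): strncpy() leaves the destination unterminated whenever the source is at least as long as the buffer, while strlcpy() always NUL-terminates and returns the length it tried to produce, which is how set_dumper() detects and reports a truncated device name. An illustrative comparison (strlcpy() is in <string.h> on the BSDs; on other platforms it may require libbsd):

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
            char a[8], b[8];
            size_t want;

            /* Fills all 8 bytes with no NUL: 'a' is not a C string. */
            strncpy(a, "0123456789", sizeof(a));

            /* Copies "0123456" plus NUL, and reports the intended length. */
            want = strlcpy(b, "0123456789", sizeof(b));
            if (want >= sizeof(b))
                    printf("truncated: wanted %zu, kept %zu\n",
                        want, sizeof(b) - 1);
            printf("a[0] = %c, b = \"%s\"\n", a[0], b);
            return (0);
    }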